From 1790a72b68760d380ddf04b88c7d23a92e8cc188 Mon Sep 17 00:00:00 2001
From: Hyeonung Baek
Date: Sat, 12 Feb 2022 03:17:26 +0900
Subject: [PATCH] Add column info to Lex

---
 lex.ml  | 44 +++++++++++++++++++++++---------------------
 main.ml |  5 +++--
 2 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/lex.ml b/lex.ml
index d6e2425..d058499 100644
--- a/lex.ml
+++ b/lex.ml
@@ -1,10 +1,10 @@
 open Token
 
-exception Invalid_character of char
-exception Expected of char
+exception Invalid_character of int * char
+exception Expected of int * char
 
-let invalid_char c = raise @@ Invalid_character c
-let expected c = raise @@ Expected c
+let invalid_char col c = raise @@ Invalid_character (col, c)
+let expected col c = raise @@ Expected (col, c)
 
 let either f g c = f c || g c
 
@@ -30,18 +30,18 @@ let is_ident_start =
 
 let is_ident = either is_ident_start is_digit
 
-let expect_char c seq =
+let expect_char col c seq =
   match seq () with
-  | Seq.Nil -> expected c
-  | Seq.Cons (x, seq) ->
-    if x = c then seq else expected c
+  | Seq.Nil -> expected col c
+  | Seq.Cons ((_, x), seq) ->
+    if x = c then seq else expected col c
 
 let expect_token str tok seq =
   let rec aux ts seq =
     match ts (), seq () with
     | Seq.Nil, _ -> Some seq
     | Seq.Cons _, Seq.Nil -> None
-    | Seq.Cons (a, ts), Seq.Cons (b, seq) ->
+    | Seq.Cons (a, ts), Seq.Cons ((_, b), seq) ->
       if a = b then aux ts seq else None
   in
   let str = String.to_seq str in
@@ -62,29 +62,31 @@ let rec partition_while f seq =
     else
       Seq.(empty, cons x seq)
 
+let snds f = fun x -> f @@ snd x
+
 let tokenize str =
-  let seq = String.to_seq str in
+  let seq = String.to_seqi str in
   let rec aux seq =
     let open Token in
     let open Seq in
     match seq () with
     | Nil -> empty
-    | Cons (x, seq) ->
+    | Cons ((col, x), seq) ->
       (* skip whitespace *)
       if is_whitespace x then
         aux seq
 
       (* string *)
       else if x = '"' then
-        let str, seq = partition_while ((<>) '"') seq in
-        let str = String (String.of_seq str) in
-        let seq = expect_char '"' seq in
-        cons str (aux seq)
+        let str, seq = partition_while (fun (_, c) -> c <> '"') seq in
+        let str = String.of_seq @@ Seq.map snd str in
+        let seq = expect_char (col + String.length str + 1) '"' seq in
+        cons (String str) (aux seq)
 
       (* number (int, float) *)
       else if is_digit x then
-        let n, seq = partition_while is_num seq in
-        let n = String.of_seq @@ cons x n in
+        let n, seq = partition_while (snds is_num) seq in
+        let n = cons x (Seq.map snd n) |> String.of_seq in
         let n =
           if String.contains n '.'
           (* float *) then Float (float_of_string n)
@@ -94,14 +96,14 @@ let tokenize str =
 
       (* idents *)
       else if is_ident_start x then
-        let id, seq = partition_while is_ident seq in
-        let id = String.of_seq @@ cons x id in
+        let id, seq = partition_while (snds is_ident) seq in
+        let id = String.of_seq @@ cons x @@ Seq.map snd id in
         cons (Ident id) (aux seq)
 
       (* tokens *)
       else
-        match find_token @@ cons x seq with
-        | None -> invalid_char x
+        match find_token @@ cons (col, x) seq with
+        | None -> invalid_char col x
         | Some (t, seq) -> cons t (aux seq)
   in
   aux seq
diff --git a/main.ml b/main.ml
index d5f4875..0fa1b4c 100644
--- a/main.ml
+++ b/main.ml
@@ -6,8 +6,9 @@ let debug = ref false
 
 let error_to_string e =
   try raise e with
-  | Lex.Invalid_character c -> sprintf "invalid character %c" c
-  | Lex.Expected c -> sprintf "expected %c" c
+  | Lex.Invalid_character (col, c) ->
+    sprintf "invalid character %c at col %d" c col
+  | Lex.Expected (col, c) -> sprintf "expected %c at col %d" c col
   | Parser.Expected t -> sprintf "expected %s" t
   | Parser.Unexpected_token t -> sprintf "unexpected token \"%s\"" t
   | Type.Invalid t -> sprintf "invalid type %s" (Type.to_string t)