Add column info to Lex

This commit is contained in:
백현웅 2022-02-12 03:17:26 +09:00
parent fcf2c0388d
commit 1790a72b68
2 changed files with 26 additions and 23 deletions

44
lex.ml
View file

@ -1,10 +1,10 @@
open Token open Token
exception Invalid_character of char exception Invalid_character of int * char
exception Expected of char exception Expected of int * char
let invalid_char c = raise @@ Invalid_character c let invalid_char col c = raise @@ Invalid_character (col, c)
let expected c = raise @@ Expected c let expected col c = raise @@ Expected (col, c)
let either f g c = let either f g c =
f c || g c f c || g c
@ -30,18 +30,18 @@ let is_ident_start =
let is_ident = let is_ident =
either is_ident_start is_digit either is_ident_start is_digit
let expect_char c seq = let expect_char col c seq =
match seq () with match seq () with
| Seq.Nil -> expected c | Seq.Nil -> expected col c
| Seq.Cons (x, seq) -> | Seq.Cons ((_, x), seq) ->
if x = c then seq else expected c if x = c then seq else expected col c
let expect_token str tok seq = let expect_token str tok seq =
let rec aux ts seq = let rec aux ts seq =
match ts (), seq () with match ts (), seq () with
| Seq.Nil, _ -> Some seq | Seq.Nil, _ -> Some seq
| Seq.Cons _, Seq.Nil -> None | Seq.Cons _, Seq.Nil -> None
| Seq.Cons (a, ts), Seq.Cons (b, seq) -> | Seq.Cons (a, ts), Seq.Cons ((_, b), seq) ->
if a = b then aux ts seq else None if a = b then aux ts seq else None
in in
let str = String.to_seq str in let str = String.to_seq str in
@ -62,29 +62,31 @@ let rec partition_while f seq =
else else
Seq.(empty, cons x seq) Seq.(empty, cons x seq)
let snds f = fun x -> f @@ snd x
let tokenize str = let tokenize str =
let seq = String.to_seq str in let seq = String.to_seqi str in
let rec aux seq = let rec aux seq =
let open Token in let open Token in
let open Seq in let open Seq in
match seq () with match seq () with
| Nil -> empty | Nil -> empty
| Cons (x, seq) -> | Cons ((col, x), seq) ->
(* skip whitespace *) (* skip whitespace *)
if is_whitespace x then if is_whitespace x then
aux seq aux seq
(* string *) (* string *)
else if x = '"' then else if x = '"' then
let str, seq = partition_while ((<>) '"') seq in let str, seq = partition_while (fun (_, c) -> c <> '"') seq in
let str = String (String.of_seq str) in let str = String.of_seq @@ Seq.map snd str in
let seq = expect_char '"' seq in let seq = expect_char (col + String.length str + 1) '"' seq in
cons str (aux seq) cons (String str) (aux seq)
(* number (int, float) *) (* number (int, float) *)
else if is_digit x then else if is_digit x then
let n, seq = partition_while is_num seq in let n, seq = partition_while (snds is_num) seq in
let n = String.of_seq @@ cons x n in let n = cons x (Seq.map snd n) |> String.of_seq in
let n = let n =
if String.contains n '.' (* float *) if String.contains n '.' (* float *)
then Float (float_of_string n) then Float (float_of_string n)
@ -94,14 +96,14 @@ let tokenize str =
(* idents *) (* idents *)
else if is_ident_start x then else if is_ident_start x then
let id, seq = partition_while is_ident seq in let id, seq = partition_while (snds is_ident) seq in
let id = String.of_seq @@ cons x id in let id = String.of_seq @@ cons x @@ Seq.map snd id in
cons (Ident id) (aux seq) cons (Ident id) (aux seq)
(* tokens *) (* tokens *)
else else
match find_token @@ cons x seq with match find_token @@ cons (col, x) seq with
| None -> invalid_char x | None -> invalid_char col x
| Some (t, seq) -> cons t (aux seq) | Some (t, seq) -> cons t (aux seq)
in in
aux seq aux seq

View file

@ -6,8 +6,9 @@ let debug = ref false
let error_to_string e = let error_to_string e =
try raise e with try raise e with
| Lex.Invalid_character c -> sprintf "invalid character %c" c | Lex.Invalid_character (col, c) ->
| Lex.Expected c -> sprintf "expected %c" c sprintf "invalid character %c at col %d" c col
| Lex.Expected (col, c) -> sprintf "expected %c at col %d" c col
| Parser.Expected t -> sprintf "expected %s" t | Parser.Expected t -> sprintf "expected %s" t
| Parser.Unexpected_token t -> sprintf "unexpected token \"%s\"" t | Parser.Unexpected_token t -> sprintf "unexpected token \"%s\"" t
| Type.Invalid t -> sprintf "invalid type %s" (Type.to_string t) | Type.Invalid t -> sprintf "invalid type %s" (Type.to_string t)