Revert Lex.tokenize to be non-lazy

백현웅 2022-01-22 03:04:00 +09:00
parent c2308d7939
commit f77c1db22a

lex.ml (14 changed lines)

@@ -57,13 +57,13 @@ let rec partition_while f seq : 'a Seq.t * 'a Seq.t =
 let tokenize (str : string) : tokens =
   let seq = String.to_seq str in
-  let rec aux seq () =
+  let rec aux seq =
     let open Token in
     match seq () with
-    | Seq.Nil -> Seq.Nil
+    | Seq.Nil -> Seq.empty
     | Seq.Cons (x, seq) ->
       if is_whitespace x then
-        aux seq () (* skip whitespace *)
+        aux seq (* skip whitespace *)
       else if x = '"' then
         let str, seq = partition_while ((<>) '"') seq in
@@ -71,7 +71,7 @@ let tokenize (str : string) : tokens =
         begin match seq () with
         | Seq.Nil -> raise Unclosed_quote
         | Seq.Cons (x, seq) ->
-          if x = '"' then Seq.Cons (Value str, aux seq)
+          if x = '"' then Seq.cons (Value str) (aux seq)
           else raise Unclosed_quote
         end
@@ -83,16 +83,16 @@ let tokenize (str : string) : tokens =
           then Float (float_of_string n)
           else Int (int_of_string n)
         in
-        Seq.Cons (Value n, aux seq)
+        Seq.cons (Value n) (aux seq)
       else if is_ident_start x then
         let id, seq = partition_while is_ident seq in
         let id = String.of_seq @@ Seq.cons x id in
-        Seq.Cons (Ident id, aux seq)
+        Seq.cons (Ident id) (aux seq)
       else
         match find_token @@ Seq.cons x seq with
        | None -> raise Token_not_found
-        | Some (t, s) -> Seq.Cons (t, aux s)
+        | Some (t, seq) -> Seq.cons t (aux seq)
   in
   aux seq
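
The shape being removed (aux with an extra unit parameter, returning Seq.node) defers each recursive call until the consumer forces a node; the shape this commit restores (aux returning a fully built Seq.t via Seq.cons) evaluates `aux seq` before `Seq.cons` returns, so the whole input string is tokenized up front. A minimal sketch of the two shapes, using hypothetical eager_copy / lazy_copy helpers over a plain char sequence rather than lex.ml's tokens:

(* Non-lazy shape (what this revert restores): the recursive call runs
   before Seq.cons returns, so the whole input is traversed immediately. *)
let rec eager_copy (s : char Seq.t) : char Seq.t =
  match s () with
  | Seq.Nil -> Seq.empty
  | Seq.Cons (x, rest) -> Seq.cons x (eager_copy rest)

(* Lazy shape (what is being reverted): the extra unit argument makes
   lazy_copy rest a suspension, so recursion only happens when the
   consumer forces each node. *)
let rec lazy_copy (s : char Seq.t) () : char Seq.node =
  match s () with
  | Seq.Nil -> Seq.Nil
  | Seq.Cons (x, rest) -> Seq.Cons (x, lazy_copy rest)

let () =
  let trace c = print_char c; c in
  (* eager_copy forces every element right away: prints "abc" *)
  let _ = eager_copy (Seq.map trace (String.to_seq "abc")) in
  (* lazy_copy returns an unforced thunk: prints nothing *)
  let _ = lazy_copy (Seq.map trace (String.to_seq "abc")) in
  print_newline ()

Both shapes satisfy the Seq.t type (it is just unit -> 'a Seq.node); the practical difference is when the recursion runs, e.g. whether exceptions such as Unclosed_quote or Token_not_found are raised at the tokenize call or only once the offending token is forced.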