Rewrite token lexer

This commit is contained in:
백현웅 2022-01-17 15:17:18 +09:00
parent dd2a1e160e
commit 7d6c833e58
2 changed files with 41 additions and 36 deletions

22
lex.ml
View file

@@ -1,5 +1,7 @@
type tokens = Token.t Seq.t type tokens = Token.t Seq.t
exception Token_not_found
let either f g c = let either f g c =
f c || g c f c || g c
@@ -42,18 +44,18 @@ let tokenize (str : string) : tokens =
| Seq.Nil -> Seq.empty | Seq.Nil -> Seq.empty
| Seq.Cons (x, s) -> | Seq.Cons (x, s) ->
if is_whitespace x then if is_whitespace x then
aux s aux s (* skip whitespace *)
else if is_digit x then else if is_digit x then
let n, s = partition_while is_num s in let n, s = partition_while is_num seq in
let n = String.of_seq @@ Seq.cons x n in let n = int_of_string @@ String.of_seq n in
Seq.cons (of_string n) (aux s) Seq.cons (Int n) (aux s)
else if is_ident_start x then else if is_ident_start x then
begin let id, s = partition_while is_ident seq in
let id, s = partition_while is_ident s in let id = String.of_seq id in
let id = String.of_seq @@ Seq.cons x id in Seq.cons (Ident id) (aux s)
Seq.cons (Ident id) (aux s)
end
else else
Seq.cons (of_char x) (aux s) match find_token seq with
| None -> raise Token_not_found
| Some (t, s) -> Seq.cons t (aux s)
in in
aux seq aux seq

View file

@@ -10,34 +10,37 @@ type t =
| LParen | LParen
| RParen | RParen
let of_char = function let tokens = ref [
| '+' -> Plus "+", Plus;
| '-' -> Minus "-", Minus;
| '*' -> Asterisk "*", Asterisk;
| '/' -> Slash "/", Slash;
| '^' -> Carret "^", Carret;
| '%' -> Percent "%", Percent;
| '(' -> LParen "(", LParen;
| ')' -> RParen ")", RParen;
| _ -> invalid_arg "Token.of_char" ]
let of_string str = let expect_token str tok seq =
let fc = Char.code str.[0] in let rec aux ts seq =
if Char.(code '0' <= fc && fc <= code '9') then match ts (), seq () with
Int (int_of_string str) | Seq.Nil, _ -> Some seq
else | Seq.Cons _, Seq.Nil -> None
match str with | Seq.Cons (a, ts), Seq.Cons (b, seq) ->
| _ when String.length str = 1 -> of_char str.[0] if a = b then aux ts seq else None
| _ -> failwith "Token.of_string" in
let str = String.to_seq str in
aux str seq |> Option.map (fun s -> tok, s)
let find_token seq =
!tokens |> List.find_map
(fun (s, t) -> expect_token s t seq)
let to_string = function let to_string = function
| Int n -> string_of_int n | Int n -> string_of_int n
| Ident s -> s | Ident s -> s
| Plus -> "+" | t ->
| Minus -> "-" begin match List.find_opt (fun (_, tok) -> t = tok) !tokens with
| Asterisk -> "*" | None -> failwith "Token.to_string"
| Slash -> "/" | Some (s, _) -> s
| Carret -> "^" end
| Percent -> "%"
| LParen -> "("
| RParen -> ")"