From 7d6c833e586ff55c7f71da64bedcf10d82095508 Mon Sep 17 00:00:00 2001 From: Hyeonung Baek Date: Mon, 17 Jan 2022 15:17:18 +0900 Subject: [PATCH] Rewrite token lexer --- lex.ml | 22 ++++++++++++---------- token.ml | 55 +++++++++++++++++++++++++++++-------------------------- 2 files changed, 41 insertions(+), 36 deletions(-) diff --git a/lex.ml b/lex.ml index 3ecf1ab..d45c269 100644 --- a/lex.ml +++ b/lex.ml @@ -1,5 +1,7 @@ type tokens = Token.t Seq.t +exception Token_not_found + let either f g c = f c || g c @@ -42,18 +44,18 @@ let tokenize (str : string) : tokens = | Seq.Nil -> Seq.empty | Seq.Cons (x, s) -> if is_whitespace x then - aux s + aux s (* skip whitespace *) else if is_digit x then - let n, s = partition_while is_num s in - let n = String.of_seq @@ Seq.cons x n in - Seq.cons (of_string n) (aux s) + let n, s = partition_while is_num seq in + let n = int_of_string @@ String.of_seq n in + Seq.cons (Int n) (aux s) else if is_ident_start x then - begin - let id, s = partition_while is_ident s in - let id = String.of_seq @@ Seq.cons x id in - Seq.cons (Ident id) (aux s) - end + let id, s = partition_while is_ident seq in + let id = String.of_seq id in + Seq.cons (Ident id) (aux s) else - Seq.cons (of_char x) (aux s) + match find_token seq with + | None -> raise Token_not_found + | Some (t, s) -> Seq.cons t (aux s) in aux seq diff --git a/token.ml b/token.ml index a153b20..ac5394b 100644 --- a/token.ml +++ b/token.ml @@ -10,34 +10,37 @@ type t = | LParen | RParen -let of_char = function - | '+' -> Plus - | '-' -> Minus - | '*' -> Asterisk - | '/' -> Slash - | '^' -> Carret - | '%' -> Percent - | '(' -> LParen - | ')' -> RParen - | _ -> invalid_arg "Token.of_char" +let tokens = ref [ + "+", Plus; + "-", Minus; + "*", Asterisk; + "/", Slash; + "^", Carret; + "%", Percent; + "(", LParen; + ")", RParen; +] -let of_string str = - let fc = Char.code str.[0] in - if Char.(code '0' <= fc && fc <= code '9') then - Int (int_of_string str) - else - match str with - | _ when String.length str = 1 -> of_char str.[0] - | _ -> failwith "Token.of_string" +let expect_token str tok seq = + let rec aux ts seq = + match ts (), seq () with + | Seq.Nil, _ -> Some seq + | Seq.Cons _, Seq.Nil -> None + | Seq.Cons (a, ts), Seq.Cons (b, seq) -> + if a = b then aux ts seq else None + in + let str = String.to_seq str in + aux str seq |> Option.map (fun s -> tok, s) + +let find_token seq = + !tokens |> List.find_map + (fun (s, t) -> expect_token s t seq) let to_string = function | Int n -> string_of_int n | Ident s -> s - | Plus -> "+" - | Minus -> "-" - | Asterisk -> "*" - | Slash -> "/" - | Carret -> "^" - | Percent -> "%" - | LParen -> "(" - | RParen -> ")" + | t -> + begin match List.find_opt (fun (_, tok) -> t = tok) !tokens with + | None -> failwith "Token.to_string" + | Some (s, _) -> s + end