From b699ed6b2f2e4ba24ab29d9738e7f17382258dd2 Mon Sep 17 00:00:00 2001 From: Hyeonung Baek Date: Fri, 28 Jan 2022 01:52:57 +0900 Subject: [PATCH] refactor lex --- lex.ml | 51 ++++++++++++++++++++++++++++++--------------------- main.ml | 14 +++++++------- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/lex.ml b/lex.ml index 6827430..3c0a252 100644 --- a/lex.ml +++ b/lex.ml @@ -1,9 +1,10 @@ open Ast.Value -type tokens = Token.t Seq.t +exception Invalid_character of char +exception Expected of char -exception Token_not_found -exception Unclosed_quote +let invalid_char c = raise @@ Invalid_character c +let expected c = raise @@ Expected c let either f g c = f c || g c @@ -29,6 +30,12 @@ let is_ident_start = let is_ident = either is_ident_start is_digit +let expect_char c seq = + match seq () with + | Seq.Nil -> expected c + | Seq.Cons (x, seq) -> + if x = c then seq else expected c + let expect_token str tok seq = let rec aux ts seq = match ts (), seq () with @@ -45,7 +52,7 @@ let find_token seq = (fun (s, t) -> expect_token s t seq) (* same as take_while f seq, drop_while f seq *) -let rec partition_while f seq : 'a Seq.t * 'a Seq.t = +let rec partition_while f seq = match seq () with | Seq.Nil -> Seq.empty, seq | Seq.Cons (x, seq) -> @@ -55,44 +62,46 @@ let rec partition_while f seq : 'a Seq.t * 'a Seq.t = else Seq.(empty, cons x seq) -let tokenize (str : string) : tokens = +let tokenize str = let seq = String.to_seq str in let rec aux seq = let open Token in + let open Seq in match seq () with - | Seq.Nil -> Seq.empty - | Seq.Cons (x, seq) -> + | Nil -> empty + | Cons (x, seq) -> + (* skip whitespace *) if is_whitespace x then - aux seq (* skip whitespace *) + aux seq + (* string *) else if x = '"' then let str, seq = partition_while ((<>) '"') seq in let str = String (String.of_seq str) in - begin match seq () with - | Seq.Nil -> raise Unclosed_quote - | Seq.Cons (x, seq) -> - if x = '"' then Seq.cons (Value str) (aux seq) - else raise Unclosed_quote - end + let seq = expect_char '"' seq in + cons (Value str) (aux seq) + (* number (int, float) *) else if is_digit x then let n, seq = partition_while is_num seq in - let n = String.of_seq @@ Seq.cons x n in + let n = String.of_seq @@ cons x n in let n = if String.contains n '.' (* float *) then Float (float_of_string n) else Int (int_of_string n) in - Seq.cons (Value n) (aux seq) + cons (Value n) (aux seq) + (* idents *) else if is_ident_start x then let id, seq = partition_while is_ident seq in - let id = String.of_seq @@ Seq.cons x id in - Seq.cons (Ident id) (aux seq) + let id = String.of_seq @@ cons x id in + cons (Ident id) (aux seq) + (* tokens *) else - match find_token @@ Seq.cons x seq with - | None -> raise Token_not_found - | Some (t, seq) -> Seq.cons t (aux seq) + match find_token @@ cons x seq with + | None -> invalid_char x + | Some (t, seq) -> cons t (aux seq) in aux seq diff --git a/main.ml b/main.ml index f5ede32..16866f0 100644 --- a/main.ml +++ b/main.ml @@ -1,13 +1,11 @@ open Printf -exception Reset_line (* used to indicate ^C is pressed *) - let version = "%%VERSION%%" let error_to_string e = try raise e with - | Lex.Token_not_found -> sprintf "invalid token" - | Lex.Unclosed_quote -> sprintf "string not closed" + | Lex.Invalid_character c -> sprintf "invalid character %c" c + | Lex.Expected c -> sprintf "expected %c" c | Parser.Expected t -> sprintf "expected %s" t | Parser.Unexpected_token t -> sprintf "unexpected token \"%s\"" t | Ast.Invalid_type t -> sprintf "invalid type %s" (Ast.Type.to_string t) @@ -38,16 +36,18 @@ let rep vars : unit = Hashtbl.replace vars "ans" v; printf "%s\n" @@ Ast.Value.to_string v +exception Reset_line (* used to indicate ^C is pressed *) + let init_repl () = Hashtbl.replace vars "ans" (Ast.Value.Int 0); (* treat Ctrl-C as to reset line *) - let sigintf _ = raise Reset_line in - Sys.(set_signal sigint (Signal_handle sigintf)) + let reset_line _ = raise Reset_line in + Sys.(set_signal sigint (Signal_handle reset_line)) (* simple REPL with error handling *) let rec repl vars : unit = try rep vars; repl vars with - | Exit | End_of_file -> () + | Exit | End_of_file (* Ctrl-D *) -> () | Reset_line -> printf "\n"; repl vars | e -> print_error e; repl vars