Rewrite token lexer
This commit is contained in:
parent
dd2a1e160e
commit
7d6c833e58
2 changed files with 41 additions and 36 deletions
22
lex.ml
22
lex.ml
|
@ -1,5 +1,7 @@
|
||||||
type tokens = Token.t Seq.t
|
type tokens = Token.t Seq.t
|
||||||
|
|
||||||
|
exception Token_not_found
|
||||||
|
|
||||||
let either f g c =
|
let either f g c =
|
||||||
f c || g c
|
f c || g c
|
||||||
|
|
||||||
|
@ -42,18 +44,18 @@ let tokenize (str : string) : tokens =
|
||||||
| Seq.Nil -> Seq.empty
|
| Seq.Nil -> Seq.empty
|
||||||
| Seq.Cons (x, s) ->
|
| Seq.Cons (x, s) ->
|
||||||
if is_whitespace x then
|
if is_whitespace x then
|
||||||
aux s
|
aux s (* skip whitespace *)
|
||||||
else if is_digit x then
|
else if is_digit x then
|
||||||
let n, s = partition_while is_num s in
|
let n, s = partition_while is_num seq in
|
||||||
let n = String.of_seq @@ Seq.cons x n in
|
let n = int_of_string @@ String.of_seq n in
|
||||||
Seq.cons (of_string n) (aux s)
|
Seq.cons (Int n) (aux s)
|
||||||
else if is_ident_start x then
|
else if is_ident_start x then
|
||||||
begin
|
let id, s = partition_while is_ident seq in
|
||||||
let id, s = partition_while is_ident s in
|
let id = String.of_seq id in
|
||||||
let id = String.of_seq @@ Seq.cons x id in
|
Seq.cons (Ident id) (aux s)
|
||||||
Seq.cons (Ident id) (aux s)
|
|
||||||
end
|
|
||||||
else
|
else
|
||||||
Seq.cons (of_char x) (aux s)
|
match find_token seq with
|
||||||
|
| None -> raise Token_not_found
|
||||||
|
| Some (t, s) -> Seq.cons t (aux s)
|
||||||
in
|
in
|
||||||
aux seq
|
aux seq
|
||||||
|
|
55
token.ml
55
token.ml
|
@ -10,34 +10,37 @@ type t =
|
||||||
| LParen
|
| LParen
|
||||||
| RParen
|
| RParen
|
||||||
|
|
||||||
let of_char = function
|
let tokens = ref [
|
||||||
| '+' -> Plus
|
"+", Plus;
|
||||||
| '-' -> Minus
|
"-", Minus;
|
||||||
| '*' -> Asterisk
|
"*", Asterisk;
|
||||||
| '/' -> Slash
|
"/", Slash;
|
||||||
| '^' -> Carret
|
"^", Carret;
|
||||||
| '%' -> Percent
|
"%", Percent;
|
||||||
| '(' -> LParen
|
"(", LParen;
|
||||||
| ')' -> RParen
|
")", RParen;
|
||||||
| _ -> invalid_arg "Token.of_char"
|
]
|
||||||
|
|
||||||
let of_string str =
|
let expect_token str tok seq =
|
||||||
let fc = Char.code str.[0] in
|
let rec aux ts seq =
|
||||||
if Char.(code '0' <= fc && fc <= code '9') then
|
match ts (), seq () with
|
||||||
Int (int_of_string str)
|
| Seq.Nil, _ -> Some seq
|
||||||
else
|
| Seq.Cons _, Seq.Nil -> None
|
||||||
match str with
|
| Seq.Cons (a, ts), Seq.Cons (b, seq) ->
|
||||||
| _ when String.length str = 1 -> of_char str.[0]
|
if a = b then aux ts seq else None
|
||||||
| _ -> failwith "Token.of_string"
|
in
|
||||||
|
let str = String.to_seq str in
|
||||||
|
aux str seq |> Option.map (fun s -> tok, s)
|
||||||
|
|
||||||
|
let find_token seq =
|
||||||
|
!tokens |> List.find_map
|
||||||
|
(fun (s, t) -> expect_token s t seq)
|
||||||
|
|
||||||
let to_string = function
|
let to_string = function
|
||||||
| Int n -> string_of_int n
|
| Int n -> string_of_int n
|
||||||
| Ident s -> s
|
| Ident s -> s
|
||||||
| Plus -> "+"
|
| t ->
|
||||||
| Minus -> "-"
|
begin match List.find_opt (fun (_, tok) -> t = tok) !tokens with
|
||||||
| Asterisk -> "*"
|
| None -> failwith "Token.to_string"
|
||||||
| Slash -> "/"
|
| Some (s, _) -> s
|
||||||
| Carret -> "^"
|
end
|
||||||
| Percent -> "%"
|
|
||||||
| LParen -> "("
|
|
||||||
| RParen -> ")"
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue