refactor: extract token
parent 5ab22e2f68
commit 9569b20542
3 changed files with 167 additions and 112 deletions

lib/lexer.ml (107 changed lines)

@@ -1,51 +1,3 @@
-(* small set of ml *)
-
-type op_type =
-  | Add
-  | Sub
-  | Mul
-  | Div
-  | Mod
-  | Pow
-
-let op2str op =
-  match op with
-  | Add -> "+"
-  | Sub -> "-"
-  | Mul -> "*"
-  | Div -> "/"
-  | Mod -> "%"
-  | Pow -> "^"
-
-type keyword_type =
-  | Let
-  | In
-  | If
-  | Then
-  | Else
-  | Fun
-
-type token_type =
-  | Eof
-  | Identifier of string
-  | Digit of string
-  | Op of op_type
-  | LParen
-  | RParen
-  | Equal
-  | Arrow
-  | Keyword of keyword_type
-  | Comment of string
-  | Fail of string
-
-
-type token = {
-  (* token type *)
-  token_type: token_type;
-  (* start position *)
-  pos: int;
-}
-
 let epsilon = '\000'
 
 (* Lexer is just state machine *)
@@ -54,8 +6,6 @@ let is_digit c = c >= '0' && c <= '9'
 let is_alpha c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
 let is_alnum c = is_alpha c || is_digit c
-
-
 
 type lexer_context = {
   total: string;
   pos: int;
@@ -189,36 +139,33 @@ let%test "test get_digit" =
   id = "123" && ctx'.pos = 3
 
 let id_to_token_type id =
-  match id with
-  | "let" -> Keyword Let
-  | "in" -> Keyword In
-  | "if" -> Keyword If
-  | "then" -> Keyword Then
-  | "else" -> Keyword Else
-  | "fun" -> Keyword Fun
-  | _ -> Identifier id
+  match (Token.str2keyword id) with
+  | Some keyword -> keyword
+  | None -> Token.Identifier id
 
 let lex_token (ctx: lexer_context) =
-  let make_token token_type pos = {token_type = token_type; pos = pos} in
+  let make_token token_type pos = {Token.token_type = token_type; pos = pos} in
   let ctx = skip_spaces ctx in
   let first_ch = input_first ctx in
   let pos = ctx.pos in
+  let rest = input_rest ctx in
   match first_ch with
-  | '\000' -> {token_type = Eof; pos = pos}, ctx
-  | '(' -> make_token LParen pos, input_rest ctx
-  | ')' -> make_token RParen pos, input_rest ctx
-  | '=' -> make_token Equal pos, input_rest ctx
-  | '+' -> make_token (Op Add) pos, input_rest ctx
+  | '\000' -> {Token.token_type = Eof; pos = pos}, ctx
+  | '(' -> make_token LParen pos, rest
+  | ')' -> make_token RParen pos, rest
+  | '=' -> make_token Equal pos, rest
+  | '+' -> make_token Add pos, rest
   | '-' ->
-    let second_ch = input_first (input_rest ctx) in
+    let second_ch = input_first (rest) in
     if second_ch = '>' then
-      make_token Arrow pos, input_rest (input_rest ctx)
+      make_token Arrow pos, input_rest rest
     else
-      make_token (Op Sub) pos, input_rest ctx
-  | '*' -> make_token (Op Mul) pos, input_rest ctx
+      make_token Sub pos, rest
+  | '*' -> make_token Mul pos, rest
   | '/' ->
     (* check comment *)
-    let second_ch = input_first (input_rest ctx) in
+    let second_ch = input_first (rest) in
     if second_ch = '/' then
       let rec aux ctx =
         let ch = input_first ctx in
@@ -231,30 +178,30 @@ let lex_token (ctx: lexer_context) =
       let comment = String.sub ctx.total pos len in
       make_token (Comment comment) pos, ctx
     else
-      make_token (Op Div) pos, input_rest ctx
-  | '%' -> make_token (Op Mod) pos, input_rest ctx
-  | '^' -> make_token (Op Pow) pos, input_rest ctx
+      make_token (Div) pos, rest
+  | '%' -> make_token (Mod) pos, rest
+  | '^' -> make_token (Pow) pos, rest
   | c when is_alpha c ->
     let id, ctx = get_identifier ctx in
     make_token (id_to_token_type id) pos, ctx
   | c when is_digit c ->
     let id, ctx = get_digits ctx in
    make_token (Digit id) pos, ctx
-  | _ -> make_token (Fail "invalid token") pos, input_rest ctx
+  | _ -> make_token (Fail "invalid token") pos, rest
 
 let%test "test lex_token 1" =
   let ctx = {total = "let"; pos = 0; line_pos = [||]} in
   let token, ctx' = lex_token ctx in
-  token.token_type = Keyword Let && token.pos = 0 && ctx'.pos = 3
+  token.token_type = Let && token.pos = 0 && ctx'.pos = 3
 
 let%test "test lex_token 2" =
   let ctx = {total = "let in"; pos = 0; line_pos = [||]} in
   let token, ctx' = lex_token ctx in
   let token', ctx'' = lex_token ctx' in
-  token.token_type = Keyword Let && token.pos = 0 && ctx'.pos = 3 &&
-  token'.token_type = Keyword In && token'.pos = 4 && ctx''.pos = 6
+  token.token_type = Let && token.pos = 0 && ctx'.pos = 3 &&
+  token'.token_type = In && token'.pos = 4 && ctx''.pos = 6
 
-let lex_tokens_seq (total: string): (token * lexer_context) Seq.t =
+let lex_tokens_seq (total: string): (Token.t * lexer_context) Seq.t =
   let rec aux ctx =
     let token, next_ctx = lex_token ctx in
     if token.token_type = Eof then
@@ -269,8 +216,8 @@ let%test "test lex_tokens_seq" =
   let seq = seq |> Seq.map (fun (token, _) -> token) in
   let tokens = List.of_seq seq in
   let expected = [
-    {token_type = Keyword Let; pos = 0};
-    {token_type = Keyword In; pos = 4};
-    {token_type = Eof; pos = 6}
+    {Token.token_type = Let; pos = 0};
+    {Token.token_type = In; pos = 4};
+    {Token.token_type = Eof; pos = 6}
   ] in
   tokens = expected
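
Note: the lexer-visible effect of the refactor is that keyword recognition becomes a table lookup instead of per-keyword match arms. A minimal self-contained sketch of the pattern (the token_type stand-in below is simplified; in the real code str2keyword lives in the new Token module):

(* Sketch: keyword lookup replacing per-keyword match arms. *)
type token_type = Let | In | Fun | Identifier of string

let str2keyword = function
  | "let" -> Some Let
  | "in" -> Some In
  | "fun" -> Some Fun
  | _ -> None

let id_to_token_type id =
  match str2keyword id with
  | Some kw -> kw           (* reserved word *)
  | None -> Identifier id   (* anything else lexes as an identifier *)

let () =
  assert (id_to_token_type "in" = In);
  assert (id_to_token_type "inner" = Identifier "inner")

Adding a keyword now means adding one table entry rather than a new arm in the lexer.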

lib/parser.ml (104 changed lines)

@@ -1,7 +1,7 @@
 open Lexer
 
 type parser_context = {
-  seq: (Lexer.token * Lexer.lexer_context) Seq.t;
+  seq: (Token.t * Lexer.lexer_context) Seq.t;
   errors: string list;
 }
 
@@ -32,15 +32,15 @@ let or_parser (a: 'a parser) (b: 'a parser): 'a parser = fun (ctx: parser_context) ->
 
 let (<|>) = or_parser
 
-let peek_token: token parser = fun (ctx: parser_context) ->
+let peek_token: Token.t parser = fun (ctx: parser_context) ->
   Seq.uncons ctx.seq |> Option.map (fun ((t, _), _) -> (t, ctx))
 
-let next_token: token parser = fun (ctx: parser_context) ->
+let next_token: Token.t parser = fun (ctx: parser_context) ->
   Seq.uncons ctx.seq |> Option.map (fun ((t, _), s) -> (t,
     { ctx with seq = s }
   ))
 
-let match_token (tt: token_type) : token parser =
+let match_token (tt: Token.token_type) : Token.t parser =
   let* t = next_token in
   if t.token_type = tt then
     return t
@@ -79,6 +79,36 @@ BNF:
 expr ::= let_expr | fun_expr | if_expr | level3
 *)
 
+type bin_op_type =
+  | Add
+  | Sub
+  | Mul
+  | Div
+  | Mod
+  | Pow
+
+let token2op (t: Token.token_type): bin_op_type option =
+  match t with
+  | Token.Add -> Some Add
+  | Token.Sub -> Some Sub
+  | Token.Mul -> Some Mul
+  | Token.Div -> Some Div
+  | Token.Mod -> Some Mod
+  | Token.Pow -> Some Pow
+  | _ -> None
+
+let op2str (op: bin_op_type): string =
+  match op with
+  | Add -> "+"
+  | Sub -> "-"
+  | Mul -> "*"
+  | Div -> "/"
+  | Mod -> "%"
+  | Pow -> "^"
+
+type mono_op_type =
+  | Neg
+
 type let_expr_tree = Let of string * expr_tree * expr_tree
 and fun_expr_tree = Fun of string * expr_tree
 and if_expr_tree = If of expr_tree * expr_tree * expr_tree
@@ -88,8 +118,8 @@ and expr_tree =
   | FunExpr of fun_expr_tree
   | IfExpr of if_expr_tree
   | CallExpr of call_expr_tree
-  | BinOpExpr of Lexer.op_type * expr_tree * expr_tree
-  | MonoOpExpr of Lexer.op_type * expr_tree
+  | BinOpExpr of bin_op_type * expr_tree * expr_tree
+  | MonoOpExpr of bin_op_type * expr_tree
   | Identifier of string
   | Number of int
 
@@ -101,55 +131,55 @@ let expr2str (e: expr_tree): string =
   | FunExpr (Fun (id, e)) -> Printf.sprintf "fun %s ->\n%s%s" id (tab depth) (aux e (depth+1))
   | IfExpr (If (e1, e2, e3)) -> Printf.sprintf "if %s then\n%s%selse\n%s%s" (aux e1 depth) (tab depth) (aux e2 depth) (tab depth) (aux e3 depth)
   | CallExpr (Call (e1, e2)) -> Printf.sprintf "%s %s" (aux e1 depth) (aux e2 depth)
-  | BinOpExpr (op, e1, e2) -> Printf.sprintf "%s %s %s" (aux e1 depth) (Lexer.op2str op) (aux e2 depth)
-  | MonoOpExpr (op, e) -> Printf.sprintf "%s %s" (Lexer.op2str op) (aux e depth)
+  | BinOpExpr (op, e1, e2) -> Printf.sprintf "%s %s %s" (aux e1 depth) (op2str op) (aux e2 depth)
+  | MonoOpExpr (op, e) -> Printf.sprintf "%s %s" (op2str op) (aux e depth)
   | Identifier id -> id
   | Number n -> string_of_int n in
   aux e 0
 
 let rec parse_let_expr (): let_expr_tree parser =
-  let* _ = match_token (Lexer.Keyword Lexer.Let) in
+  let* _ = match_token (Token.Let) in
   let* tt = next_token in
   match tt.token_type with
-    Lexer.Identifier(x) ->
+    Token.Identifier(x) ->
     let id = x in
-    let* _ = match_token Lexer.Equal in
+    let* _ = match_token Token.Equal in
     let* e1 = expr() in
-    let* _ = match_token (Lexer.Keyword Lexer.In) in
+    let* _ = match_token (Token.In) in
     let* e2 = expr() in
     return (Let (id, e1, e2))
   | _ -> stop
 and parse_fun_expr (): fun_expr_tree parser =
-  let* _ = match_token (Lexer.Keyword Lexer.Fun) in
+  let* _ = match_token (Token.Fun) in
   let* tt = next_token in
   match tt.token_type with
-    Lexer.Identifier(x) ->
+    Token.Identifier(x) ->
     let id = x in
-    let* _ = match_token Lexer.Arrow in
+    let* _ = match_token Token.Arrow in
     let* e = expr() in
     return (Fun (id, e))
   | _ -> stop
 and parse_if_expr (): if_expr_tree parser =
-  let* _ = match_token (Lexer.Keyword Lexer.If) in
+  let* _ = match_token (Token.If) in
   let* e1 = expr() in
-  let* _ = match_token (Lexer.Keyword Lexer.Then) in
+  let* _ = match_token (Token.Then) in
   let* e2 = expr() in
-  let* _ = match_token (Lexer.Keyword Lexer.Else) in
+  let* _ = match_token (Token.Else) in
   let* e3 = expr() in
   return (If (e1, e2, e3))
 and parse_factor (): expr_tree parser =
   let* tt = peek_token in
   match tt.token_type with
-  | Lexer.Identifier x ->
+  | Token.Identifier x ->
     let* _ = next_token in
     return (Identifier x)
-  | Lexer.Digit x ->
+  | Token.Digit x ->
     let* _ = next_token in
     return (Number (int_of_string x))
-  | Lexer.LParen ->
-    let* _ = match_token Lexer.LParen in
+  | Token.LParen ->
+    let* _ = match_token Token.LParen in
     let* e = expr() in
-    let* _ = match_token Lexer.RParen in
+    let* _ = match_token Token.RParen in
     return e
   | _ -> stop
 and parse_call_expr (): expr_tree parser =
@@ -157,7 +187,7 @@ and parse_call_expr (): expr_tree parser =
   let rec aux e1 =
     let* c = peek_token in
     match c.token_type with
-    | Lexer.Identifier _ | Lexer.Digit _ | Lexer.LParen ->
+    | Token.Identifier _ | Token.Digit _ | Token.LParen ->
      let* e2 = parse_factor() in
      aux (CallExpr (Call (e1, e2)))
     | _ -> return e1 in
@@ -166,10 +196,14 @@ and parse_level1 (): expr_tree parser =
   let* e1 = parse_call_expr() in
   let rec aux e1 =
     let* c = peek_token in
-    match c.token_type with
-    | Lexer.Op op when op = Lexer.Add || op = Lexer.Sub ->
+    let tt = c.token_type in
+    match tt with
+    | Token.Add | Token.Sub ->
       let* _ = next_token in
       let* e2 = parse_call_expr() in
+      let op = match token2op tt with
+        | Some x -> x
+        | None -> failwith "unreachable" in
       aux (BinOpExpr (op, e1, e2))
     | _ -> return e1 in
   aux e1
@@ -178,9 +212,12 @@ and parse_level2 (): expr_tree parser =
   let rec aux e1 =
     let* c = peek_token in
     match c.token_type with
-    | Lexer.Op op when op = Lexer.Mul || op = Lexer.Div || op = Lexer.Mod ->
+    | Token.Mul | Token.Div | Token.Mod ->
       let* _ = next_token in
       let* e2 = parse_level1() in
+      let op = match token2op c.token_type with
+        | Some x -> x
+        | None -> failwith "unreachable" in
       aux (BinOpExpr (op, e1, e2))
     | _ -> return e1 in
   aux e1
@@ -189,9 +226,12 @@ and parse_level3 (): expr_tree parser =
   let rec aux e1 =
     let* c = peek_token in
     match c.token_type with
-    | Lexer.Op op when op = Lexer.Pow ->
+    | Token.Pow ->
       let* _ = next_token in
       let* e2 = parse_level3() in
+      let op = match token2op c.token_type with
+        | Some x -> x
+        | None -> failwith "unreachable" in
       aux (BinOpExpr (op, e1, e2))
     | _ -> return e1 in
   aux e1
@@ -201,10 +241,10 @@ and expr (): expr_tree parser =
     (parse_if_expr() |> fmap (fun x -> IfExpr x)) <|> parse_level3() in
   return e
 
-let get_expr_tree_from_tokens (tokens: (Lexer.token * Lexer.lexer_context) Seq.t): expr_tree option =
-  let ntokens = Seq.filter (fun (x,_) ->
-    match x.token_type with
-    | Lexer.Comment(_) -> false
+let get_expr_tree_from_tokens (tokens: (Token.t * Lexer.lexer_context) Seq.t): expr_tree option =
+  let ntokens = Seq.filter (fun ((token,_): Token.t * Lexer.lexer_context) ->
+    match token.Token.token_type with
+    | Token.Comment(_) -> false
     | _ -> true
   ) tokens in
   let ctx = { seq = ntokens; errors = [] } in
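
Note: with Op of op_type gone, the parser matches bare operator constructors (Token.Add, Token.Mul, ...) and recovers its own bin_op_type through the partial map token2op; the failwith "unreachable" arm is safe because the enclosing match has already narrowed the token. A simplified sketch of that guard pattern (the token and op types here are stand-ins, not the real parser types):

(* Sketch: partial token->op map, made total by the caller's match. *)
type token = TAdd | TSub | TNum of int
type op = Add | Sub

let token2op = function
  | TAdd -> Some Add
  | TSub -> Some Sub
  | _ -> None

(* Called only after matching TAdd | TSub, so None cannot occur here. *)
let op_of_token t =
  match token2op t with
  | Some o -> o
  | None -> failwith "unreachable"

let () = assert (op_of_token TSub = Sub)

An alternative would be to let token2op raise directly, but returning an option keeps the map usable in contexts where a non-operator token is expected and not a bug.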

lib/token.ml (new file, 68 lines)

@@ -0,0 +1,68 @@
+
+type token_type =
+  | Eof
+  | Identifier of string
+  | Digit of string
+  | Add
+  | Sub
+  | Mul
+  | Div
+  | Mod
+  | Pow
+  | LParen
+  | RParen
+  | Equal
+  | Arrow
+  | Colon
+  | Let
+  | In
+  | If
+  | Then
+  | Else
+  | Fun
+  | Comment of string
+  | Fail of string
+
+let op = [
+  Add, "+";
+  Sub, "-";
+  Mul, "*";
+  Div, "/";
+  Mod, "%";
+  Pow, "^";
+] |> List.to_seq |> Hashtbl.of_seq
+
+let op2str o = Hashtbl.find op o
+
+let symbol = [
+  LParen, "(";
+  RParen, ")";
+  Equal, "=";
+  Arrow, "->";
+  Colon, ":";
+] |> List.to_seq |> Hashtbl.of_seq
+
+
+let keywordTable = [
+  Let, "let";
+  In, "in";
+  If, "if";
+  Then, "then";
+  Else, "else";
+  Fun, "fun";
+] |> List.to_seq |> Hashtbl.of_seq
+
+let str2keywordTable = keywordTable
+  |> Hashtbl.to_seq
+  |> Seq.map (fun (x, y) -> (y, x))
+  |> Hashtbl.of_seq
+
+let keyword2str keyword = Hashtbl.find keywordTable keyword
+let str2keyword str = Hashtbl.find_opt str2keywordTable str
+
+type t = {
+  (* token type *)
+  token_type: token_type;
+  (* start position *)
+  pos: int;
+}
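
Note: both keyword directions in token.ml are derived from one association list; str2keywordTable is built by reversing keywordTable, so the two tables cannot drift apart. A self-contained sketch of the construction (constructor set abbreviated):

(* Sketch: derive the inverse lookup table by swapping each pair. *)
type kw = Let | In | Fun

let kw2str_tbl =
  [Let, "let"; In, "in"; Fun, "fun"]
  |> List.to_seq |> Hashtbl.of_seq

let str2kw_tbl =
  kw2str_tbl
  |> Hashtbl.to_seq
  |> Seq.map (fun (k, s) -> (s, k))
  |> Hashtbl.of_seq

let () =
  assert (Hashtbl.find kw2str_tbl Fun = "fun");
  assert (Hashtbl.find_opt str2kw_tbl "let" = Some Let)

Hashtbl.of_seq and Hashtbl.to_seq require OCaml 4.07 or later; on older compilers the same two lookups can be written with List.assoc over the pair list.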