small-set-of-ml/lib/parser.ml

221 lines
6.9 KiB
OCaml
Raw Normal View History

2025-01-29 17:56:00 +09:00
open Lexer
2025-01-29 17:17:22 +09:00
type parser_context = {
seq: Lexer.token Seq.t;
errors: string list;
2025-01-29 17:56:00 +09:00
}
2025-01-29 17:17:22 +09:00
(* The parser is a function that takes a parser_context and returns an option of a tuple of a value and a parser_context.*)
2025-01-29 17:56:00 +09:00
type 'a parser = parser_context -> ('a * parser_context) option
2025-01-29 17:17:22 +09:00
2025-01-29 17:56:00 +09:00
let return (a: 'a) = fun (ctx: parser_context) -> Some (a, ctx)
let stop = fun (_: parser_context) -> None
2025-01-29 17:17:22 +09:00
let fmap (f: 'a -> 'b) (p: 'a parser): 'b parser = fun (ctx: parser_context) ->
match p ctx with
| Some (a, ctx') -> Some (f a, ctx')
2025-01-29 17:56:00 +09:00
| None -> None
2025-01-29 17:17:22 +09:00
let bind (a: 'a parser) (b:'a -> 'b parser) = fun (ctx: parser_context) ->
let p = a ctx in
match p with
| Some (a', ctx') -> b a' ctx'
2025-01-29 17:56:00 +09:00
| None -> None
2025-01-29 17:17:22 +09:00
2025-01-29 17:56:00 +09:00
let (>>=) = bind
let (let*) = bind
2025-01-29 17:17:22 +09:00
let or_parser (a: 'a parser) (b: 'a parser): 'a parser = fun (ctx: parser_context) ->
match a ctx with
| Some _ as res -> res
2025-01-29 17:56:00 +09:00
| None -> b ctx
2025-01-29 17:17:22 +09:00
2025-01-29 17:56:00 +09:00
let (<|>) = or_parser
2025-01-29 17:17:22 +09:00
let peek_token: token parser = fun (ctx: parser_context) ->
2025-01-29 17:56:00 +09:00
Seq.uncons ctx.seq |> Option.map (fun (t,_) -> (t,ctx))
2025-01-29 17:17:22 +09:00
let next_token: token parser = fun (ctx: parser_context) ->
Seq.uncons ctx.seq |> Option.map (fun (t, s) -> (t,
{ ctx with seq = s}
2025-01-29 17:56:00 +09:00
))
2025-01-29 17:17:22 +09:00
let match_token (tt: token_type) : token parser =
let* t = next_token in
if t.token_type = tt then
return t
else
2025-01-29 17:56:00 +09:00
stop
2025-01-29 17:17:22 +09:00
let zero_or_one (p: 'a parser): ('a option) parser = fun (ctx) ->
match p ctx with
| Some (a, ctx') -> Some (Some a, ctx')
2025-01-29 17:56:00 +09:00
| None -> Some (None, ctx)
2025-01-29 17:17:22 +09:00
let rec many (p: 'a parser): 'a list parser =
let* a = zero_or_one p in
match a with
| Some a' -> (
let* as' = many p in
return (a'::as')
)
2025-01-29 17:56:00 +09:00
| None -> return []
2025-01-29 17:17:22 +09:00
let many1 (p: 'a parser): 'a list parser =
let* a = p in
let* as' = many p in
2025-01-29 17:56:00 +09:00
return (a::as')
2025-01-29 17:17:22 +09:00
(*
BNF:
let_expr ::= let identifier = expr in expr
fun_expr ::= fun identifier -> expr
if_expr ::= if expr then expr else expr
2025-01-29 18:07:47 +09:00
factor ::= (expr) | identifier | number
call_expr ::= factor | factor factor
level1 ::= call_expr | level1 + call_expr | level1 - call_expr
2025-01-29 17:17:22 +09:00
level2 ::= level2 * level1 | level2 / level1 | level2 % level1 | level1
level3 ::= level2 ^ level3 | level2
expr ::= let_expr | fun_expr | if_expr | level3
*)
type let_expr_tree = Let of string * expr_tree * expr_tree
and fun_expr_tree = Fun of string * expr_tree
and if_expr_tree = If of expr_tree * expr_tree * expr_tree
2025-01-29 18:07:47 +09:00
and call_expr_tree = Call of expr_tree * expr_tree
2025-01-29 17:17:22 +09:00
and expr_tree =
| LetExpr of let_expr_tree
| FunExpr of fun_expr_tree
| IfExpr of if_expr_tree
2025-01-29 18:07:47 +09:00
| CallExpr of call_expr_tree
2025-01-29 17:17:22 +09:00
| BinOpExpr of Lexer.op_type * expr_tree * expr_tree
| MonoOpExpr of Lexer.op_type * expr_tree
| Identifier of string
2025-01-29 17:56:00 +09:00
| Number of int
2025-01-29 17:17:22 +09:00
let expr2str (e: expr_tree): string =
2025-01-29 20:44:36 +09:00
let tab n = String.make (n * 2) ' ' in
let rec aux e depth =
2025-01-29 17:17:22 +09:00
match e with
2025-01-29 20:44:36 +09:00
| LetExpr (Let (id, e1, e2)) -> Printf.sprintf "let %s = %s in\n%s%s" id (aux e1 depth) (tab depth) (aux e2 (depth+1))
| FunExpr (Fun (id, e)) -> Printf.sprintf "fun %s ->\n%s%s" id (tab depth) (aux e (depth+1))
| IfExpr (If (e1, e2, e3)) -> Printf.sprintf "if %s then\n%s%selse\n%s%s" (aux e1 depth) (tab depth) (aux e2 depth) (tab depth) (aux e3 depth)
| CallExpr (Call (e1, e2)) -> Printf.sprintf "%s %s" (aux e1 depth) (aux e2 depth)
| BinOpExpr (op, e1, e2) -> Printf.sprintf "%s %s %s" (aux e1 depth) (Lexer.op2str op) (aux e2 depth)
| MonoOpExpr (op, e) -> Printf.sprintf "%s %s" (Lexer.op2str op) (aux e depth)
2025-01-29 17:17:22 +09:00
| Identifier id -> id
| Number n -> string_of_int n in
2025-01-29 20:44:36 +09:00
aux e 0
2025-01-29 17:17:22 +09:00
let rec parse_let_expr (): let_expr_tree parser =
let* _ = match_token (Lexer.Keyword Lexer.Let) in
let* tt = next_token in
match tt.token_type with
Lexer.Identifier(x) ->
let id = x in
let* _ = match_token Lexer.Equal in
let* e1 = expr() in
let* _ = match_token (Lexer.Keyword Lexer.In) in
let* e2 = expr() in
return (Let (id, e1, e2))
| _ -> stop
and parse_fun_expr (): fun_expr_tree parser =
let* _ = match_token (Lexer.Keyword Lexer.Fun) in
let* tt = next_token in
match tt.token_type with
Lexer.Identifier(x) ->
let id = x in
let* _ = match_token Lexer.Arrow in
let* e = expr() in
return (Fun (id, e))
| _ -> stop
and parse_if_expr (): if_expr_tree parser =
let* _ = match_token (Lexer.Keyword Lexer.If) in
let* e1 = expr() in
let* _ = match_token (Lexer.Keyword Lexer.Then) in
let* e2 = expr() in
let* _ = match_token (Lexer.Keyword Lexer.Else) in
let* e3 = expr() in
return (If (e1, e2, e3))
2025-01-29 18:07:47 +09:00
and parse_factor (): expr_tree parser =
2025-01-29 17:17:22 +09:00
let* tt = peek_token in
match tt.token_type with
| Lexer.Identifier x ->
let* _ = next_token in
return (Identifier x)
| Lexer.Digit x ->
let* _ = next_token in
return (Number (int_of_string x))
| Lexer.LParen ->
let* _ = match_token Lexer.LParen in
let* e = expr() in
let* _ = match_token Lexer.RParen in
return e
| _ -> stop
2025-01-29 18:07:47 +09:00
and parse_call_expr (): expr_tree parser =
let* e1 = parse_factor() in
let rec aux e1 =
let* c = peek_token in
match c.token_type with
| Lexer.Identifier _ | Lexer.Digit _ | Lexer.LParen ->
let* e2 = parse_factor() in
aux (CallExpr (Call (e1, e2)))
| _ -> return e1 in
aux e1
2025-01-29 17:17:22 +09:00
and parse_level1 (): expr_tree parser =
2025-01-29 18:07:47 +09:00
let* e1 = parse_call_expr() in
2025-01-29 17:17:22 +09:00
let rec aux e1 =
let* c = peek_token in
match c.token_type with
| Lexer.Op op when op = Lexer.Add || op = Lexer.Sub ->
let* _ = next_token in
2025-01-29 18:07:47 +09:00
let* e2 = parse_call_expr() in
2025-01-29 17:17:22 +09:00
aux (BinOpExpr (op, e1, e2))
| _ -> return e1 in
aux e1
and parse_level2 (): expr_tree parser =
let* e1 = parse_level1() in
let rec aux e1 =
let* c = peek_token in
match c.token_type with
| Lexer.Op op when op = Lexer.Mul || op = Lexer.Div || op = Lexer.Mod ->
let* _ = next_token in
let* e2 = parse_level1() in
aux (BinOpExpr (op, e1, e2))
| _ -> return e1 in
aux e1
and parse_level3 (): expr_tree parser =
let* e1 = parse_level2() in
let rec aux e1 =
let* c = peek_token in
match c.token_type with
| Lexer.Op op when op = Lexer.Pow ->
let* _ = next_token in
let* e2 = parse_level3() in
aux (BinOpExpr (op, e1, e2))
| _ -> return e1 in
aux e1
and expr (): expr_tree parser =
let* e = (parse_let_expr() |> fmap (fun x -> LetExpr x)) <|>
(parse_fun_expr() |> fmap (fun x -> FunExpr x)) <|>
(parse_if_expr() |> fmap (fun x -> IfExpr x)) <|> parse_level3() in
2025-01-29 17:56:00 +09:00
return e
2025-01-29 17:17:22 +09:00
let get_expr_tree_from_tokens (tokens: Lexer.token Seq.t): expr_tree option =
let ntokens = Seq.filter (fun x ->
match x.token_type with
| Lexer.Comment(_) -> false
| _ -> true
) tokens in
let ctx = { seq = ntokens; errors = [] } in
match expr() ctx with
| Some (e, _) -> Some e
2025-01-29 17:56:00 +09:00
| None -> None
2025-01-29 17:17:22 +09:00
let%test "test get_expr_tree_from_tokens 1" =
2025-01-29 20:44:36 +09:00
let tokens = Lexer.lex_tokens_seq "let x = 1 in\n x" in
2025-01-29 17:17:22 +09:00
let tokens = tokens |> Seq.map (fun (x,_) -> x) in
match get_expr_tree_from_tokens tokens with
2025-01-29 20:44:36 +09:00
| Some e -> expr2str e = "let x = 1 in\n x"
2025-01-29 17:56:00 +09:00
| None -> false