From 9da77686e36009327bbf635a464f77de87c8d9f6 Mon Sep 17 00:00:00 2001 From: monoid Date: Wed, 29 Jan 2025 17:17:22 +0900 Subject: [PATCH] initial commit --- bin/dune | 6 + bin/main.ml | 12 ++ calc.opam | 31 ++++++ dune-project | 26 +++++ lib/dune | 6 + lib/eval.ml | 101 +++++++++++++++++ lib/lexer.ml | 276 ++++++++++++++++++++++++++++++++++++++++++++++ lib/parser.ml | 205 ++++++++++++++++++++++++++++++++++ test/dune | 2 + test/test_calc.ml | 0 10 files changed, 665 insertions(+) create mode 100644 bin/dune create mode 100644 bin/main.ml create mode 100644 calc.opam create mode 100644 dune-project create mode 100644 lib/dune create mode 100644 lib/eval.ml create mode 100644 lib/lexer.ml create mode 100644 lib/parser.ml create mode 100644 test/dune create mode 100644 test/test_calc.ml diff --git a/bin/dune b/bin/dune new file mode 100644 index 0000000..6f06be9 --- /dev/null +++ b/bin/dune @@ -0,0 +1,6 @@ +(executable + (public_name calc) + (name main) + (libraries calc) + (flags (:standard -w -32)) + ) diff --git a/bin/main.ml b/bin/main.ml new file mode 100644 index 0000000..3576804 --- /dev/null +++ b/bin/main.ml @@ -0,0 +1,12 @@ +open Calc;; + +let main () = + let input = Sys.argv.(1) in + let _ = Printf.printf "input: %s\n" input in + let result = Eval.eval_str input in + match result with + | Eval.Int n -> Printf.printf "%d\n" n + | _ -> failwith "Type error" +;; + +main ();; \ No newline at end of file diff --git a/calc.opam b/calc.opam new file mode 100644 index 0000000..aed4d6e --- /dev/null +++ b/calc.opam @@ -0,0 +1,31 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A short synopsis" +description: "A longer description" +maintainer: ["Maintainer Name "] +authors: ["Author Name "] +license: "LICENSE" +tags: ["add topics" "to describe" "your" "project"] +homepage: "https://github.com/username/reponame" +doc: "https://url/to/documentation" +bug-reports: "https://github.com/username/reponame/issues" +depends: [ + "dune" {>= "3.17"} + "ocaml" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/username/reponame.git" diff --git a/dune-project b/dune-project new file mode 100644 index 0000000..b15bb92 --- /dev/null +++ b/dune-project @@ -0,0 +1,26 @@ +(lang dune 3.17) + +(name calc) + +(generate_opam_files true) + +(source + (github username/reponame)) + +(authors "Author Name ") + +(maintainers "Maintainer Name ") + +(license LICENSE) + +(documentation https://url/to/documentation) + +(package + (name calc) + (synopsis "A short synopsis") + (description "A longer description") + (depends ocaml) + (tags + ("add topics" "to describe" your project))) + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/reference/dune-project/index.html diff --git a/lib/dune b/lib/dune new file mode 100644 index 0000000..c6e8515 --- /dev/null +++ b/lib/dune @@ -0,0 +1,6 @@ +(library + (name calc) + (inline_tests) + (preprocess (pps ppx_inline_test)) + (flags (:standard -w -32)) +) diff --git a/lib/eval.ml b/lib/eval.ml new file mode 100644 index 0000000..bd10b31 --- /dev/null +++ b/lib/eval.ml @@ -0,0 +1,101 @@ + + +module VariableBindingMap = Map.Make(String);; + + +type value_type = +| Int of int +| Fun of function_type +and scope = { + parent: scope option; + bindings: value_type VariableBindingMap.t; +} +and function_type = { + name: string; + body: Parser.expr_tree; + scope: scope; +};; + +let rec eval_expr (scope: scope) (expr: Parser.expr_tree): value_type = + match expr with + | Parser.LetExpr (Parser.Let (name, value_expr, in_expr)) -> + eval_let_expr scope name value_expr in_expr + | Parser.FunExpr (Parser.Fun (name, body_expr)) -> + eval_fun_expr scope name body_expr + | Parser.IfExpr (Parser.If (cond_expr, then_expr, else_expr)) -> + eval_if_expr scope cond_expr then_expr else_expr + | Parser.BinOpExpr (op, left_expr, right_expr) -> + eval_bin_op_expr scope op left_expr right_expr + | Parser.MonoOpExpr (_op, _expr) -> + failwith "Not implemented" + | Parser.Identifier(name) -> + let rec find_binding scope = + match scope with + | None -> failwith "Unbound variable" + | Some s -> + match VariableBindingMap.find_opt name s.bindings with + | Some v -> v + | None -> find_binding s.parent in + find_binding (Some scope) + | Parser.Number(n) -> Int n +and eval_if_expr scope cond_expr then_expr else_expr = + let cond = eval_expr scope cond_expr in + (match cond with + | Int 0 -> eval_expr scope else_expr + | _ -> eval_expr scope then_expr) +and eval_let_expr scope name value_expr in_expr = + let value = eval_expr scope value_expr in + let new_scope = { scope with bindings = VariableBindingMap.add name value scope.bindings } in + eval_expr new_scope in_expr +and eval_fun_expr scope name body_expr = + Fun { name = name; body = body_expr; scope = scope } +and eval_bin_op_expr scope op left_expr right_expr = + let left = eval_expr scope left_expr in + let right = eval_expr scope right_expr in + (match op with + | Add -> ( + match (left, right) with + | (Int l, Int r) -> Int (l + r) + | _ -> failwith "Type error" + ) + | Sub -> ( + match (left, right) with + | (Int l, Int r) -> Int (l - r) + | _ -> failwith "Type error" + ) + | Mul -> ( + match (left, right) with + | (Int l, Int r) -> Int (l * r) + | _ -> failwith "Type error" + ) + | Div -> ( + match (left, right) with + | (Int l, Int r) -> Int (l / r) + | _ -> failwith "Type error" + ) + | Mod -> ( + match (left, right) with + | (Int l, Int r) -> Int (l mod r) + | _ -> failwith "Type error" + ) + | Pow -> ( + match (left, right) with + | (Int l, Int r) -> Int (int_of_float (float_of_int l ** float_of_int r)) + | _ -> failwith "Type error" + )) +;; + +let eval_str (str: string): value_type = + let tokens = Lexer.lex_tokens_seq str in + let tokens = tokens |> Seq.map (fun (x,_) -> x) in + let expr = Parser.get_expr_tree_from_tokens tokens in + match expr with + | Some e -> eval_expr { parent = None; bindings = VariableBindingMap.empty } e + | None -> failwith "Parse error" +;; + +let%test "test eval_str 1" = + let result = eval_str "let x = 1 in x" in + match result with + | Int n -> n = 1 + | _ -> false \ No newline at end of file diff --git a/lib/lexer.ml b/lib/lexer.ml new file mode 100644 index 0000000..d316437 --- /dev/null +++ b/lib/lexer.ml @@ -0,0 +1,276 @@ +(* small set of ml *) + +type op_type = + | Add + | Sub + | Mul + | Div + | Mod + | Pow + +let op2str op = + match op with + | Add -> "+" + | Sub -> "-" + | Mul -> "*" + | Div -> "/" + | Mod -> "%" + | Pow -> "^" + +type keyword_type = + | Let + | In + | If + | Then + | Else + | Fun + +type token_type = + | Eof + | Identifier of string + | Digit of string + | Op of op_type + | LParen + | RParen + | Equal + | Arrow + | Keyword of keyword_type + | Comment of string + | Fail of string +;; + +type token = { + (* token type *) + token_type: token_type; + (* start position *) + pos: int; +};; + +let epsilon = '\000';; + +(* Lexer is just state machine *) + +let is_digit c = c >= '0' && c <= '9' +let is_alpha c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') +let is_alnum c = is_alpha c || is_digit c + + + +type lexer_context = { + total: string; + pos: int; + (* \n position array *) + line_pos: int array; +};; + +let binary_search_range arr x = + if Array.length arr = 0 then 0 + else + let rec aux low high = + match compare low high with + | 0 -> if arr.(low) >= x then low else low + 1 + (* unreachable *) + | c when c > 0 -> raise (Invalid_argument "binary_search_range") + | _ -> + let mid = (low + high) / 2 in + if arr.(mid) >= x && ( mid = 0 || arr.(mid - 1) < x) then mid + else if arr.(mid) < x then aux (mid + 1) high + else aux low (mid - 1) + in + aux 0 (Array.length arr - 1) +let get_line_and_col (line_pos: int array) (pos: int) = + let line_index = binary_search_range line_pos pos in + (* let _ = Printf.printf "line_index: %d\n" line_index in *) + let line_start_pos = if line_index > 0 then + (line_pos.(line_index - 1) + 1) else 0 in + (line_index + 1, pos - (line_start_pos) + 1);; + +let%test "test: get_line_and_col 1" = + let actual = get_line_and_col [|1; 2; 3; 4; 6|] 3 in + let expected = (3, 1) in + (* let _ = Printf.printf "(%d,%d)\n" (fst actual) (snd actual) in *) + actual = expected;; + +let%test "test: get_line_and_col 2" = + let actual = get_line_and_col [|1; 2; 3; 4; 6|] 10 in + let expected = (6, 4) in + (* let _ = Printf.printf "(%d,%d)\n" (fst actual) (snd actual) in *) + actual = expected;; + + +let input_first (ctx: lexer_context) = + if ctx.pos < String.length ctx.total then + ctx.total.[ctx.pos] + else + epsilon;; + +let%test "test first" = + let ctx = {total = "abc"; pos = 0; line_pos = [||]} in + input_first ctx = 'a';; + +let input_rest (ctx: lexer_context) = let ch = input_first ctx in + if ch = '\n' then + {ctx with pos = ctx.pos + 1; line_pos = Array.append ctx.line_pos [|ctx.pos|]} + else + {ctx with pos = ctx.pos + 1};; + +let%test "test rest" = + let ctx = {total = "abc"; pos = 0; line_pos = [||]} in + let ctx' = input_rest ctx in + ctx'.pos = 1 && ctx'.line_pos = [||];; + +let%test "test rest with new line" = + let ctx = {total = "a\nbc"; pos = 1; line_pos = [||]} in + let ctx' = input_rest ctx in + ctx'.pos = 2 && ctx'.line_pos = [|1|];; + +let%test "test rest with new line 2" = + let ctx = {total = "a\nb\nc"; pos = 3; line_pos = [|1|]} in + let ctx' = input_rest ctx in + (* let _ = Printf.printf "pos: %d, line_pos: %s" ctx'.pos (String.concat "," + (List.map string_of_int (Array.to_list ctx'.line_pos))) in *) + ctx'.pos = 4 && ctx'.line_pos = [|1; 3|];; + +let rec skip_spaces (ctx: lexer_context) = + let ch = input_first ctx in + if ch = ' ' || ch = '\t' || ch = '\n' then + skip_spaces (input_rest ctx) + else + ctx;; + +let%test "test skip_spaces" = + let ctx = {total = " \nabc"; pos = 0; line_pos = [||]} in + let ctx' = skip_spaces ctx in + (* let _ = Printf.printf "pos: %d, line_pos: %s" ctx'.pos (String.concat "," + (List.map string_of_int (Array.to_list ctx'.line_pos))) in *) + ctx'.pos = 3 && ctx'.line_pos = [|2|];; + +(* + 1. identifier: [a-zA-Z][a-zA-Z0-9]* + 2. digit: [0-9]+ + 3. operator: +, -, *, /, %, ^ + 4. keyword: let, in, if, then, else, fun + 5. (, ), = + 6. comment: //.* +*) + +let get_identifier (ctx: lexer_context) = + let rec aux ctx = + let ch = input_first ctx in + if is_alnum ch then + aux (input_rest ctx) + else + ctx in + let ctx' = aux ctx in + let len = ctx'.pos - ctx.pos in + let id = String.sub ctx'.total ctx.pos len in + id, ctx';; + +let%test "test get_identifier" = + let ctx = {total = "abc"; pos = 0; line_pos = [||]} in + let id, ctx' = get_identifier ctx in + id = "abc" && ctx'.pos = 3;; + +let get_digits (ctx: lexer_context) = + let rec aux ctx = + let ch = input_first ctx in + if is_digit ch then + aux (input_rest ctx) + else + ctx in + let ctx' = aux ctx in + let len = ctx'.pos - ctx.pos in + let id = String.sub ctx'.total ctx.pos len in + id, ctx';; + +let%test "test get_digit" = + let ctx = {total = "123"; pos = 0; line_pos = [||]} in + let id, ctx' = get_digits ctx in + id = "123" && ctx'.pos = 3;; + +let id_to_token_type id = + match id with + | "let" -> Keyword Let + | "in" -> Keyword In + | "if" -> Keyword If + | "then" -> Keyword Then + | "else" -> Keyword Else + | "fun" -> Keyword Fun + | _ -> Identifier id;; + +let lex_token (ctx: lexer_context) = + let make_token token_type pos = {token_type = token_type; pos = pos} in + let ctx = skip_spaces ctx in + let first_ch = input_first ctx in + let pos = ctx.pos in + match first_ch with + | '\000' -> {token_type = Eof; pos = pos}, ctx + | '(' -> make_token LParen pos, input_rest ctx + | ')' -> make_token RParen pos, input_rest ctx + | '=' -> make_token Equal pos, input_rest ctx + | '+' -> make_token (Op Add) pos, input_rest ctx + | '-' -> + let second_ch = input_first (input_rest ctx) in + if second_ch = '>' then + make_token Arrow pos, input_rest (input_rest ctx) + else + make_token (Op Sub) pos, input_rest ctx + | '*' -> make_token (Op Mul) pos, input_rest ctx + | '/' -> + (* check comment *) + let second_ch = input_first (input_rest ctx) in + if second_ch = '/' then + let rec aux ctx = + let ch = input_first ctx in + if ch = '\n' then + ctx + else + aux (input_rest ctx) in + let ctx = aux ctx in + let len = ctx.pos - pos in + let comment = String.sub ctx.total pos len in + make_token (Comment comment) pos, ctx + else + make_token (Op Div) pos, input_rest ctx + | '%' -> make_token (Op Mod) pos, input_rest ctx + | '^' -> make_token (Op Pow) pos, input_rest ctx + | c when is_alpha c -> + let id, ctx = get_identifier ctx in + make_token (id_to_token_type id) pos, ctx + | c when is_digit c -> + let id, ctx = get_digits ctx in + make_token (Digit id) pos, ctx + | _ -> make_token (Fail "invalid token") pos, input_rest ctx;; + +let%test "test lex_token 1" = + let ctx = {total = "let"; pos = 0; line_pos = [||]} in + let token, ctx' = lex_token ctx in + token.token_type = Keyword Let && token.pos = 0 && ctx'.pos = 3;; + +let%test "test lex_token 2" = + let ctx = {total = "let in"; pos = 0; line_pos = [||]} in + let token, ctx' = lex_token ctx in + let token', ctx'' = lex_token ctx' in + token.token_type = Keyword Let && token.pos = 0 && ctx'.pos = 3 && + token'.token_type = Keyword In && token'.pos = 4 && ctx''.pos = 6;; + +let lex_tokens_seq (total: string): (token * lexer_context) Seq.t = + let rec aux ctx = + let token, next_ctx = lex_token ctx in + if token.token_type = Eof then + Seq.Cons ((token, next_ctx), fun () -> Seq.Nil) + else + Seq.Cons ((token, next_ctx), fun () -> aux next_ctx) in + fun () -> aux {total = total; pos = 0; line_pos = [||]};; + +let%test "test lex_tokens_seq" = + let total = "let in" in + let seq = lex_tokens_seq total in + let seq = seq |> Seq.map (fun (token, _) -> token) in + let tokens = List.of_seq seq in + let expected = [ + {token_type = Keyword Let; pos = 0}; + {token_type = Keyword In; pos = 4}; + {token_type = Eof; pos = 6} + ] in + tokens = expected;; diff --git a/lib/parser.ml b/lib/parser.ml new file mode 100644 index 0000000..6f12499 --- /dev/null +++ b/lib/parser.ml @@ -0,0 +1,205 @@ +open Lexer;; + +type parser_context = { + seq: Lexer.token Seq.t; + errors: string list; +};; + +(* The parser is a function that takes a parser_context and returns an option of a tuple of a value and a parser_context.*) +type 'a parser = parser_context -> ('a * parser_context) option;; + +let return (a: 'a) = fun (ctx: parser_context) -> Some (a, ctx);; +let stop = fun (_: parser_context) -> None;; + +let fmap (f: 'a -> 'b) (p: 'a parser): 'b parser = fun (ctx: parser_context) -> + match p ctx with + | Some (a, ctx') -> Some (f a, ctx') + | None -> None;; + +let bind (a: 'a parser) (b:'a -> 'b parser) = fun (ctx: parser_context) -> + let p = a ctx in + match p with + | Some (a', ctx') -> b a' ctx' + | None -> None;; + +let (>>=) = bind;; +let (let*) = bind;; + +let or_parser (a: 'a parser) (b: 'a parser): 'a parser = fun (ctx: parser_context) -> + match a ctx with + | Some _ as res -> res + | None -> b ctx;; + +let (<|>) = or_parser;; + +let peek_token: token parser = fun (ctx: parser_context) -> + Seq.uncons ctx.seq |> Option.map (fun (t,_) -> (t,ctx));; + +let next_token: token parser = fun (ctx: parser_context) -> + Seq.uncons ctx.seq |> Option.map (fun (t, s) -> (t, + { ctx with seq = s} +));; + +let match_token (tt: token_type) : token parser = + let* t = next_token in + if t.token_type = tt then + return t + else + stop;; + +let zero_or_one (p: 'a parser): ('a option) parser = fun (ctx) -> + match p ctx with + | Some (a, ctx') -> Some (Some a, ctx') + | None -> Some (None, ctx);; + +let rec many (p: 'a parser): 'a list parser = + let* a = zero_or_one p in + match a with + | Some a' -> ( + let* as' = many p in + return (a'::as') + ) + | None -> return [];; + +let many1 (p: 'a parser): 'a list parser = + let* a = p in + let* as' = many p in + return (a::as');; + +(* +BNF: + let_expr ::= let identifier = expr in expr + fun_expr ::= fun identifier -> expr + if_expr ::= if expr then expr else expr + level0 ::= (expr) | identifier | number + level1 ::= level0 | level1 + level0 | level1 - level0 + level2 ::= level2 * level1 | level2 / level1 | level2 % level1 | level1 + level3 ::= level2 ^ level3 | level2 + expr ::= let_expr | fun_expr | if_expr | level3 +*) + +type let_expr_tree = Let of string * expr_tree * expr_tree +and fun_expr_tree = Fun of string * expr_tree +and if_expr_tree = If of expr_tree * expr_tree * expr_tree +and expr_tree = + | LetExpr of let_expr_tree + | FunExpr of fun_expr_tree + | IfExpr of if_expr_tree + | BinOpExpr of Lexer.op_type * expr_tree * expr_tree + | MonoOpExpr of Lexer.op_type * expr_tree + | Identifier of string + | Number of int;; + +let expr2str (e: expr_tree): string = + let rec aux e = + match e with + | LetExpr (Let (id, e1, e2)) -> Printf.sprintf "let %s = %s in\n %s" id (aux e1) (aux e2) + | FunExpr (Fun (id, e)) -> Printf.sprintf "fun %s -> %s" id (aux e) + | IfExpr (If (e1, e2, e3)) -> Printf.sprintf "if %s then %s else %s" (aux e1) (aux e2) (aux e3) + | BinOpExpr (op, e1, e2) -> Printf.sprintf "%s %s %s" (aux e1) (Lexer.op2str op) (aux e2) + | MonoOpExpr (op, e) -> Printf.sprintf "%s %s" (Lexer.op2str op) (aux e) + | Identifier id -> id + | Number n -> string_of_int n in + aux e;; + +let rec parse_let_expr (): let_expr_tree parser = + let* _ = match_token (Lexer.Keyword Lexer.Let) in + let* tt = next_token in + match tt.token_type with + Lexer.Identifier(x) -> + let id = x in + let* _ = match_token Lexer.Equal in + let* e1 = expr() in + let* _ = match_token (Lexer.Keyword Lexer.In) in + let* e2 = expr() in + return (Let (id, e1, e2)) + | _ -> stop +and parse_fun_expr (): fun_expr_tree parser = + let* _ = match_token (Lexer.Keyword Lexer.Fun) in + let* tt = next_token in + match tt.token_type with + Lexer.Identifier(x) -> + let id = x in + let* _ = match_token Lexer.Arrow in + let* e = expr() in + return (Fun (id, e)) + | _ -> stop +and parse_if_expr (): if_expr_tree parser = + let* _ = match_token (Lexer.Keyword Lexer.If) in + let* e1 = expr() in + let* _ = match_token (Lexer.Keyword Lexer.Then) in + let* e2 = expr() in + let* _ = match_token (Lexer.Keyword Lexer.Else) in + let* e3 = expr() in + return (If (e1, e2, e3)) +and parse_level0 (): expr_tree parser = + let* tt = peek_token in + match tt.token_type with + | Lexer.Identifier x -> + let* _ = next_token in + return (Identifier x) + | Lexer.Digit x -> + let* _ = next_token in + return (Number (int_of_string x)) + | Lexer.LParen -> + let* _ = match_token Lexer.LParen in + let* e = expr() in + let* _ = match_token Lexer.RParen in + return e + | _ -> stop +and parse_level1 (): expr_tree parser = + let* e1 = parse_level0() in + let rec aux e1 = + let* c = peek_token in + match c.token_type with + | Lexer.Op op when op = Lexer.Add || op = Lexer.Sub -> + let* _ = next_token in + let* e2 = parse_level0() in + aux (BinOpExpr (op, e1, e2)) + | _ -> return e1 in + aux e1 +and parse_level2 (): expr_tree parser = + let* e1 = parse_level1() in + let rec aux e1 = + let* c = peek_token in + match c.token_type with + | Lexer.Op op when op = Lexer.Mul || op = Lexer.Div || op = Lexer.Mod -> + let* _ = next_token in + let* e2 = parse_level1() in + aux (BinOpExpr (op, e1, e2)) + | _ -> return e1 in + aux e1 +and parse_level3 (): expr_tree parser = + let* e1 = parse_level2() in + let rec aux e1 = + let* c = peek_token in + match c.token_type with + | Lexer.Op op when op = Lexer.Pow -> + let* _ = next_token in + let* e2 = parse_level3() in + aux (BinOpExpr (op, e1, e2)) + | _ -> return e1 in + aux e1 +and expr (): expr_tree parser = + let* e = (parse_let_expr() |> fmap (fun x -> LetExpr x)) <|> + (parse_fun_expr() |> fmap (fun x -> FunExpr x)) <|> + (parse_if_expr() |> fmap (fun x -> IfExpr x)) <|> parse_level3() in + return e;; + +let get_expr_tree_from_tokens (tokens: Lexer.token Seq.t): expr_tree option = + let ntokens = Seq.filter (fun x -> + match x.token_type with + | Lexer.Comment(_) -> false + | _ -> true + ) tokens in + let ctx = { seq = ntokens; errors = [] } in + match expr() ctx with + | Some (e, _) -> Some e + | None -> None;; + +let%test "test get_expr_tree_from_tokens 1" = + let tokens = Lexer.lex_tokens_seq "let x = 1 in x" in + let tokens = tokens |> Seq.map (fun (x,_) -> x) in + match get_expr_tree_from_tokens tokens with + | Some e -> expr2str e = "let x = 1 in\n x" + | None -> false;; diff --git a/test/dune b/test/dune new file mode 100644 index 0000000..cb2800b --- /dev/null +++ b/test/dune @@ -0,0 +1,2 @@ +(test + (name test_calc)) diff --git a/test/test_calc.ml b/test/test_calc.ml new file mode 100644 index 0000000..e69de29