initial commit

This commit is contained in:
monoid 2025-01-29 17:17:22 +09:00
commit 9da77686e3
10 changed files with 665 additions and 0 deletions

6
bin/dune Normal file
View file

@ -0,0 +1,6 @@
(executable
(public_name calc)
(name main)
(libraries calc)
(flags (:standard -w -32))
)

12
bin/main.ml Normal file
View file

@ -0,0 +1,12 @@
open Calc;;
let main () =
let input = Sys.argv.(1) in
let _ = Printf.printf "input: %s\n" input in
let result = Eval.eval_str input in
match result with
| Eval.Int n -> Printf.printf "%d\n" n
| _ -> failwith "Type error"
;;
main ();;

31
calc.opam Normal file
View file

@ -0,0 +1,31 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "A short synopsis"
description: "A longer description"
maintainer: ["Maintainer Name <maintainer@example.com>"]
authors: ["Author Name <author@example.com>"]
license: "LICENSE"
tags: ["add topics" "to describe" "your" "project"]
homepage: "https://github.com/username/reponame"
doc: "https://url/to/documentation"
bug-reports: "https://github.com/username/reponame/issues"
depends: [
"dune" {>= "3.17"}
"ocaml"
"odoc" {with-doc}
]
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
dev-repo: "git+https://github.com/username/reponame.git"

26
dune-project Normal file
View file

@ -0,0 +1,26 @@
(lang dune 3.17)
(name calc)
(generate_opam_files true)
(source
(github username/reponame))
(authors "Author Name <author@example.com>")
(maintainers "Maintainer Name <maintainer@example.com>")
(license LICENSE)
(documentation https://url/to/documentation)
(package
(name calc)
(synopsis "A short synopsis")
(description "A longer description")
(depends ocaml)
(tags
("add topics" "to describe" your project)))
; See the complete stanza docs at https://dune.readthedocs.io/en/stable/reference/dune-project/index.html

6
lib/dune Normal file
View file

@ -0,0 +1,6 @@
(library
(name calc)
(inline_tests)
(preprocess (pps ppx_inline_test))
(flags (:standard -w -32))
)

101
lib/eval.ml Normal file
View file

@ -0,0 +1,101 @@
module VariableBindingMap = Map.Make(String);;
type value_type =
| Int of int
| Fun of function_type
and scope = {
parent: scope option;
bindings: value_type VariableBindingMap.t;
}
and function_type = {
name: string;
body: Parser.expr_tree;
scope: scope;
};;
let rec eval_expr (scope: scope) (expr: Parser.expr_tree): value_type =
match expr with
| Parser.LetExpr (Parser.Let (name, value_expr, in_expr)) ->
eval_let_expr scope name value_expr in_expr
| Parser.FunExpr (Parser.Fun (name, body_expr)) ->
eval_fun_expr scope name body_expr
| Parser.IfExpr (Parser.If (cond_expr, then_expr, else_expr)) ->
eval_if_expr scope cond_expr then_expr else_expr
| Parser.BinOpExpr (op, left_expr, right_expr) ->
eval_bin_op_expr scope op left_expr right_expr
| Parser.MonoOpExpr (_op, _expr) ->
failwith "Not implemented"
| Parser.Identifier(name) ->
let rec find_binding scope =
match scope with
| None -> failwith "Unbound variable"
| Some s ->
match VariableBindingMap.find_opt name s.bindings with
| Some v -> v
| None -> find_binding s.parent in
find_binding (Some scope)
| Parser.Number(n) -> Int n
and eval_if_expr scope cond_expr then_expr else_expr =
let cond = eval_expr scope cond_expr in
(match cond with
| Int 0 -> eval_expr scope else_expr
| _ -> eval_expr scope then_expr)
and eval_let_expr scope name value_expr in_expr =
let value = eval_expr scope value_expr in
let new_scope = { scope with bindings = VariableBindingMap.add name value scope.bindings } in
eval_expr new_scope in_expr
and eval_fun_expr scope name body_expr =
Fun { name = name; body = body_expr; scope = scope }
and eval_bin_op_expr scope op left_expr right_expr =
let left = eval_expr scope left_expr in
let right = eval_expr scope right_expr in
(match op with
| Add -> (
match (left, right) with
| (Int l, Int r) -> Int (l + r)
| _ -> failwith "Type error"
)
| Sub -> (
match (left, right) with
| (Int l, Int r) -> Int (l - r)
| _ -> failwith "Type error"
)
| Mul -> (
match (left, right) with
| (Int l, Int r) -> Int (l * r)
| _ -> failwith "Type error"
)
| Div -> (
match (left, right) with
| (Int l, Int r) -> Int (l / r)
| _ -> failwith "Type error"
)
| Mod -> (
match (left, right) with
| (Int l, Int r) -> Int (l mod r)
| _ -> failwith "Type error"
)
| Pow -> (
match (left, right) with
| (Int l, Int r) -> Int (int_of_float (float_of_int l ** float_of_int r))
| _ -> failwith "Type error"
))
;;
let eval_str (str: string): value_type =
let tokens = Lexer.lex_tokens_seq str in
let tokens = tokens |> Seq.map (fun (x,_) -> x) in
let expr = Parser.get_expr_tree_from_tokens tokens in
match expr with
| Some e -> eval_expr { parent = None; bindings = VariableBindingMap.empty } e
| None -> failwith "Parse error"
;;
let%test "test eval_str 1" =
let result = eval_str "let x = 1 in x" in
match result with
| Int n -> n = 1
| _ -> false

276
lib/lexer.ml Normal file
View file

@ -0,0 +1,276 @@
(* small set of ml *)
type op_type =
| Add
| Sub
| Mul
| Div
| Mod
| Pow
let op2str op =
match op with
| Add -> "+"
| Sub -> "-"
| Mul -> "*"
| Div -> "/"
| Mod -> "%"
| Pow -> "^"
type keyword_type =
| Let
| In
| If
| Then
| Else
| Fun
type token_type =
| Eof
| Identifier of string
| Digit of string
| Op of op_type
| LParen
| RParen
| Equal
| Arrow
| Keyword of keyword_type
| Comment of string
| Fail of string
;;
type token = {
(* token type *)
token_type: token_type;
(* start position *)
pos: int;
};;
let epsilon = '\000';;
(* Lexer is just state machine *)
let is_digit c = c >= '0' && c <= '9'
let is_alpha c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
let is_alnum c = is_alpha c || is_digit c
type lexer_context = {
total: string;
pos: int;
(* \n position array *)
line_pos: int array;
};;
let binary_search_range arr x =
if Array.length arr = 0 then 0
else
let rec aux low high =
match compare low high with
| 0 -> if arr.(low) >= x then low else low + 1
(* unreachable *)
| c when c > 0 -> raise (Invalid_argument "binary_search_range")
| _ ->
let mid = (low + high) / 2 in
if arr.(mid) >= x && ( mid = 0 || arr.(mid - 1) < x) then mid
else if arr.(mid) < x then aux (mid + 1) high
else aux low (mid - 1)
in
aux 0 (Array.length arr - 1)
let get_line_and_col (line_pos: int array) (pos: int) =
let line_index = binary_search_range line_pos pos in
(* let _ = Printf.printf "line_index: %d\n" line_index in *)
let line_start_pos = if line_index > 0 then
(line_pos.(line_index - 1) + 1) else 0 in
(line_index + 1, pos - (line_start_pos) + 1);;
let%test "test: get_line_and_col 1" =
let actual = get_line_and_col [|1; 2; 3; 4; 6|] 3 in
let expected = (3, 1) in
(* let _ = Printf.printf "(%d,%d)\n" (fst actual) (snd actual) in *)
actual = expected;;
let%test "test: get_line_and_col 2" =
let actual = get_line_and_col [|1; 2; 3; 4; 6|] 10 in
let expected = (6, 4) in
(* let _ = Printf.printf "(%d,%d)\n" (fst actual) (snd actual) in *)
actual = expected;;
let input_first (ctx: lexer_context) =
if ctx.pos < String.length ctx.total then
ctx.total.[ctx.pos]
else
epsilon;;
let%test "test first" =
let ctx = {total = "abc"; pos = 0; line_pos = [||]} in
input_first ctx = 'a';;
let input_rest (ctx: lexer_context) = let ch = input_first ctx in
if ch = '\n' then
{ctx with pos = ctx.pos + 1; line_pos = Array.append ctx.line_pos [|ctx.pos|]}
else
{ctx with pos = ctx.pos + 1};;
let%test "test rest" =
let ctx = {total = "abc"; pos = 0; line_pos = [||]} in
let ctx' = input_rest ctx in
ctx'.pos = 1 && ctx'.line_pos = [||];;
let%test "test rest with new line" =
let ctx = {total = "a\nbc"; pos = 1; line_pos = [||]} in
let ctx' = input_rest ctx in
ctx'.pos = 2 && ctx'.line_pos = [|1|];;
let%test "test rest with new line 2" =
let ctx = {total = "a\nb\nc"; pos = 3; line_pos = [|1|]} in
let ctx' = input_rest ctx in
(* let _ = Printf.printf "pos: %d, line_pos: %s" ctx'.pos (String.concat ","
(List.map string_of_int (Array.to_list ctx'.line_pos))) in *)
ctx'.pos = 4 && ctx'.line_pos = [|1; 3|];;
let rec skip_spaces (ctx: lexer_context) =
let ch = input_first ctx in
if ch = ' ' || ch = '\t' || ch = '\n' then
skip_spaces (input_rest ctx)
else
ctx;;
let%test "test skip_spaces" =
let ctx = {total = " \nabc"; pos = 0; line_pos = [||]} in
let ctx' = skip_spaces ctx in
(* let _ = Printf.printf "pos: %d, line_pos: %s" ctx'.pos (String.concat ","
(List.map string_of_int (Array.to_list ctx'.line_pos))) in *)
ctx'.pos = 3 && ctx'.line_pos = [|2|];;
(*
1. identifier: [a-zA-Z][a-zA-Z0-9]*
2. digit: [0-9]+
3. operator: +, -, *, /, %, ^
4. keyword: let, in, if, then, else, fun
5. (, ), =
6. comment: //.*
*)
let get_identifier (ctx: lexer_context) =
let rec aux ctx =
let ch = input_first ctx in
if is_alnum ch then
aux (input_rest ctx)
else
ctx in
let ctx' = aux ctx in
let len = ctx'.pos - ctx.pos in
let id = String.sub ctx'.total ctx.pos len in
id, ctx';;
let%test "test get_identifier" =
let ctx = {total = "abc"; pos = 0; line_pos = [||]} in
let id, ctx' = get_identifier ctx in
id = "abc" && ctx'.pos = 3;;
let get_digits (ctx: lexer_context) =
let rec aux ctx =
let ch = input_first ctx in
if is_digit ch then
aux (input_rest ctx)
else
ctx in
let ctx' = aux ctx in
let len = ctx'.pos - ctx.pos in
let id = String.sub ctx'.total ctx.pos len in
id, ctx';;
let%test "test get_digit" =
let ctx = {total = "123"; pos = 0; line_pos = [||]} in
let id, ctx' = get_digits ctx in
id = "123" && ctx'.pos = 3;;
let id_to_token_type id =
match id with
| "let" -> Keyword Let
| "in" -> Keyword In
| "if" -> Keyword If
| "then" -> Keyword Then
| "else" -> Keyword Else
| "fun" -> Keyword Fun
| _ -> Identifier id;;
let lex_token (ctx: lexer_context) =
let make_token token_type pos = {token_type = token_type; pos = pos} in
let ctx = skip_spaces ctx in
let first_ch = input_first ctx in
let pos = ctx.pos in
match first_ch with
| '\000' -> {token_type = Eof; pos = pos}, ctx
| '(' -> make_token LParen pos, input_rest ctx
| ')' -> make_token RParen pos, input_rest ctx
| '=' -> make_token Equal pos, input_rest ctx
| '+' -> make_token (Op Add) pos, input_rest ctx
| '-' ->
let second_ch = input_first (input_rest ctx) in
if second_ch = '>' then
make_token Arrow pos, input_rest (input_rest ctx)
else
make_token (Op Sub) pos, input_rest ctx
| '*' -> make_token (Op Mul) pos, input_rest ctx
| '/' ->
(* check comment *)
let second_ch = input_first (input_rest ctx) in
if second_ch = '/' then
let rec aux ctx =
let ch = input_first ctx in
if ch = '\n' then
ctx
else
aux (input_rest ctx) in
let ctx = aux ctx in
let len = ctx.pos - pos in
let comment = String.sub ctx.total pos len in
make_token (Comment comment) pos, ctx
else
make_token (Op Div) pos, input_rest ctx
| '%' -> make_token (Op Mod) pos, input_rest ctx
| '^' -> make_token (Op Pow) pos, input_rest ctx
| c when is_alpha c ->
let id, ctx = get_identifier ctx in
make_token (id_to_token_type id) pos, ctx
| c when is_digit c ->
let id, ctx = get_digits ctx in
make_token (Digit id) pos, ctx
| _ -> make_token (Fail "invalid token") pos, input_rest ctx;;
let%test "test lex_token 1" =
let ctx = {total = "let"; pos = 0; line_pos = [||]} in
let token, ctx' = lex_token ctx in
token.token_type = Keyword Let && token.pos = 0 && ctx'.pos = 3;;
let%test "test lex_token 2" =
let ctx = {total = "let in"; pos = 0; line_pos = [||]} in
let token, ctx' = lex_token ctx in
let token', ctx'' = lex_token ctx' in
token.token_type = Keyword Let && token.pos = 0 && ctx'.pos = 3 &&
token'.token_type = Keyword In && token'.pos = 4 && ctx''.pos = 6;;
let lex_tokens_seq (total: string): (token * lexer_context) Seq.t =
let rec aux ctx =
let token, next_ctx = lex_token ctx in
if token.token_type = Eof then
Seq.Cons ((token, next_ctx), fun () -> Seq.Nil)
else
Seq.Cons ((token, next_ctx), fun () -> aux next_ctx) in
fun () -> aux {total = total; pos = 0; line_pos = [||]};;
let%test "test lex_tokens_seq" =
let total = "let in" in
let seq = lex_tokens_seq total in
let seq = seq |> Seq.map (fun (token, _) -> token) in
let tokens = List.of_seq seq in
let expected = [
{token_type = Keyword Let; pos = 0};
{token_type = Keyword In; pos = 4};
{token_type = Eof; pos = 6}
] in
tokens = expected;;

205
lib/parser.ml Normal file
View file

@ -0,0 +1,205 @@
open Lexer;;
type parser_context = {
seq: Lexer.token Seq.t;
errors: string list;
};;
(* The parser is a function that takes a parser_context and returns an option of a tuple of a value and a parser_context.*)
type 'a parser = parser_context -> ('a * parser_context) option;;
let return (a: 'a) = fun (ctx: parser_context) -> Some (a, ctx);;
let stop = fun (_: parser_context) -> None;;
let fmap (f: 'a -> 'b) (p: 'a parser): 'b parser = fun (ctx: parser_context) ->
match p ctx with
| Some (a, ctx') -> Some (f a, ctx')
| None -> None;;
let bind (a: 'a parser) (b:'a -> 'b parser) = fun (ctx: parser_context) ->
let p = a ctx in
match p with
| Some (a', ctx') -> b a' ctx'
| None -> None;;
let (>>=) = bind;;
let (let*) = bind;;
let or_parser (a: 'a parser) (b: 'a parser): 'a parser = fun (ctx: parser_context) ->
match a ctx with
| Some _ as res -> res
| None -> b ctx;;
let (<|>) = or_parser;;
let peek_token: token parser = fun (ctx: parser_context) ->
Seq.uncons ctx.seq |> Option.map (fun (t,_) -> (t,ctx));;
let next_token: token parser = fun (ctx: parser_context) ->
Seq.uncons ctx.seq |> Option.map (fun (t, s) -> (t,
{ ctx with seq = s}
));;
let match_token (tt: token_type) : token parser =
let* t = next_token in
if t.token_type = tt then
return t
else
stop;;
let zero_or_one (p: 'a parser): ('a option) parser = fun (ctx) ->
match p ctx with
| Some (a, ctx') -> Some (Some a, ctx')
| None -> Some (None, ctx);;
let rec many (p: 'a parser): 'a list parser =
let* a = zero_or_one p in
match a with
| Some a' -> (
let* as' = many p in
return (a'::as')
)
| None -> return [];;
let many1 (p: 'a parser): 'a list parser =
let* a = p in
let* as' = many p in
return (a::as');;
(*
BNF:
let_expr ::= let identifier = expr in expr
fun_expr ::= fun identifier -> expr
if_expr ::= if expr then expr else expr
level0 ::= (expr) | identifier | number
level1 ::= level0 | level1 + level0 | level1 - level0
level2 ::= level2 * level1 | level2 / level1 | level2 % level1 | level1
level3 ::= level2 ^ level3 | level2
expr ::= let_expr | fun_expr | if_expr | level3
*)
type let_expr_tree = Let of string * expr_tree * expr_tree
and fun_expr_tree = Fun of string * expr_tree
and if_expr_tree = If of expr_tree * expr_tree * expr_tree
and expr_tree =
| LetExpr of let_expr_tree
| FunExpr of fun_expr_tree
| IfExpr of if_expr_tree
| BinOpExpr of Lexer.op_type * expr_tree * expr_tree
| MonoOpExpr of Lexer.op_type * expr_tree
| Identifier of string
| Number of int;;
let expr2str (e: expr_tree): string =
let rec aux e =
match e with
| LetExpr (Let (id, e1, e2)) -> Printf.sprintf "let %s = %s in\n %s" id (aux e1) (aux e2)
| FunExpr (Fun (id, e)) -> Printf.sprintf "fun %s -> %s" id (aux e)
| IfExpr (If (e1, e2, e3)) -> Printf.sprintf "if %s then %s else %s" (aux e1) (aux e2) (aux e3)
| BinOpExpr (op, e1, e2) -> Printf.sprintf "%s %s %s" (aux e1) (Lexer.op2str op) (aux e2)
| MonoOpExpr (op, e) -> Printf.sprintf "%s %s" (Lexer.op2str op) (aux e)
| Identifier id -> id
| Number n -> string_of_int n in
aux e;;
let rec parse_let_expr (): let_expr_tree parser =
let* _ = match_token (Lexer.Keyword Lexer.Let) in
let* tt = next_token in
match tt.token_type with
Lexer.Identifier(x) ->
let id = x in
let* _ = match_token Lexer.Equal in
let* e1 = expr() in
let* _ = match_token (Lexer.Keyword Lexer.In) in
let* e2 = expr() in
return (Let (id, e1, e2))
| _ -> stop
and parse_fun_expr (): fun_expr_tree parser =
let* _ = match_token (Lexer.Keyword Lexer.Fun) in
let* tt = next_token in
match tt.token_type with
Lexer.Identifier(x) ->
let id = x in
let* _ = match_token Lexer.Arrow in
let* e = expr() in
return (Fun (id, e))
| _ -> stop
and parse_if_expr (): if_expr_tree parser =
let* _ = match_token (Lexer.Keyword Lexer.If) in
let* e1 = expr() in
let* _ = match_token (Lexer.Keyword Lexer.Then) in
let* e2 = expr() in
let* _ = match_token (Lexer.Keyword Lexer.Else) in
let* e3 = expr() in
return (If (e1, e2, e3))
and parse_level0 (): expr_tree parser =
let* tt = peek_token in
match tt.token_type with
| Lexer.Identifier x ->
let* _ = next_token in
return (Identifier x)
| Lexer.Digit x ->
let* _ = next_token in
return (Number (int_of_string x))
| Lexer.LParen ->
let* _ = match_token Lexer.LParen in
let* e = expr() in
let* _ = match_token Lexer.RParen in
return e
| _ -> stop
and parse_level1 (): expr_tree parser =
let* e1 = parse_level0() in
let rec aux e1 =
let* c = peek_token in
match c.token_type with
| Lexer.Op op when op = Lexer.Add || op = Lexer.Sub ->
let* _ = next_token in
let* e2 = parse_level0() in
aux (BinOpExpr (op, e1, e2))
| _ -> return e1 in
aux e1
and parse_level2 (): expr_tree parser =
let* e1 = parse_level1() in
let rec aux e1 =
let* c = peek_token in
match c.token_type with
| Lexer.Op op when op = Lexer.Mul || op = Lexer.Div || op = Lexer.Mod ->
let* _ = next_token in
let* e2 = parse_level1() in
aux (BinOpExpr (op, e1, e2))
| _ -> return e1 in
aux e1
and parse_level3 (): expr_tree parser =
let* e1 = parse_level2() in
let rec aux e1 =
let* c = peek_token in
match c.token_type with
| Lexer.Op op when op = Lexer.Pow ->
let* _ = next_token in
let* e2 = parse_level3() in
aux (BinOpExpr (op, e1, e2))
| _ -> return e1 in
aux e1
and expr (): expr_tree parser =
let* e = (parse_let_expr() |> fmap (fun x -> LetExpr x)) <|>
(parse_fun_expr() |> fmap (fun x -> FunExpr x)) <|>
(parse_if_expr() |> fmap (fun x -> IfExpr x)) <|> parse_level3() in
return e;;
let get_expr_tree_from_tokens (tokens: Lexer.token Seq.t): expr_tree option =
let ntokens = Seq.filter (fun x ->
match x.token_type with
| Lexer.Comment(_) -> false
| _ -> true
) tokens in
let ctx = { seq = ntokens; errors = [] } in
match expr() ctx with
| Some (e, _) -> Some e
| None -> None;;
let%test "test get_expr_tree_from_tokens 1" =
let tokens = Lexer.lex_tokens_seq "let x = 1 in x" in
let tokens = tokens |> Seq.map (fun (x,_) -> x) in
match get_expr_tree_from_tokens tokens with
| Some e -> expr2str e = "let x = 1 in\n x"
| None -> false;;

2
test/dune Normal file
View file

@ -0,0 +1,2 @@
(test
(name test_calc))

0
test/test_calc.ml Normal file
View file