ce/parser.ml

312 lines
7 KiB
OCaml
Raw Normal View History

2022-01-10 01:31:47 +09:00
open Ast
2022-01-11 01:05:29 +09:00
module S = Set.Make(String)
2022-02-17 02:51:56 +09:00
(* fatal exception that parsing need to be stopped *)
exception Fatal of exn
2022-02-15 00:28:29 +09:00
exception Expected of int * string
exception Unexpected_token of int * string
2022-01-19 02:10:34 +09:00
exception End_of_tokens
2022-01-10 01:31:47 +09:00
2022-02-15 00:28:29 +09:00
let expected col t =
raise @@ Expected (col, t)
2022-01-10 01:31:47 +09:00
2022-02-15 00:28:29 +09:00
let unexpected_token col t =
raise @@ Unexpected_token (col, Token.to_string t)
2022-01-10 01:31:47 +09:00
2022-01-10 23:11:13 +09:00
(* precedence table.
* my first thought was using some sort of partially-ordered graph for
* precedency, but infering precedence relation from the graph is hard
* and the graph can be made to have loops, I just used plain table. *)
2022-01-10 01:31:47 +09:00
let precedence = [
2022-02-24 02:31:29 +09:00
"=", 1;
"<>", 1;
">=", 1;
"<=", 1;
">", 1;
"<", 1;
"+", 10;
"-", 10;
"*", 20;
"/", 20;
"%", 30;
"^", 30;
2022-01-10 01:31:47 +09:00
] |> List.to_seq |> Hashtbl.of_seq
let precedence_of op =
2022-01-11 01:05:29 +09:00
Hashtbl.find precedence op
2022-01-10 01:31:47 +09:00
2022-01-19 15:28:41 +09:00
type associativity =
| Left_to_right
| Right_to_left
2022-01-20 23:36:53 +09:00
let assoc_of_string = function
| "left" -> Left_to_right
| "right" -> Right_to_left
| _ -> invalid_arg "assoc_of_string"
let assoc_to_string = function
| Left_to_right -> "left"
| Right_to_left -> "right"
2022-01-19 15:28:41 +09:00
let oper_assoc = [
2022-02-24 02:31:29 +09:00
"^", Right_to_left;
2022-01-19 15:28:41 +09:00
] |> List.to_seq |> Hashtbl.of_seq
let op_is_right_to_left op =
let a =
Hashtbl.find_opt oper_assoc op
|> Option.value ~default: Left_to_right
in
a = Right_to_left
2022-01-10 23:11:13 +09:00
2022-01-18 15:36:09 +09:00
let operators = [
2022-02-24 02:31:29 +09:00
Token.Plus;
Minus;
Asterisk;
Slash;
Carret;
Percent;
Equal;
Not_equal;
Greater_equal;
Less_equal;
Greater;
Less;
]
2022-01-18 15:36:09 +09:00
let token_to_op tok =
2022-02-24 02:31:29 +09:00
Token.to_string tok
2022-01-10 01:31:47 +09:00
2022-01-19 02:10:34 +09:00
let token_is_operator tok =
2022-02-24 02:31:29 +09:00
List.mem tok operators
2022-01-19 02:10:34 +09:00
2022-02-08 22:03:16 +09:00
let is_keyword = function
2022-02-09 17:27:19 +09:00
| "if" | "then" | "else" | "let" | "in" -> true
2022-02-08 22:03:16 +09:00
| _ -> false
2022-02-17 02:51:56 +09:00
(* parser primitives *)
2022-01-13 00:31:26 +09:00
2022-02-08 00:26:03 +09:00
let any seq =
match seq () with
| Seq.Nil -> raise End_of_tokens
2022-02-15 00:28:29 +09:00
| Seq.Cons ((col, x), seq) -> col, x, seq
2022-02-08 00:26:03 +09:00
2022-01-20 23:36:53 +09:00
let token tok seq =
2022-02-15 00:28:29 +09:00
let col, x, seq = any seq in
if x = tok then x, seq
else expected col @@ Token.to_string tok
2022-01-20 23:36:53 +09:00
2022-01-21 00:17:01 +09:00
let any_ident seq =
2022-02-15 00:28:29 +09:00
let col, x, seq = any seq in
match x with
| Token.Ident id -> id, seq
| _ -> expected col "ident"
2022-01-21 00:17:01 +09:00
2022-01-13 00:31:26 +09:00
let idents set seq =
2022-02-15 00:28:29 +09:00
let col, x, seq = any seq in
match x with
| Token.Ident id when S.mem id set -> id, seq
| _ ->
2022-01-13 00:31:26 +09:00
let msg = "ident " ^ (S.elements set |> String.concat " or ") in
2022-02-15 00:28:29 +09:00
expected col msg
2022-01-13 00:31:26 +09:00
let ident str seq =
idents (S.singleton str) seq
let operator seq =
2022-02-15 00:28:29 +09:00
let col, x, seq = any seq in
try token_to_op x, seq with
| _ -> expected col "operator"
2022-01-13 00:31:26 +09:00
(* parser combinators *)
2022-02-17 02:51:56 +09:00
let mustbe f seq =
try f seq with
| e -> raise @@ Fatal e
2022-01-20 23:36:53 +09:00
let oneof fs seq =
let rec aux = function
| [] -> assert false
| [f] -> f seq
2022-02-17 02:51:56 +09:00
| f::fs ->
(try f seq with
| Fatal _ as e -> raise e
| _ -> aux fs)
2022-01-20 23:36:53 +09:00
in
aux fs
2022-01-13 00:31:26 +09:00
2022-01-29 20:01:48 +09:00
let either f g = fun seq ->
2022-02-17 02:51:56 +09:00
try f seq with
| Fatal _ as e -> raise e
| _ -> g seq
2022-01-29 20:01:48 +09:00
2022-01-20 23:36:53 +09:00
let (@>) f g = fun seq ->
2022-01-13 00:31:26 +09:00
let a, seq = f seq in
g a seq
2022-01-29 20:01:48 +09:00
let more f seq =
let rec aux xs seq =
try
let x, seq = f seq in
aux (x::xs) seq
with
2022-02-17 02:51:56 +09:00
| Fatal _ as e -> raise e
2022-01-29 20:01:48 +09:00
| _ -> xs, seq
in
let xs, seq = aux [] seq in
List.rev xs, seq
2022-02-09 17:27:19 +09:00
(* decl := let_global
2022-02-08 01:22:49 +09:00
* | expr
*)
let rec decl seq =
seq |> oneof [
expr min_int;
2022-02-09 17:27:19 +09:00
let_global;
2022-02-17 02:51:56 +09:00
nothing;
2022-02-08 01:22:49 +09:00
]
2022-02-17 02:51:56 +09:00
and nothing seq =
match seq () with
| Seq.Nil -> Nothing, seq
| Seq.Cons ((col, x), _) -> unexpected_token col x
2022-02-09 17:27:19 +09:00
(* let_global := "let" ident "=" expr *)
and let_global seq =
2022-02-08 16:05:33 +09:00
let _, seq = ident "let" seq in
let id, seq = any_ident seq in
2022-02-23 02:45:36 +09:00
let args, seq = more any_ident seq in
2022-02-08 16:05:33 +09:00
let _, seq = token Token.Equal seq in
let e, seq = expr min_int seq in
2022-02-23 02:45:36 +09:00
(if args = []
then Let (id, e)
else Let (id, Nfunction (args, e))),
seq
2022-02-08 16:05:33 +09:00
2022-01-20 23:36:53 +09:00
(* expr := level
2022-02-09 17:27:19 +09:00
* | let_value
2022-01-20 23:36:53 +09:00
* | assoc
2022-01-29 20:01:48 +09:00
* | apply
2022-01-20 01:35:48 +09:00
* | value binop_right
*)
2022-02-08 01:22:49 +09:00
and expr pre seq =
2022-01-20 23:36:53 +09:00
seq |> oneof [
2022-02-08 16:05:33 +09:00
ifexpr;
2022-02-09 17:27:19 +09:00
let_value;
2022-02-01 21:38:00 +09:00
lambda;
extern_value;
oneof [apply; unary; value] @> binop pre;
2022-02-17 02:51:56 +09:00
(* TODO: place error routine here *)
2022-01-20 23:36:53 +09:00
]
2022-02-09 17:27:19 +09:00
(* let_value := "let" id "=" expr "in" expr *)
and let_value seq =
let _, seq = ident "let" seq in
2022-02-17 02:51:56 +09:00
let id, seq = mustbe any_ident seq in
2022-02-09 17:27:19 +09:00
let _, seq = token Equal seq in
2022-02-17 02:51:56 +09:00
let e, seq = mustbe (expr min_int) seq in
2022-02-09 17:27:19 +09:00
let _, seq = ident "in" seq in
2022-02-17 02:51:56 +09:00
let f, seq = mustbe (expr min_int) seq in
2022-02-09 17:27:19 +09:00
Letin (id, e, f), seq
2022-02-01 21:38:00 +09:00
(* lambda := "fun" [ident]+ "->" expr *)
and lambda seq =
let _, seq = ident "fun" seq in
2022-02-17 02:51:56 +09:00
seq |> mustbe (fun seq ->
let v0, seq = any_ident seq in
let vars, seq = more any_ident seq in
let _, seq = token Right_arrow seq in
let e, seq = expr min_int seq in
2022-02-22 17:01:39 +09:00
Nfunction (v0::vars, e), seq)
2022-02-01 21:38:00 +09:00
2022-02-08 16:05:33 +09:00
(* ifexpr := "if" expr "then" expr "else" expr *)
and ifexpr seq =
let _, seq = ident "if" seq in
2022-02-17 02:51:56 +09:00
seq |> mustbe (fun seq ->
let co, seq = expr min_int seq in
let _, seq = ident "then" seq in
let th, seq = expr min_int seq in
let _, seq = ident "else" seq in
let el, seq = expr min_int seq in
If (co, th, el), seq)
2022-02-08 16:05:33 +09:00
2022-02-01 21:38:00 +09:00
(* apply := value [value]+ *)
2022-01-29 20:01:48 +09:00
and apply seq =
2022-02-01 21:38:00 +09:00
let v, seq = value seq in
let a0, seq = value seq in
2022-01-29 20:01:48 +09:00
let args, seq = more value seq in
2022-02-01 21:38:00 +09:00
Apply (v, a0::args), seq
(* extern_value := external ident *)
and extern_value seq =
let _, seq = ident "external" seq in
2022-02-17 02:51:56 +09:00
let id, seq = mustbe any_ident seq in
Nexternal id, seq
2022-01-29 20:01:48 +09:00
2022-01-23 01:20:34 +09:00
(* unary := - value *)
and unary seq =
let op, seq =
2022-02-15 00:28:29 +09:00
let col, x, seq = any seq in
if x = Minus
2022-02-24 02:31:29 +09:00
then "-", seq
2022-02-15 00:28:29 +09:00
else expected col "minus (-)"
2022-01-23 01:20:34 +09:00
in
2022-02-17 02:51:56 +09:00
let v, seq = mustbe value seq in
2022-01-23 01:20:34 +09:00
Ast.unary op v, seq
2022-01-20 23:36:53 +09:00
(* value := int | float | ( expr ) *)
2022-01-20 01:35:48 +09:00
and value seq =
match seq () with
| Seq.Nil -> raise End_of_tokens
2022-02-15 00:28:29 +09:00
| Seq.Cons ((col, x), seq) -> begin match x with
2022-02-17 02:51:56 +09:00
| Ident id when is_keyword id -> expected col "value"
2022-02-08 16:05:33 +09:00
| Ident "true" -> Nbool true, seq
| Ident "false" -> Nbool false, seq
| Ident id -> Var id, seq
| Int x -> Nint x, seq
| Float x -> Nfloat x, seq
| String x -> Nstring x, seq
2022-02-08 00:26:03 +09:00
| Hash ->
2022-02-15 00:28:29 +09:00
let _, t, seq = any seq in
2022-02-08 00:26:03 +09:00
Nsymbol (Token.to_string t), seq
2022-01-20 23:36:53 +09:00
| LParen ->
2022-02-24 19:57:26 +09:00
seq |> either
(fun seq ->
let _, seq = token RParen seq in
Nunit, seq)
(fun seq ->
let e, seq = mustbe (expr min_int) seq in
let _, seq = mustbe (token RParen) seq in
e, seq)
2022-02-15 00:28:29 +09:00
| _ -> unexpected_token col x
2022-01-20 01:35:48 +09:00
end
(* binop := binop op binop *)
and binop pre left seq =
match seq () with
| Seq.Nil -> left, Seq.empty
2022-02-15 00:28:29 +09:00
| Seq.Cons ((col, x), seq) -> begin match x with
2022-01-20 01:35:48 +09:00
| op when token_is_operator op ->
let op = token_to_op op in
2022-01-20 23:36:53 +09:00
let op_pre = precedence_of op in
2022-01-20 01:35:48 +09:00
(* op has to be calculated first *)
2022-01-20 23:36:53 +09:00
if op_pre > pre
|| (op_is_right_to_left op && op_pre = pre)
then
2022-02-17 02:51:56 +09:00
let right, seq = mustbe (expr op_pre) seq in
2022-01-20 01:35:48 +09:00
binop pre (Ast.binop left op right) seq
else
2022-02-15 00:28:29 +09:00
left, Seq.cons (col, x) seq
2022-01-20 23:36:53 +09:00
2022-02-15 00:28:29 +09:00
| RParen -> left, Seq.cons (col, x) seq
| Ident id when is_keyword id -> left, Seq.cons (col, x) seq
| _ -> unexpected_token col x
2022-01-20 01:35:48 +09:00
end
2022-01-13 00:31:26 +09:00
(* parse tokens *)
2022-01-10 23:11:13 +09:00
let parse ts =
2022-02-17 02:51:56 +09:00
let ast, rest = try decl ts with Fatal e -> raise e in
2022-01-10 01:31:47 +09:00
if rest () <> Seq.Nil then failwith "Parser.parse";
ast