2022-02-01 02:06:18 +09:00
|
|
|
open Token
|
2022-01-21 02:17:34 +09:00
|
|
|
|
2022-01-28 01:52:57 +09:00
|
|
|
exception Invalid_character of char
|
|
|
|
exception Expected of char
|
2022-01-10 01:31:47 +09:00
|
|
|
|
2022-01-28 01:52:57 +09:00
|
|
|
let invalid_char c = raise @@ Invalid_character c
|
|
|
|
let expected c = raise @@ Expected c
|
2022-01-17 15:17:18 +09:00
|
|
|
|
2022-01-10 01:31:47 +09:00
|
|
|
let either f g c =
|
|
|
|
f c || g c
|
|
|
|
|
|
|
|
let is_digit c =
|
|
|
|
'0' <= c && c <= '9'
|
|
|
|
|
|
|
|
let is_num = function
|
|
|
|
| 'x' -> true
|
2022-01-18 15:33:56 +09:00
|
|
|
| '.' -> true
|
2022-01-10 01:31:47 +09:00
|
|
|
| c -> is_digit c
|
|
|
|
|
|
|
|
let is_whitespace = function
|
|
|
|
| ' ' | '\t' | '\n' -> true
|
|
|
|
| _ -> false
|
|
|
|
|
|
|
|
let is_alpha c =
|
2022-01-10 23:11:13 +09:00
|
|
|
('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
|
2022-01-10 01:31:47 +09:00
|
|
|
|
|
|
|
let is_ident_start =
|
|
|
|
either is_alpha ((=) '_')
|
|
|
|
|
|
|
|
let is_ident =
|
|
|
|
either is_ident_start is_digit
|
|
|
|
|
2022-01-28 01:52:57 +09:00
|
|
|
let expect_char c seq =
|
|
|
|
match seq () with
|
|
|
|
| Seq.Nil -> expected c
|
|
|
|
| Seq.Cons (x, seq) ->
|
|
|
|
if x = c then seq else expected c
|
|
|
|
|
2022-01-18 15:36:09 +09:00
|
|
|
let expect_token str tok seq =
|
|
|
|
let rec aux ts seq =
|
|
|
|
match ts (), seq () with
|
|
|
|
| Seq.Nil, _ -> Some seq
|
|
|
|
| Seq.Cons _, Seq.Nil -> None
|
|
|
|
| Seq.Cons (a, ts), Seq.Cons (b, seq) ->
|
|
|
|
if a = b then aux ts seq else None
|
|
|
|
in
|
|
|
|
let str = String.to_seq str in
|
|
|
|
aux str seq |> Option.map (fun s -> tok, s)
|
|
|
|
|
|
|
|
let find_token seq =
|
2022-02-01 02:06:18 +09:00
|
|
|
!tokens |> List.find_map
|
2022-01-18 15:36:09 +09:00
|
|
|
(fun (s, t) -> expect_token s t seq)
|
|
|
|
|
2022-01-10 01:31:47 +09:00
|
|
|
(* same as take_while f seq, drop_while f seq *)
|
2022-01-28 01:52:57 +09:00
|
|
|
let rec partition_while f seq =
|
2022-01-10 01:31:47 +09:00
|
|
|
match seq () with
|
|
|
|
| Seq.Nil -> Seq.empty, seq
|
|
|
|
| Seq.Cons (x, seq) ->
|
|
|
|
if f x then
|
|
|
|
let n, s = partition_while f seq in
|
|
|
|
Seq.cons x n, s
|
|
|
|
else
|
|
|
|
Seq.(empty, cons x seq)
|
|
|
|
|
2022-01-28 01:52:57 +09:00
|
|
|
let tokenize str =
|
2022-01-10 01:31:47 +09:00
|
|
|
let seq = String.to_seq str in
|
2022-01-22 03:04:00 +09:00
|
|
|
let rec aux seq =
|
2022-01-10 01:31:47 +09:00
|
|
|
let open Token in
|
2022-01-28 01:52:57 +09:00
|
|
|
let open Seq in
|
2022-01-10 01:31:47 +09:00
|
|
|
match seq () with
|
2022-01-28 01:52:57 +09:00
|
|
|
| Nil -> empty
|
|
|
|
| Cons (x, seq) ->
|
|
|
|
(* skip whitespace *)
|
2022-01-10 01:31:47 +09:00
|
|
|
if is_whitespace x then
|
2022-01-28 01:52:57 +09:00
|
|
|
aux seq
|
2022-01-21 02:17:34 +09:00
|
|
|
|
2022-01-28 01:52:57 +09:00
|
|
|
(* string *)
|
2022-01-21 02:17:34 +09:00
|
|
|
else if x = '"' then
|
|
|
|
let str, seq = partition_while ((<>) '"') seq in
|
|
|
|
let str = String (String.of_seq str) in
|
2022-01-28 01:52:57 +09:00
|
|
|
let seq = expect_char '"' seq in
|
2022-02-01 02:06:18 +09:00
|
|
|
cons str (aux seq)
|
2022-01-21 02:17:34 +09:00
|
|
|
|
2022-01-28 01:52:57 +09:00
|
|
|
(* number (int, float) *)
|
2022-01-10 01:31:47 +09:00
|
|
|
else if is_digit x then
|
2022-01-21 02:17:34 +09:00
|
|
|
let n, seq = partition_while is_num seq in
|
2022-01-28 01:52:57 +09:00
|
|
|
let n = String.of_seq @@ cons x n in
|
2022-01-18 15:33:56 +09:00
|
|
|
let n =
|
|
|
|
if String.contains n '.' (* float *)
|
|
|
|
then Float (float_of_string n)
|
|
|
|
else Int (int_of_string n)
|
|
|
|
in
|
2022-02-01 02:06:18 +09:00
|
|
|
cons n (aux seq)
|
2022-01-21 02:17:34 +09:00
|
|
|
|
2022-01-28 01:52:57 +09:00
|
|
|
(* idents *)
|
2022-01-10 01:31:47 +09:00
|
|
|
else if is_ident_start x then
|
2022-01-21 02:17:34 +09:00
|
|
|
let id, seq = partition_while is_ident seq in
|
2022-01-28 01:52:57 +09:00
|
|
|
let id = String.of_seq @@ cons x id in
|
|
|
|
cons (Ident id) (aux seq)
|
2022-01-21 02:17:34 +09:00
|
|
|
|
2022-01-28 01:52:57 +09:00
|
|
|
(* tokens *)
|
2022-01-10 01:31:47 +09:00
|
|
|
else
|
2022-01-28 01:52:57 +09:00
|
|
|
match find_token @@ cons x seq with
|
|
|
|
| None -> invalid_char x
|
|
|
|
| Some (t, seq) -> cons t (aux seq)
|
2022-01-10 01:31:47 +09:00
|
|
|
in
|
|
|
|
aux seq
|