ce/lex.ml

99 lines
2.4 KiB
OCaml
Raw Normal View History

open Ast.Value
2022-01-10 01:31:47 +09:00
(** Lazy sequence of tokens; this is the result type of [tokenize]. *)
type tokens = Token.t Seq.t
2022-01-17 15:17:18 +09:00
(** Raised by [tokenize] when the remaining input is not matched by any entry
    of [Token.tokens]. *)
exception Token_not_found

(** Raised by [tokenize] when a string literal opened with ['"'] reaches the
    end of the input without a closing ['"']. *)
exception Unclosed_quote
2022-01-17 15:17:18 +09:00
2022-01-10 01:31:47 +09:00
(** [either f g] is the predicate that holds for [c] when [f c] or [g c]
    holds. [g] is consulted only when [f c] is [false]. *)
let either f g = fun c ->
  if f c then true else g c
(** [is_digit c] is [true] exactly for the ASCII decimal digits
    ['0'] through ['9']. *)
let is_digit = function
  | '0' .. '9' -> true
  | _ -> false
(** [is_num c] tells whether [c] is accepted inside a numeric literal after
    its leading digit: a decimal digit, ['.'] or ['x'].

    NOTE(review): ['x'] is presumably there for [0x]-prefixed literals, but
    the hex digits a-f are not accepted here, so such a literal stops at the
    first letter digit -- confirm whether that is intended. *)
let is_num = function
  | 'x' | '.' -> true
  | c -> is_digit c
(** [is_whitespace c] is [true] for the characters the lexer skips between
    tokens: space, tab, line feed and carriage return.

    Carriage return is included so that input with CRLF line endings is
    skipped like any other line break instead of being handed to the token
    table. *)
let is_whitespace = function
  | ' ' | '\t' | '\n' | '\r' -> true
  | _ -> false
(** [is_alpha c] is [true] exactly for the ASCII letters ['A'..'Z'] and
    ['a'..'z']. *)
let is_alpha = function
  | 'A' .. 'Z' | 'a' .. 'z' -> true
  | _ -> false
2022-01-10 01:31:47 +09:00
(** [is_ident_start c] tells whether [c] may begin an identifier: a letter
    (per [is_alpha]) or an underscore. *)
let is_ident_start c =
  is_alpha c || c = '_'
(** [is_ident c] tells whether [c] may appear inside an identifier: any
    character accepted by [is_ident_start], or a decimal digit. *)
let is_ident c =
  is_ident_start c || is_digit c
2022-01-18 15:36:09 +09:00
(** [expect_token str tok seq] checks whether [seq] begins with the characters
    of [str].

    @return [Some (tok, rest)], where [rest] is [seq] with the matched prefix
    removed, when every character of [str] matches; [None] when [seq] ends
    early or a character differs. An empty [str] always matches. *)
let expect_token str tok seq =
  let rec consume pattern input =
    (* The input is forced on every step, including the one where the pattern
       is already exhausted. *)
    let input_node = input () in
    match pattern () with
    | Seq.Nil -> Some (tok, input)
    | Seq.Cons (expected, pattern') ->
      (match input_node with
       | Seq.Cons (actual, input') when expected = actual ->
         consume pattern' input'
       | Seq.Cons _ | Seq.Nil -> None)
  in
  consume (String.to_seq str) seq
(** [find_token seq] tries the entries currently stored in [Token.tokens], in
    list order, and returns the result of the first one whose text is a
    prefix of [seq] (see [expect_token]), or [None] when no entry matches. *)
let find_token seq =
  let try_entry (text, tok) = expect_token text tok seq in
  List.find_map try_entry !Token.tokens
2022-01-10 01:31:47 +09:00
(** [partition_while f seq] splits [seq] at the first element that fails [f].

    @return a pair [(prefix, rest)]: [prefix] holds the longest run of leading
    elements that all satisfy [f], and [rest] holds everything from the first
    failing element onwards. This is the same pair as
    [take_while f seq, drop_while f seq].

    The prefix is consumed eagerly, so the call does not return on an infinite
    sequence whose elements all satisfy [f]. The scan is an accumulator loop,
    so stack use stays constant however long the prefix is. *)
let partition_while f seq : 'a Seq.t * 'a Seq.t =
  let rec scan rev_prefix seq =
    match seq () with
    | Seq.Nil -> rev_prefix, seq
    | Seq.Cons (x, rest) ->
      if f x then scan (x :: rev_prefix) rest
      else
        (* Put the already-forced element back in front of the remainder. *)
        rev_prefix, Seq.cons x rest
  in
  let rev_prefix, rest = scan [] seq in
  List.to_seq (List.rev rev_prefix), rest
(** [tokenize str] lexes [str] into a lazy sequence of tokens.

    Characters accepted by [is_whitespace] are skipped. Each remaining lexeme
    is classified by its first character:
    - ['"'] starts a string literal that runs to the next ['"'];
    - a digit starts a numeric literal made of [is_num] characters, read as a
      float when it contains ['.'] and as an int otherwise;
    - an [is_ident_start] character starts an identifier made of [is_ident]
      characters;
    - anything else is looked up with [find_token].

    Lexing happens as the result is forced, so the exceptions below are raised
    while consuming the sequence, not by the call to [tokenize] itself.

    @raise Unclosed_quote if a string literal has no closing ['"'].
    @raise Token_not_found if [find_token] matches nothing at the current
    position.
    @raise Failure if [int_of_string] or [float_of_string] rejects the text of
    a numeric literal. *)
let tokenize (str : string) : tokens =
  let rec aux seq () =
    let open Token in
    match seq () with
    | Seq.Nil -> Seq.Nil
    | Seq.Cons (x, seq) ->
      if is_whitespace x then
        aux seq () (* skip whitespace *)
      else if x = '"' then begin
        let body, rest = partition_while (( <> ) '"') seq in
        let lit = String (String.of_seq body) in
        match rest () with
        | Seq.Nil -> raise Unclosed_quote
        | Seq.Cons (_, rest) ->
          (* [partition_while] stops only at the end of input or at the first
             ['"'], so the element dropped here is the closing quote. *)
          Seq.Cons (Value lit, aux rest)
      end
      else if is_digit x then begin
        let tail, rest = partition_while is_num seq in
        let text = String.of_seq (Seq.cons x tail) in
        let number =
          if String.contains text '.' (* float *)
          then Float (float_of_string text)
          else Int (int_of_string text)
        in
        Seq.Cons (Value number, aux rest)
      end
      else if is_ident_start x then begin
        let tail, rest = partition_while is_ident seq in
        let id = String.of_seq (Seq.cons x tail) in
        Seq.Cons (Ident id, aux rest)
      end
      else begin
        (* Put [x] back so the token table sees the whole lexeme. *)
        match find_token (Seq.cons x seq) with
        | None -> raise Token_not_found
        | Some (tok, rest) -> Seq.Cons (tok, aux rest)
      end
  in
  aux (String.to_seq str)