ce/lex.ml

110 lines
2.7 KiB
OCaml
Raw Normal View History

2022-02-01 02:06:18 +09:00
open Token
2022-02-12 03:17:26 +09:00
exception Invalid_character of int * char
exception Expected of int * char
2022-01-10 01:31:47 +09:00
2022-02-12 03:17:26 +09:00
let invalid_char col c = raise @@ Invalid_character (col, c)
let expected col c = raise @@ Expected (col, c)
2022-01-17 15:17:18 +09:00
2022-01-10 01:31:47 +09:00
let either f g c =
f c || g c
let is_digit c =
'0' <= c && c <= '9'
let is_num = function
| 'x' -> true
2022-01-18 15:33:56 +09:00
| '.' -> true
2022-01-10 01:31:47 +09:00
| c -> is_digit c
let is_whitespace = function
| ' ' | '\t' | '\n' -> true
| _ -> false
let is_alpha c =
2022-01-10 23:11:13 +09:00
('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
2022-01-10 01:31:47 +09:00
let is_ident_start =
either is_alpha ((=) '_')
let is_ident =
either is_ident_start is_digit
2022-02-12 03:17:26 +09:00
let expect_char col c seq =
2022-01-28 01:52:57 +09:00
match seq () with
2022-02-12 03:17:26 +09:00
| Seq.Nil -> expected col c
| Seq.Cons ((_, x), seq) ->
if x = c then seq else expected col c
2022-01-28 01:52:57 +09:00
2022-01-18 15:36:09 +09:00
let expect_token str tok seq =
let rec aux ts seq =
match ts (), seq () with
| Seq.Nil, _ -> Some seq
| Seq.Cons _, Seq.Nil -> None
2022-02-12 03:17:26 +09:00
| Seq.Cons (a, ts), Seq.Cons ((_, b), seq) ->
2022-01-18 15:36:09 +09:00
if a = b then aux ts seq else None
in
let str = String.to_seq str in
aux str seq |> Option.map (fun s -> tok, s)
let find_token seq =
2022-02-01 02:06:18 +09:00
!tokens |> List.find_map
2022-01-18 15:36:09 +09:00
(fun (s, t) -> expect_token s t seq)
2022-01-10 01:31:47 +09:00
(* same as take_while f seq, drop_while f seq *)
2022-01-28 01:52:57 +09:00
let rec partition_while f seq =
2022-01-10 01:31:47 +09:00
match seq () with
| Seq.Nil -> Seq.empty, seq
| Seq.Cons (x, seq) ->
if f x then
let n, s = partition_while f seq in
Seq.cons x n, s
else
Seq.(empty, cons x seq)
2022-02-12 03:17:26 +09:00
let snds f = fun x -> f @@ snd x
2022-01-28 01:52:57 +09:00
let tokenize str =
2022-02-12 03:17:26 +09:00
let seq = String.to_seqi str in
2022-01-22 03:04:00 +09:00
let rec aux seq =
2022-01-10 01:31:47 +09:00
let open Token in
2022-01-28 01:52:57 +09:00
let open Seq in
2022-01-10 01:31:47 +09:00
match seq () with
2022-01-28 01:52:57 +09:00
| Nil -> empty
2022-02-12 03:17:26 +09:00
| Cons ((col, x), seq) ->
2022-01-28 01:52:57 +09:00
(* skip whitespace *)
2022-01-10 01:31:47 +09:00
if is_whitespace x then
2022-01-28 01:52:57 +09:00
aux seq
2022-01-28 01:52:57 +09:00
(* string *)
else if x = '"' then
2022-02-12 03:17:26 +09:00
let str, seq = partition_while (fun (_, c) -> c <> '"') seq in
let str = String.of_seq @@ Seq.map snd str in
let seq = expect_char (col + String.length str + 1) '"' seq in
cons (String str) (aux seq)
2022-01-28 01:52:57 +09:00
(* number (int, float) *)
2022-01-10 01:31:47 +09:00
else if is_digit x then
2022-02-12 03:17:26 +09:00
let n, seq = partition_while (snds is_num) seq in
let n = cons x (Seq.map snd n) |> String.of_seq in
2022-01-18 15:33:56 +09:00
let n =
if String.contains n '.' (* float *)
then Float (float_of_string n)
else Int (int_of_string n)
in
2022-02-01 02:06:18 +09:00
cons n (aux seq)
2022-01-28 01:52:57 +09:00
(* idents *)
2022-01-10 01:31:47 +09:00
else if is_ident_start x then
2022-02-12 03:17:26 +09:00
let id, seq = partition_while (snds is_ident) seq in
let id = String.of_seq @@ cons x @@ Seq.map snd id in
2022-01-28 01:52:57 +09:00
cons (Ident id) (aux seq)
2022-01-28 01:52:57 +09:00
(* tokens *)
2022-01-10 01:31:47 +09:00
else
2022-02-12 03:17:26 +09:00
match find_token @@ cons (col, x) seq with
| None -> invalid_char col x
2022-01-28 01:52:57 +09:00
| Some (t, seq) -> cons t (aux seq)
2022-01-10 01:31:47 +09:00
in
aux seq