(* Lexer: turns a string into a lazy sequence of [Token.t] values. *)
(** A lazy sequence of tokens; this is the result type of [tokenize]. *)
type tokens = Token.t Seq.t
(** Raised while forcing the result of [tokenize] when no token can be
    recognised at the current position of the input. *)
exception Token_not_found
(** [either f g c] is [true] when [f c] holds or, failing that, when [g c]
    holds. [g] is only applied when [f c] is [false]. *)
let either f g c =
  if f c then true else g c
(** [is_digit c] tells whether [c] is one of the characters ['0'] to ['9']. *)
let is_digit c =
  match c with
  | '0' .. '9' -> true
  | _ -> false
(** [is_num c] tells whether [c] may continue a numeric literal: the letter
    ['x'], a dot, or a decimal digit. *)
let is_num c =
  match c with
  | 'x' | '.' -> true
  | _ -> is_digit c
(** [is_whitespace c] tells whether [c] is a blank character: space, tab,
    line feed or carriage return.

    Carriage return is included so that input with CRLF line endings is
    treated as whitespace instead of being handed on as a token character. *)
let is_whitespace = function
  | ' ' | '\t' | '\n' | '\r' -> true
  | _ -> false
(** [is_alpha c] tells whether [c] is an ASCII letter, upper or lower case. *)
let is_alpha c =
  match c with
  | 'A' .. 'Z' | 'a' .. 'z' -> true
  | _ -> false
(** [is_ident_start c] tells whether [c] can begin an identifier: an ASCII
    letter or an underscore. *)
let is_ident_start c =
  is_alpha c || c = '_'
(** [is_ident c] tells whether [c] can appear inside an identifier: any
    character accepted by [is_ident_start], or a decimal digit. *)
let is_ident c =
  is_ident_start c || is_digit c
(** [partition_while f seq] splits [seq] at the first element that does not
    satisfy [f]. It returns [(prefix, rest)]: [prefix] holds the leading
    elements for which [f] is [true], and [rest] is everything from the first
    rejected element onwards. This is the same as
    [take_while f seq, drop_while f seq].

    The prefix is consumed eagerly. The scan uses an accumulator, so its stack
    use does not grow with the length of the prefix. *)
let partition_while f seq : 'a Seq.t * 'a Seq.t =
  let rec go acc rest =
    match rest () with
    | Seq.Nil -> List.to_seq (List.rev acc), rest
    | Seq.Cons (x, tl) ->
      if f x then go (x :: acc) tl
      else
        (* Put the rejected element back so that [rest] starts with it
           without forcing the source sequence a second time. *)
        List.to_seq (List.rev acc), Seq.cons x tl
  in
  go [] seq
(** [tokenize str] lexes [str] into a lazy sequence of tokens.

    Tokens are produced on demand as the sequence is forced:
    - whitespace is skipped;
    - a decimal digit starts a numeric literal, which extends over the
      following characters accepted by [is_num]; the literal becomes a
      [Float] when it contains a ['.'] and an [Int] otherwise;
    - a character accepted by [is_ident_start] starts an [Ident], which
      extends over the following characters accepted by [is_ident];
    - anything else is handed to [find_token].

    @raise Token_not_found while the sequence is being forced, when
    [find_token] finds no token or when a numeric literal cannot be
    converted (for example ["1.2.3"] or ["1x"]). *)
let tokenize (str : string) : tokens =
  let rec aux seq () =
    let open Token in
    match seq () with
    | Seq.Nil -> Seq.Nil
    | Seq.Cons (x, s) ->
      if is_whitespace x then
        aux s () (* skip whitespace *)
      else if is_digit x then begin
        (* [x] is the leading digit; collect the rest of the literal. *)
        let tail, after = partition_while is_num s in
        let lit = String.of_seq (Seq.cons x tail) in
        (* The [_opt] conversions are used so that a malformed literal is
           reported as [Token_not_found] rather than as a stray [Failure]. *)
        let tok =
          if String.contains lit '.' (* float *) then
            (match float_of_string_opt lit with
             | Some f -> Float f
             | None -> raise Token_not_found)
          else
            (match int_of_string_opt lit with
             | Some i -> Int i
             | None -> raise Token_not_found)
        in
        Seq.Cons (tok, aux after)
      end
      else if is_ident_start x then begin
        (* Restart from [seq] so that the first character is included. *)
        let chars, after = partition_while is_ident seq in
        Seq.Cons (Ident (String.of_seq chars), aux after)
      end
      else
        (match find_token seq with
         | None -> raise Token_not_found
         | Some (t, after) -> Seq.Cons (t, aux after))
  in
  aux (String.to_seq str)