ce/lex.ml

61 lines
1.4 KiB
OCaml

(** A sequence of lexed tokens; this is the result type of [tokenize]. *)
type tokens = Token.t Seq.t
(** Raised by [tokenize] when [find_token] returns [None] for the input at
    the point reached. *)
exception Token_not_found
(** [either f g c] is [true] when [f c] or [g c] holds. [g] is only applied
    when [f c] is [false]. *)
let either f g c =
  if f c then true else g c
(** [is_digit c] is [true] when [c] is one of the ASCII decimal digits
    ['0'] to ['9']. *)
let is_digit = function
  | '0' .. '9' -> true
  | _ -> false
(** [is_num c] is [true] when [c] may appear inside a numeric run: an ASCII
    decimal digit or the lowercase letter ['x'].

    NOTE(review): ['x'] is presumably accepted so that a ["0x"] prefix stays
    inside the literal, but hex digits [a]-[f] are not accepted here --
    confirm whether hexadecimal literals are meant to be supported. *)
let is_num = function
  | 'x' | '0' .. '9' -> true
  | _ -> false
(** [is_whitespace c] is [true] for the blank characters: space, horizontal
    tab, line feed and carriage return.

    Carriage return is included so that text with CRLF line endings is
    treated the same as text with LF line endings. *)
let is_whitespace = function
  | ' ' | '\t' | '\n' | '\r' -> true
  | _ -> false
(** [is_alpha c] is [true] when [c] is an ASCII letter, upper or lower
    case. *)
let is_alpha = function
  | 'A' .. 'Z' | 'a' .. 'z' -> true
  | _ -> false
(** [is_ident_start c] is [true] when [c] may begin an identifier: an
    underscore or any character accepted by [is_alpha]. *)
let is_ident_start = function
  | '_' -> true
  | c -> is_alpha c
(** [is_ident c] is [true] when [c] may appear inside an identifier: any
    character accepted by [is_ident_start] or by [is_digit]. *)
let is_ident c =
  is_ident_start c || is_digit c
(** [partition_while f seq] splits [seq] at the first element that does not
    satisfy [f]. It returns [(prefix, rest)], where [prefix] holds the leading
    elements for which [f] is [true] and [rest] is everything from the first
    rejected element onwards.

    The result matches taking and then dropping the same prefix, but it is
    computed in one eager pass: [seq] is forced up to and including the first
    rejected element before the function returns.

    The scan is a tail-recursive loop over an accumulator, so stack use does
    not grow with the length of the prefix. *)
let partition_while f seq : 'a Seq.t * 'a Seq.t =
  let rec scan acc seq =
    (* [acc] holds the accepted elements in reverse order. *)
    match seq () with
    | Seq.Nil -> List.to_seq (List.rev acc), seq
    | Seq.Cons (x, rest) ->
      if f x then scan (x :: acc) rest
      else
        (* Put the rejected element back at the head of the remainder. *)
        List.to_seq (List.rev acc), Seq.cons x rest
  in
  scan [] seq
(** [tokenize str] lexes [str] into a sequence of tokens.

    Characters are consumed left to right:
    - characters accepted by [is_whitespace] are skipped;
    - a run that starts with a digit and continues while [is_num] holds is
      converted to an [int] and emitted as [Int];
    - a run that starts with [is_ident_start] and continues while [is_ident]
      holds is emitted as [Ident];
    - anything else is passed to [find_token], which yields the token and the
      remaining input.

    The whole input is lexed before this function returns: each step calls
    [aux] on the remainder while building the result, so errors are raised
    here rather than when the sequence is consumed, and recursion depth grows
    with the number of tokens.

    @raise Token_not_found if [find_token] returns [None].
    @raise Failure if a numeric run is not a valid [int] literal (for example
    ["1x2"], or a value out of range); the message includes the offending
    text. *)
let tokenize (str : string) : tokens =
  let rec aux seq =
    let open Token in
    match seq () with
    | Seq.Nil -> Seq.empty
    | Seq.Cons (x, s) ->
      if is_whitespace x then
        aux s (* skip whitespace *)
      else if is_digit x then begin
        (* Partition from [seq], not [s], so the first digit is kept. *)
        let run, rest = partition_while is_num seq in
        let literal = String.of_seq run in
        match int_of_string_opt literal with
        | Some n -> Seq.cons (Int n) (aux rest)
        | None ->
          (* Same exception type as [int_of_string], but naming the text. *)
          failwith
            (Printf.sprintf "tokenize: invalid integer literal %S" literal)
      end
      else if is_ident_start x then begin
        let run, rest = partition_while is_ident seq in
        let id = String.of_seq run in
        Seq.cons (Ident id) (aux rest)
      end
      else begin
        match find_token seq with
        | None -> raise Token_not_found
        | Some (t, rest) -> Seq.cons t (aux rest)
      end
  in
  aux (String.to_seq str)