open Token exception Invalid_character of int * char exception Expected of int * char let invalid_char col c = raise @@ Invalid_character (col, c) let expected col c = raise @@ Expected (col, c) let either f g c = f c || g c (* test functions *) let is_digit c = '0' <= c && c <= '9' let is_num = function | 'x' -> true | '.' -> true | c -> is_digit c let is_whitespace = function | ' ' | '\t' | '\n' -> true | _ -> false let is_alpha c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') let is_ident_start = either is_alpha ((=) '_') let is_ident = either is_ident_start is_digit (* expect_char expects c at col of seq. If not, raise Expected. *) let expect_char col c seq = match seq () with | Seq.Nil -> expected col c | Seq.Cons ((col, x), seq) -> if x = c then seq else expected col c let expect_token str tok seq = let rec aux ts seq = match ts (), seq () with | Seq.Nil, _ -> Some seq | Seq.Cons _, Seq.Nil -> None | Seq.Cons (a, ts), Seq.Cons ((_, b), seq) -> if a = b then aux ts seq else None in let str = String.to_seq str in aux str seq |> Option.map (fun s -> tok, s) let find_token seq = !tokens |> List.find_map (fun (s, t) -> expect_token s t seq) (* same as take_while f seq, drop_while f seq *) let rec partition_while f seq = match seq () with | Seq.Nil -> Seq.empty, seq | Seq.Cons (x, seq) -> if f x then let n, s = partition_while f seq in Seq.cons x n, s else Seq.(empty, cons x seq) let snds f = fun x -> f @@ snd x let tokenize str = let seq = String.to_seqi str in let rec aux seq = let open Token in let open Seq in match seq () with | Nil -> empty | Cons ((col, x), seq) -> (* skip whitespace *) if is_whitespace x then aux seq (* string *) else if x = '"' then let str, seq = partition_while (fun (_, c) -> c <> '"') seq in let str = String.of_seq @@ Seq.map snd str in let seq = expect_char (col + String.length str + 1) '"' seq in cons (col, String str) (aux seq) (* number (int, float) *) else if is_digit x then let n, seq = partition_while (snds is_num) seq in let n = cons x (Seq.map snd n) |> String.of_seq in let n = if String.contains n '.' (* float *) then Float (float_of_string n) else Int (int_of_string n) in cons (col, n) (aux seq) (* idents *) else if is_ident_start x then let id, seq = partition_while (snds is_ident) seq in let id = String.of_seq @@ cons x @@ Seq.map snd id in cons (col, Ident id) (aux seq) (* tokens *) else match find_token @@ cons (col, x) seq with | None -> invalid_char col x | Some (t, seq) -> cons (col, t) (aux seq) in aux seq