(** A lazily produced stream of tokens. *)
type tokens = Token.t Seq.t

(** Raised while forcing the result of {!tokenize} when [find_token]
    returns [None] for the remaining input. *)
exception Token_not_found

(** [either f g c] is [f c || g c]; [g] is only consulted when [f c] is
    false. *)
let either f g c = f c || g c

(** ASCII decimal digit. *)
let is_digit c = '0' <= c && c <= '9'

(** ASCII hexadecimal digit, in either case. *)
let is_hex_digit c =
  is_digit c || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')

(** Character accepted inside a non-hexadecimal digit run: a decimal digit
    or a lower-case ['x']. *)
let is_num = function
  | 'x' -> true
  | c -> is_digit c

(** Space, tab or newline. Note that ['\r'] is not treated as whitespace. *)
let is_whitespace = function
  | ' ' | '\t' | '\n' -> true
  | _ -> false

(** ASCII letter. *)
let is_alpha c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')

(** First character of an identifier: an ASCII letter or an underscore. *)
let is_ident_start = either is_alpha (( = ) '_')

(** Any later character of an identifier: as {!is_ident_start}, plus
    decimal digits. *)
let is_ident = either is_ident_start is_digit

(** [partition_while f seq] is the longest prefix of [seq] whose elements
    all satisfy [f], paired with the remainder of [seq]
    (same as take_while f seq, drop_while f seq).

    The prefix is consumed eagerly. It is collected in an accumulator so
    that stack usage does not grow with the length of the prefix. *)
let partition_while f seq : 'a Seq.t * 'a Seq.t =
  let rec go acc seq =
    match seq () with
    | Seq.Nil -> List.to_seq (List.rev acc), seq
    | Seq.Cons (x, rest) ->
      if f x then go (x :: acc) rest
      (* Re-attach the element that ended the prefix instead of forcing
         [seq] a second time. *)
      else List.to_seq (List.rev acc), Seq.cons x rest
  in
  go [] seq

(** [lex_int_literal seq] splits the text of an integer literal off the
    front of [seq] and returns it with the remaining input. [seq] is
    expected to start with a decimal digit.

    - If [seq] starts with [0x], the literal is that prefix followed by the
      longest run of hexadecimal digits. Without this case a literal such
      as [0x1f] would be cut at the first letter.
    - Otherwise the literal is the longest run of {!is_num} characters.

    The returned text is not validated here; the caller converts it. *)
let lex_int_literal seq : string * char Seq.t =
  let plain_run () =
    let chars, rest = partition_while is_num seq in
    String.of_seq chars, rest
  in
  match seq () with
  | Seq.Cons ('0', after_zero) ->
    (match after_zero () with
     | Seq.Cons ('x', after_x) ->
       let digits, rest = partition_while is_hex_digit after_x in
       "0x" ^ String.of_seq digits, rest
     | Seq.Nil | Seq.Cons _ -> plain_run ())
  | Seq.Nil | Seq.Cons _ -> plain_run ()

(** [tokenize str] lazily splits [str] into tokens.

    Whitespace (see {!is_whitespace}) is skipped. A decimal digit starts an
    integer literal (see {!lex_int_literal}), an identifier-start character
    starts an identifier, and anything else is handed to [find_token].

    Tokens are produced on demand, so the exceptions below are raised when
    the returned sequence is forced, not when [tokenize] is called.

    @raise Token_not_found if [find_token] returns [None].
    @raise Failure if [int_of_string] rejects the text of an integer
    literal (for example [0x] with no digits after it, or [1x2]). *)
let tokenize (str : string) : tokens =
  let seq = String.to_seq str in
  let rec aux seq () =
    let open Token in
    match seq () with
    | Seq.Nil -> Seq.Nil
    | Seq.Cons (x, s) ->
      if is_whitespace x then aux s () (* skip whitespace *)
      else if is_digit x then
        let text, s = lex_int_literal seq in
        let n = int_of_string text in
        Seq.Cons (Int n, aux s)
      else if is_ident_start x then
        let id, s = partition_while is_ident seq in
        let id = String.of_seq id in
        Seq.Cons (Ident id, aux s)
      else (
        (* [find_token] receives the input including the current
           character. *)
        match find_token seq with
        | None -> raise Token_not_found
        | Some (t, s) -> Seq.Cons (t, aux s))
  in
  aux seq