Rewrite token lexer
This commit is contained in:
		
							parent
							
								
									dd2a1e160e
								
							
						
					
					
						commit
						7d6c833e58
					
				
					 2 changed files with 41 additions and 36 deletions
				
			
		
							
								
								
									
										22
									
								
								lex.ml
									
										
									
									
									
								
							
							
						
						
									
										22
									
								
								lex.ml
									
										
									
									
									
								
							| 
						 | 
					@ -1,5 +1,7 @@
 | 
				
			||||||
type tokens = Token.t Seq.t
 | 
					type tokens = Token.t Seq.t
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					exception Token_not_found
 | 
				
			||||||
 | 
					
 | 
				
			||||||
let either f g c =
 | 
					let either f g c =
 | 
				
			||||||
  f c || g c
 | 
					  f c || g c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -42,18 +44,18 @@ let tokenize (str : string) : tokens =
 | 
				
			||||||
    | Seq.Nil -> Seq.empty
 | 
					    | Seq.Nil -> Seq.empty
 | 
				
			||||||
    | Seq.Cons (x, s) ->
 | 
					    | Seq.Cons (x, s) ->
 | 
				
			||||||
      if is_whitespace x then
 | 
					      if is_whitespace x then
 | 
				
			||||||
        aux s
 | 
					        aux s (* skip whitespace *)
 | 
				
			||||||
      else if is_digit x then
 | 
					      else if is_digit x then
 | 
				
			||||||
        let n, s = partition_while is_num s in
 | 
					        let n, s = partition_while is_num seq in
 | 
				
			||||||
        let n = String.of_seq @@ Seq.cons x n in
 | 
					        let n = int_of_string @@ String.of_seq n in
 | 
				
			||||||
        Seq.cons (of_string n) (aux s)
 | 
					        Seq.cons (Int n) (aux s)
 | 
				
			||||||
      else if is_ident_start x then
 | 
					      else if is_ident_start x then
 | 
				
			||||||
        begin
 | 
					        let id, s = partition_while is_ident seq in
 | 
				
			||||||
          let id, s = partition_while is_ident s in
 | 
					        let id = String.of_seq id in
 | 
				
			||||||
          let id = String.of_seq @@ Seq.cons x id in
 | 
					        Seq.cons (Ident id) (aux s)
 | 
				
			||||||
          Seq.cons (Ident id) (aux s)
 | 
					 | 
				
			||||||
        end
 | 
					 | 
				
			||||||
      else
 | 
					      else
 | 
				
			||||||
        Seq.cons (of_char x) (aux s)
 | 
					        match find_token seq with
 | 
				
			||||||
 | 
					        | None -> raise Token_not_found
 | 
				
			||||||
 | 
					        | Some (t, s) -> Seq.cons t (aux s)
 | 
				
			||||||
  in
 | 
					  in
 | 
				
			||||||
  aux seq
 | 
					  aux seq
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										55
									
								
								token.ml
									
										
									
									
									
								
							
							
						
						
									
										55
									
								
								token.ml
									
										
									
									
									
								
							| 
						 | 
					@ -10,34 +10,37 @@ type t =
 | 
				
			||||||
  | LParen
 | 
					  | LParen
 | 
				
			||||||
  | RParen
 | 
					  | RParen
 | 
				
			||||||
 | 
					
 | 
				
			||||||
let of_char = function
 | 
					let tokens = ref [
 | 
				
			||||||
  | '+' -> Plus
 | 
					  "+", Plus;
 | 
				
			||||||
  | '-' -> Minus
 | 
					  "-", Minus;
 | 
				
			||||||
  | '*' -> Asterisk
 | 
					  "*", Asterisk;
 | 
				
			||||||
  | '/' -> Slash
 | 
					  "/", Slash;
 | 
				
			||||||
  | '^' -> Carret
 | 
					  "^", Carret;
 | 
				
			||||||
  | '%' -> Percent
 | 
					  "%", Percent;
 | 
				
			||||||
  | '(' -> LParen
 | 
					  "(", LParen;
 | 
				
			||||||
  | ')' -> RParen
 | 
					  ")", RParen;
 | 
				
			||||||
  | _ -> invalid_arg "Token.of_char"
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
let of_string str =
 | 
					let expect_token str tok seq =
 | 
				
			||||||
  let fc = Char.code str.[0] in
 | 
					  let rec aux ts seq =
 | 
				
			||||||
  if Char.(code '0' <= fc && fc <= code '9') then
 | 
					    match ts (), seq () with
 | 
				
			||||||
    Int (int_of_string str)
 | 
					    | Seq.Nil, _ -> Some seq
 | 
				
			||||||
  else
 | 
					    | Seq.Cons _, Seq.Nil -> None
 | 
				
			||||||
    match str with
 | 
					    | Seq.Cons (a, ts), Seq.Cons (b, seq) ->
 | 
				
			||||||
    | _ when String.length str = 1 -> of_char str.[0]
 | 
					      if a = b then aux ts seq else None
 | 
				
			||||||
    | _ -> failwith "Token.of_string"
 | 
					  in
 | 
				
			||||||
 | 
					  let str = String.to_seq str in
 | 
				
			||||||
 | 
					  aux str seq |> Option.map (fun s -> tok, s)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					let find_token seq =
 | 
				
			||||||
 | 
					  !tokens |> List.find_map
 | 
				
			||||||
 | 
					    (fun (s, t) -> expect_token s t seq)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
let to_string = function
 | 
					let to_string = function
 | 
				
			||||||
  | Int n -> string_of_int n
 | 
					  | Int n -> string_of_int n
 | 
				
			||||||
  | Ident s -> s
 | 
					  | Ident s -> s
 | 
				
			||||||
  | Plus -> "+"
 | 
					  | t ->
 | 
				
			||||||
  | Minus -> "-"
 | 
					    begin match List.find_opt (fun (_, tok) -> t = tok) !tokens with
 | 
				
			||||||
  | Asterisk -> "*"
 | 
					      | None -> failwith "Token.to_string"
 | 
				
			||||||
  | Slash -> "/"
 | 
					      | Some (s, _) -> s
 | 
				
			||||||
  | Carret -> "^"
 | 
					    end
 | 
				
			||||||
  | Percent -> "%"
 | 
					 | 
				
			||||||
  | LParen -> "("
 | 
					 | 
				
			||||||
  | RParen -> ")"
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue