{
(* lexical.mll *)
(* Lexical analyser.  Alain FRISCH *)

#open "parser";;

(* Current nesting depth of comments; set to 1 by Token when a comment
   opens and maintained by the comment rule. *)
let comment_depth = ref 0;;

(* Raised by Token when the end of the input is reached. *)
exception Eof;;

(* Growable buffer used to accumulate the characters of a string literal.
   string_buff is the current storage, sbuf_size its capacity and
   sbuf_length the number of characters stored so far. *)
let initial_string_buffer = create_string 256;;
let string_buff = ref initial_string_buffer;;
(* The capacity starts out in sync with the initial buffer, so that
   store_string_char is safe even if reset_string_buffer was never called. *)
let sbuf_size = ref (string_length initial_string_buffer);;
let sbuf_length = ref 0;;

(* Empties the buffer and goes back to the initial storage. *)
let reset_string_buffer () =
  string_buff := initial_string_buffer;
  sbuf_length := 0;
  sbuf_size := string_length !string_buff
;;

(* Appends the character c, doubling the storage when it is full. *)
let store_string_char c =
  if !sbuf_length = !sbuf_size then begin
    let new_buff = create_string (!sbuf_size * 2) in
    blit_string !string_buff 0 new_buff 0 !sbuf_size;
    string_buff := new_buff;
    sbuf_size := !sbuf_size * 2
  end;
  set_nth_char !string_buff !sbuf_length c;
  incr sbuf_length
;;

(* Returns a copy of the characters accumulated so far.  The buffer is then
   fully reset, so that string_buff, sbuf_size and sbuf_length keep
   describing the same storage once a grown buffer has been dropped. *)
let get_stored_string () =
  let s = sub_string !string_buff 0 !sbuf_length in
  reset_string_buffer ();
  s
;;

(* Character denoted by the letter that follows a backslash in an escape
   sequence.  Any letter other than n, r, b and t stands for itself. *)
let char_for_backslash = function
    `n` -> `\010`
  | `r` -> `\013`
  | `b` -> `\008`
  | `t` -> `\009`
  | c -> c
;;

(* Character whose code is written as three decimal digits located at
   offsets i, i+1 and i+2 of the current lexeme (48 is the code of the
   digit zero).  No range check is done here: the computed code is passed
   straight to char_of_int. *)
let char_for_decimal_code lexbuf i =
  char_of_int(100 * (int_of_char(get_lexeme_char lexbuf i) - 48) +
               10 * (int_of_char(get_lexeme_char lexbuf (i+1)) - 48) +
                    (int_of_char(get_lexeme_char lexbuf (i+2)) - 48))
;;
}

(* Main entry point: returns the next token of the input.
   Raises Eof at the end of the input. *)
rule Token = parse
    (* Blanks are skipped one character at a time. *)
    [` ` `\t` `\n` `\r`]
      { Token lexbuf }
    (* Comments are skipped; they may be nested. *)
  | "(*"
      { comment_depth := 1;
        comment lexbuf;
        Token lexbuf }
  | [`0`-`9`]+
      { INT (int_of_string (get_lexeme lexbuf)) }
  | "->"     { MAPSTO }
    (* Keywords are listed before the identifier case: on lexemes of equal
       length the first matching case is the one selected. *)
  | "if"     { IF }
  | "then"   { THEN }
  | "else"   { ELSE }
  | "not"    { NOT }
  | "let"    { LET }
  | "rec"    { REC }
  | "fun"    { FUN }
  | "match"  { MATCH }
  | "with"   { WITH }
  | "type"   { TYPE }
  | "in"     { IN }
  | "true"   { TRUE }
  | "false"  { FALSE }
  | "xfst"   { FIRST }
  | "xsnd"   { SECOND }
  | "xhd"    { HEAD }
  | "xtl"    { TAIL }
  | "of"     { OF }
  | `,`      { VIRG }
  | `;`      { PVIRG }
  | ";;"     { DBLPVIRG }
  | `(`      { PARG }
  | `)`      { PARD }
  | `+`      { PLUS }
  | `-`      { MOINS }
  | `*`      { MULT }
  | `/`      { DIV }
  | "and"    { AND }
  | "&"      { ET }
  | "||"     { OU }
  | "|"      { PIPE }
  | `=`      { EGAL }
  | `<`      { INF }
  | "<="     { INFEGAL }
  | `>`      { SUP }
  | ">="     { SUPEGAL }
  | "["      { CROCHG }
  | "]"      { CROCHD }
  | "()"     { UNIT }
  | "::"     { CONS }
  | `#`      { DIEZE }
  | `!`      { EXCLAM }
  | ":="     { DPEGAL }
  | "'"      { PRIM }
    (* Identifiers: a letter followed by letters, digits or underscores. *)
  | ([`a`-`z`] | [`A`-`Z`])([`a`-`z`] | [`A`-`Z`] | [`0`-`9`] | `_`)*
      { IDENT (get_lexeme lexbuf) }
    (* String literal: the body is accumulated by stringtoken. *)
  | "\""
      { reset_string_buffer();
        stringtoken lexbuf;
        STR (get_stored_string()) }
    (* Character literal: the rest is read by chartoken. *)
  | "`"
      { CHAR (chartoken lexbuf) }
  | eof
      { raise Eof }

(* Skips the remainder of a comment whose opening has already been read.
   comment_depth must hold the current nesting depth on entry. *)
and comment = parse
    "(*"
      { incr comment_depth;
        comment lexbuf }
  | "*)"
      { decr comment_depth;
        if !comment_depth = 0 then () else comment lexbuf }
  | eof
      { failwith "Fin de fichier dans un commentaire" }
  | _
      { comment lexbuf }

(* Reads the body of a string literal up to the closing quote and stores
   its characters, with escapes decoded, in the string buffer. *)
and stringtoken = parse
    `"`
      { () }
    (* A backslash followed by blanks is dropped altogether. *)
  | `\\` [` ` `\010` `\013` `\009` `\026` `\012`] +
      { stringtoken lexbuf }
  | `\\` [`\\` `"` `n` `t` `b` `r`]
      { store_string_char(char_for_backslash(get_lexeme_char lexbuf 1));
        stringtoken lexbuf }
  | `\\` [`0`-`9`] [`0`-`9`] [`0`-`9`]
      { store_string_char(char_for_decimal_code lexbuf 1);
        stringtoken lexbuf }
  | eof
      { failwith "Fin de fichier dans une chaine littérale" }
  | _
      { store_string_char(get_lexeme_char lexbuf 0);
        stringtoken lexbuf }

(* Reads what follows the opening delimiter of a character literal, closing
   delimiter included, and returns the character denoted. *)
and chartoken = parse
    [^ `\\` `\``] "`"
      { get_lexeme_char lexbuf 0 }
  | `\\` [`\\` `\`` `n` `t` `b` `r`] "`"
      { char_for_backslash (get_lexeme_char lexbuf 1) }
  | `\\` [`0`-`9`] [`0`-`9`] [`0`-`9`] "`"
      { char_for_decimal_code lexbuf 1 }
  | _
      { failwith "Erreur dans un caractère littéral"}
;;