Split dune and jbuild lexers
Signed-off-by: Rudi Grinberg <rudi.grinberg@gmail.com>
This commit is contained in:
parent
411552e4c7
commit
e3aa13424d
|
@ -3,4 +3,4 @@
|
||||||
(synopsis "[Internal] S-expression library")
|
(synopsis "[Internal] S-expression library")
|
||||||
(public_name dune.usexp)))
|
(public_name dune.usexp)))
|
||||||
|
|
||||||
(ocamllex (lexer))
|
(ocamllex (dune_lexer jbuild_lexer))
|
||||||
|
|
|
@ -1,71 +1,5 @@
|
||||||
{
|
{
|
||||||
module Token = struct
|
open Lexer0
|
||||||
type t =
|
|
||||||
| Atom of Atom.t
|
|
||||||
| Quoted_string of string
|
|
||||||
| Lparen
|
|
||||||
| Rparen
|
|
||||||
| Sexp_comment
|
|
||||||
| Eof
|
|
||||||
end
|
|
||||||
|
|
||||||
type t = Lexing.lexbuf -> Token.t
|
|
||||||
|
|
||||||
module Error = struct
|
|
||||||
type t =
|
|
||||||
{ start : Lexing.position
|
|
||||||
; stop : Lexing.position
|
|
||||||
; message : string
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
exception Error of Error.t
|
|
||||||
|
|
||||||
let error ?(delta=0) lexbuf message =
|
|
||||||
let start = Lexing.lexeme_start_p lexbuf in
|
|
||||||
raise
|
|
||||||
(Error { start = { start with pos_cnum = start.pos_cnum + delta }
|
|
||||||
; stop = Lexing.lexeme_end_p lexbuf
|
|
||||||
; message
|
|
||||||
})
|
|
||||||
|
|
||||||
(* The difference between the old and new syntax is that the old
|
|
||||||
syntax allows backslash following by any characters other than 'n',
|
|
||||||
'x', ... and interpret it as it. The new syntax is stricter in
|
|
||||||
order to allow introducing new escape sequence in the future if
|
|
||||||
needed. *)
|
|
||||||
type escape_mode =
|
|
||||||
| In_block_comment (* Inside #|...|# comments (old syntax) *)
|
|
||||||
| Old_syntax
|
|
||||||
| New_syntax
|
|
||||||
|
|
||||||
let eval_decimal_char c = Char.code c - Char.code '0'
|
|
||||||
|
|
||||||
let eval_decimal_escape c1 c2 c3 =
|
|
||||||
(eval_decimal_char c1) * 100 +
|
|
||||||
(eval_decimal_char c2) * 10 +
|
|
||||||
(eval_decimal_char c3)
|
|
||||||
|
|
||||||
let eval_hex_char c =
|
|
||||||
match c with
|
|
||||||
| '0'..'9' -> Char.code c - Char.code '0'
|
|
||||||
| 'a'..'f' -> Char.code c - Char.code 'a' + 10
|
|
||||||
| 'A'..'F' -> Char.code c - Char.code 'A' + 10
|
|
||||||
| _ -> -1
|
|
||||||
|
|
||||||
let eval_hex_escape c1 c2 =
|
|
||||||
(eval_hex_char c1) * 16 +
|
|
||||||
(eval_hex_char c2)
|
|
||||||
|
|
||||||
type escape_sequence =
|
|
||||||
| Newline
|
|
||||||
| Other
|
|
||||||
|
|
||||||
let escaped_buf = Buffer.create 256
|
|
||||||
|
|
||||||
type block_string_line_kind =
|
|
||||||
| With_escape_sequences
|
|
||||||
| Raw
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let comment = ';' [^ '\n' '\r']*
|
let comment = ';' [^ '\n' '\r']*
|
||||||
|
@ -74,160 +8,11 @@ let blank = [' ' '\t' '\012']
|
||||||
let digit = ['0'-'9']
|
let digit = ['0'-'9']
|
||||||
let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
|
let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
|
||||||
|
|
||||||
let atom_char_jbuild =
|
|
||||||
[^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\012']
|
|
||||||
let atom_char_dune =
|
let atom_char_dune =
|
||||||
[^ '%' ';' '(' ')' '"' '\000'-'\032' '\127'-'\255']
|
[^ '%' ';' '(' ')' '"' '\000'-'\032' '\127'-'\255']
|
||||||
|
|
||||||
(* rule for jbuild files *)
|
|
||||||
rule jbuild_token = parse
|
|
||||||
| newline
|
|
||||||
{ Lexing.new_line lexbuf; jbuild_token lexbuf }
|
|
||||||
| blank+ | comment
|
|
||||||
{ jbuild_token lexbuf }
|
|
||||||
| '('
|
|
||||||
{ Token.Lparen }
|
|
||||||
| ')'
|
|
||||||
{ Rparen }
|
|
||||||
| '"'
|
|
||||||
{ Buffer.clear escaped_buf;
|
|
||||||
let start = Lexing.lexeme_start_p lexbuf in
|
|
||||||
let s = quoted_string Old_syntax lexbuf in
|
|
||||||
lexbuf.lex_start_p <- start;
|
|
||||||
Quoted_string s
|
|
||||||
}
|
|
||||||
| "#|"
|
|
||||||
{ jbuild_block_comment lexbuf;
|
|
||||||
jbuild_token lexbuf
|
|
||||||
}
|
|
||||||
| "#;"
|
|
||||||
{ Sexp_comment }
|
|
||||||
| eof
|
|
||||||
{ Eof }
|
|
||||||
| ""
|
|
||||||
{ jbuild_atom "" (Lexing.lexeme_start_p lexbuf) lexbuf }
|
|
||||||
|
|
||||||
and jbuild_atom acc start = parse
|
|
||||||
| '#'+ '|'
|
|
||||||
{ lexbuf.lex_start_p <- start;
|
|
||||||
error lexbuf "jbuild_atoms cannot contain #|"
|
|
||||||
}
|
|
||||||
| '|'+ '#'
|
|
||||||
{ lexbuf.lex_start_p <- start;
|
|
||||||
error lexbuf "jbuild_atoms cannot contain |#"
|
|
||||||
}
|
|
||||||
| ('#'+ | '|'+ | (atom_char_jbuild # ['|' '#'])) as s
|
|
||||||
{ jbuild_atom (if acc = "" then s else acc ^ s) start lexbuf
|
|
||||||
}
|
|
||||||
| ""
|
|
||||||
{ if acc = "" then
|
|
||||||
error lexbuf "Internal error in the S-expression parser, \
|
|
||||||
please report upstream.";
|
|
||||||
lexbuf.lex_start_p <- start;
|
|
||||||
Token.Atom (Atom.of_string acc)
|
|
||||||
}
|
|
||||||
|
|
||||||
and quoted_string mode = parse
|
|
||||||
| '"'
|
|
||||||
{ Buffer.contents escaped_buf }
|
|
||||||
| '\\'
|
|
||||||
{ match escape_sequence mode lexbuf with
|
|
||||||
| Newline -> quoted_string_after_escaped_newline mode lexbuf
|
|
||||||
| Other -> quoted_string mode lexbuf
|
|
||||||
}
|
|
||||||
| newline as s
|
|
||||||
{ Lexing.new_line lexbuf;
|
|
||||||
Buffer.add_string escaped_buf s;
|
|
||||||
quoted_string mode lexbuf
|
|
||||||
}
|
|
||||||
| _ as c
|
|
||||||
{ Buffer.add_char escaped_buf c;
|
|
||||||
quoted_string mode lexbuf
|
|
||||||
}
|
|
||||||
| eof
|
|
||||||
{ if mode <> In_block_comment then
|
|
||||||
error lexbuf "unterminated quoted string";
|
|
||||||
Buffer.contents escaped_buf
|
|
||||||
}
|
|
||||||
|
|
||||||
and quoted_string_after_escaped_newline mode = parse
|
|
||||||
| [' ' '\t']*
|
|
||||||
{ quoted_string mode lexbuf }
|
|
||||||
|
|
||||||
and escape_sequence mode = parse
|
|
||||||
| newline
|
|
||||||
{ Lexing.new_line lexbuf;
|
|
||||||
Newline }
|
|
||||||
| ['\\' '\'' '"' 'n' 't' 'b' 'r'] as c
|
|
||||||
{ let c =
|
|
||||||
match c with
|
|
||||||
| 'n' -> '\n'
|
|
||||||
| 'r' -> '\r'
|
|
||||||
| 'b' -> '\b'
|
|
||||||
| 't' -> '\t'
|
|
||||||
| _ -> c
|
|
||||||
in
|
|
||||||
Buffer.add_char escaped_buf c;
|
|
||||||
Other
|
|
||||||
}
|
|
||||||
| (digit as c1) (digit as c2) (digit as c3)
|
|
||||||
{ let v = eval_decimal_escape c1 c2 c3 in
|
|
||||||
if mode <> In_block_comment && v > 255 then
|
|
||||||
error lexbuf "escape sequence in quoted string out of range"
|
|
||||||
~delta:(-1);
|
|
||||||
Buffer.add_char escaped_buf (Char.chr v);
|
|
||||||
Other
|
|
||||||
}
|
|
||||||
| digit* as s
|
|
||||||
{ if mode <> In_block_comment then
|
|
||||||
error lexbuf "unterminated decimal escape sequence" ~delta:(-1);
|
|
||||||
Buffer.add_char escaped_buf '\\';
|
|
||||||
Buffer.add_string escaped_buf s;
|
|
||||||
Other
|
|
||||||
}
|
|
||||||
| 'x' (hexdigit as c1) (hexdigit as c2)
|
|
||||||
{ let v = eval_hex_escape c1 c2 in
|
|
||||||
Buffer.add_char escaped_buf (Char.chr v);
|
|
||||||
Other
|
|
||||||
}
|
|
||||||
| 'x' hexdigit* as s
|
|
||||||
{ if mode <> In_block_comment then
|
|
||||||
error lexbuf "unterminated hexadecimal escape sequence" ~delta:(-1);
|
|
||||||
Buffer.add_char escaped_buf '\\';
|
|
||||||
Buffer.add_string escaped_buf s;
|
|
||||||
Other
|
|
||||||
}
|
|
||||||
| _ as c
|
|
||||||
{ if mode = New_syntax then
|
|
||||||
error lexbuf "unknown escape sequence" ~delta:(-1);
|
|
||||||
Buffer.add_char escaped_buf '\\';
|
|
||||||
Buffer.add_char escaped_buf c;
|
|
||||||
Other
|
|
||||||
}
|
|
||||||
| eof
|
|
||||||
{ if mode <> In_block_comment then
|
|
||||||
error lexbuf "unterminated escape sequence" ~delta:(-1);
|
|
||||||
Other
|
|
||||||
}
|
|
||||||
|
|
||||||
and jbuild_block_comment = parse
|
|
||||||
| '"'
|
|
||||||
{ Buffer.clear escaped_buf;
|
|
||||||
ignore (quoted_string In_block_comment lexbuf : string);
|
|
||||||
jbuild_block_comment lexbuf
|
|
||||||
}
|
|
||||||
| "|#"
|
|
||||||
{ ()
|
|
||||||
}
|
|
||||||
| eof
|
|
||||||
{ error lexbuf "unterminated block comment"
|
|
||||||
}
|
|
||||||
| _
|
|
||||||
{ jbuild_block_comment lexbuf
|
|
||||||
}
|
|
||||||
|
|
||||||
(* rule for dune files *)
|
(* rule for dune files *)
|
||||||
and token = parse
|
rule token = parse
|
||||||
| newline
|
| newline
|
||||||
{ Lexing.new_line lexbuf; token lexbuf }
|
{ Lexing.new_line lexbuf; token lexbuf }
|
||||||
| blank+ | comment
|
| blank+ | comment
|
||||||
|
@ -316,3 +101,86 @@ and raw_block_string = parse
|
||||||
| eof
|
| eof
|
||||||
{ Buffer.contents escaped_buf
|
{ Buffer.contents escaped_buf
|
||||||
}
|
}
|
||||||
|
|
||||||
|
and quoted_string mode = parse
|
||||||
|
| '"'
|
||||||
|
{ Buffer.contents escaped_buf }
|
||||||
|
| '\\'
|
||||||
|
{ match escape_sequence mode lexbuf with
|
||||||
|
| Newline -> quoted_string_after_escaped_newline mode lexbuf
|
||||||
|
| Other -> quoted_string mode lexbuf
|
||||||
|
}
|
||||||
|
| newline as s
|
||||||
|
{ Lexing.new_line lexbuf;
|
||||||
|
Buffer.add_string escaped_buf s;
|
||||||
|
quoted_string mode lexbuf
|
||||||
|
}
|
||||||
|
| _ as c
|
||||||
|
{ Buffer.add_char escaped_buf c;
|
||||||
|
quoted_string mode lexbuf
|
||||||
|
}
|
||||||
|
| eof
|
||||||
|
{ if mode <> In_block_comment then
|
||||||
|
error lexbuf "unterminated quoted string";
|
||||||
|
Buffer.contents escaped_buf
|
||||||
|
}
|
||||||
|
|
||||||
|
and escape_sequence mode = parse
|
||||||
|
| newline
|
||||||
|
{ Lexing.new_line lexbuf;
|
||||||
|
Newline }
|
||||||
|
| ['\\' '\'' '"' 'n' 't' 'b' 'r'] as c
|
||||||
|
{ let c =
|
||||||
|
match c with
|
||||||
|
| 'n' -> '\n'
|
||||||
|
| 'r' -> '\r'
|
||||||
|
| 'b' -> '\b'
|
||||||
|
| 't' -> '\t'
|
||||||
|
| _ -> c
|
||||||
|
in
|
||||||
|
Buffer.add_char escaped_buf c;
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| (digit as c1) (digit as c2) (digit as c3)
|
||||||
|
{ let v = eval_decimal_escape c1 c2 c3 in
|
||||||
|
if mode <> In_block_comment && v > 255 then
|
||||||
|
error lexbuf "escape sequence in quoted string out of range"
|
||||||
|
~delta:(-1);
|
||||||
|
Buffer.add_char escaped_buf (Char.chr v);
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| digit* as s
|
||||||
|
{ if mode <> In_block_comment then
|
||||||
|
error lexbuf "unterminated decimal escape sequence" ~delta:(-1);
|
||||||
|
Buffer.add_char escaped_buf '\\';
|
||||||
|
Buffer.add_string escaped_buf s;
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| 'x' (hexdigit as c1) (hexdigit as c2)
|
||||||
|
{ let v = eval_hex_escape c1 c2 in
|
||||||
|
Buffer.add_char escaped_buf (Char.chr v);
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| 'x' hexdigit* as s
|
||||||
|
{ if mode <> In_block_comment then
|
||||||
|
error lexbuf "unterminated hexadecimal escape sequence" ~delta:(-1);
|
||||||
|
Buffer.add_char escaped_buf '\\';
|
||||||
|
Buffer.add_string escaped_buf s;
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| _ as c
|
||||||
|
{ if mode = New_syntax then
|
||||||
|
error lexbuf "unknown escape sequence" ~delta:(-1);
|
||||||
|
Buffer.add_char escaped_buf '\\';
|
||||||
|
Buffer.add_char escaped_buf c;
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| eof
|
||||||
|
{ if mode <> In_block_comment then
|
||||||
|
error lexbuf "unterminated escape sequence" ~delta:(-1);
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
|
||||||
|
and quoted_string_after_escaped_newline mode = parse
|
||||||
|
| [' ' '\t']*
|
||||||
|
{ quoted_string mode lexbuf }
|
|
@ -0,0 +1,159 @@
|
||||||
|
{
|
||||||
|
open Lexer0
|
||||||
|
}
|
||||||
|
|
||||||
|
let comment = ';' [^ '\n' '\r']*
|
||||||
|
let newline = '\r'? '\n'
|
||||||
|
let blank = [' ' '\t' '\012']
|
||||||
|
let digit = ['0'-'9']
|
||||||
|
let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
|
||||||
|
|
||||||
|
let atom_char_jbuild =
|
||||||
|
[^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\012']
|
||||||
|
|
||||||
|
(* rule for jbuild files *)
|
||||||
|
rule token = parse
|
||||||
|
| newline
|
||||||
|
{ Lexing.new_line lexbuf; token lexbuf }
|
||||||
|
| blank+ | comment
|
||||||
|
{ token lexbuf }
|
||||||
|
| '('
|
||||||
|
{ Token.Lparen }
|
||||||
|
| ')'
|
||||||
|
{ Rparen }
|
||||||
|
| '"'
|
||||||
|
{ Buffer.clear escaped_buf;
|
||||||
|
let start = Lexing.lexeme_start_p lexbuf in
|
||||||
|
let s = quoted_string Old_syntax lexbuf in
|
||||||
|
lexbuf.lex_start_p <- start;
|
||||||
|
Quoted_string s
|
||||||
|
}
|
||||||
|
| "#|"
|
||||||
|
{ jbuild_block_comment lexbuf;
|
||||||
|
token lexbuf
|
||||||
|
}
|
||||||
|
| "#;"
|
||||||
|
{ Sexp_comment }
|
||||||
|
| eof
|
||||||
|
{ Eof }
|
||||||
|
| ""
|
||||||
|
{ jbuild_atom "" (Lexing.lexeme_start_p lexbuf) lexbuf }
|
||||||
|
|
||||||
|
and jbuild_atom acc start = parse
|
||||||
|
| '#'+ '|'
|
||||||
|
{ lexbuf.lex_start_p <- start;
|
||||||
|
error lexbuf "jbuild_atoms cannot contain #|"
|
||||||
|
}
|
||||||
|
| '|'+ '#'
|
||||||
|
{ lexbuf.lex_start_p <- start;
|
||||||
|
error lexbuf "jbuild_atoms cannot contain |#"
|
||||||
|
}
|
||||||
|
| ('#'+ | '|'+ | (atom_char_jbuild # ['|' '#'])) as s
|
||||||
|
{ jbuild_atom (if acc = "" then s else acc ^ s) start lexbuf
|
||||||
|
}
|
||||||
|
| ""
|
||||||
|
{ if acc = "" then
|
||||||
|
error lexbuf "Internal error in the S-expression parser, \
|
||||||
|
please report upstream.";
|
||||||
|
lexbuf.lex_start_p <- start;
|
||||||
|
Token.Atom (Atom.of_string acc)
|
||||||
|
}
|
||||||
|
|
||||||
|
and quoted_string mode = parse
|
||||||
|
| '"'
|
||||||
|
{ Buffer.contents escaped_buf }
|
||||||
|
| '\\'
|
||||||
|
{ match escape_sequence mode lexbuf with
|
||||||
|
| Newline -> quoted_string_after_escaped_newline mode lexbuf
|
||||||
|
| Other -> quoted_string mode lexbuf
|
||||||
|
}
|
||||||
|
| newline as s
|
||||||
|
{ Lexing.new_line lexbuf;
|
||||||
|
Buffer.add_string escaped_buf s;
|
||||||
|
quoted_string mode lexbuf
|
||||||
|
}
|
||||||
|
| _ as c
|
||||||
|
{ Buffer.add_char escaped_buf c;
|
||||||
|
quoted_string mode lexbuf
|
||||||
|
}
|
||||||
|
| eof
|
||||||
|
{ if mode <> In_block_comment then
|
||||||
|
error lexbuf "unterminated quoted string";
|
||||||
|
Buffer.contents escaped_buf
|
||||||
|
}
|
||||||
|
|
||||||
|
and quoted_string_after_escaped_newline mode = parse
|
||||||
|
| [' ' '\t']*
|
||||||
|
{ quoted_string mode lexbuf }
|
||||||
|
|
||||||
|
and jbuild_block_comment = parse
|
||||||
|
| '"'
|
||||||
|
{ Buffer.clear escaped_buf;
|
||||||
|
ignore (quoted_string In_block_comment lexbuf : string);
|
||||||
|
jbuild_block_comment lexbuf
|
||||||
|
}
|
||||||
|
| "|#"
|
||||||
|
{ ()
|
||||||
|
}
|
||||||
|
| eof
|
||||||
|
{ error lexbuf "unterminated block comment"
|
||||||
|
}
|
||||||
|
| _
|
||||||
|
{ jbuild_block_comment lexbuf
|
||||||
|
}
|
||||||
|
|
||||||
|
and escape_sequence mode = parse
|
||||||
|
| newline
|
||||||
|
{ Lexing.new_line lexbuf;
|
||||||
|
Newline }
|
||||||
|
| ['\\' '\'' '"' 'n' 't' 'b' 'r'] as c
|
||||||
|
{ let c =
|
||||||
|
match c with
|
||||||
|
| 'n' -> '\n'
|
||||||
|
| 'r' -> '\r'
|
||||||
|
| 'b' -> '\b'
|
||||||
|
| 't' -> '\t'
|
||||||
|
| _ -> c
|
||||||
|
in
|
||||||
|
Buffer.add_char escaped_buf c;
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| (digit as c1) (digit as c2) (digit as c3)
|
||||||
|
{ let v = eval_decimal_escape c1 c2 c3 in
|
||||||
|
if mode <> In_block_comment && v > 255 then
|
||||||
|
error lexbuf "escape sequence in quoted string out of range"
|
||||||
|
~delta:(-1);
|
||||||
|
Buffer.add_char escaped_buf (Char.chr v);
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| digit* as s
|
||||||
|
{ if mode <> In_block_comment then
|
||||||
|
error lexbuf "unterminated decimal escape sequence" ~delta:(-1);
|
||||||
|
Buffer.add_char escaped_buf '\\';
|
||||||
|
Buffer.add_string escaped_buf s;
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| 'x' (hexdigit as c1) (hexdigit as c2)
|
||||||
|
{ let v = eval_hex_escape c1 c2 in
|
||||||
|
Buffer.add_char escaped_buf (Char.chr v);
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| 'x' hexdigit* as s
|
||||||
|
{ if mode <> In_block_comment then
|
||||||
|
error lexbuf "unterminated hexadecimal escape sequence" ~delta:(-1);
|
||||||
|
Buffer.add_char escaped_buf '\\';
|
||||||
|
Buffer.add_string escaped_buf s;
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| _ as c
|
||||||
|
{ if mode = New_syntax then
|
||||||
|
error lexbuf "unknown escape sequence" ~delta:(-1);
|
||||||
|
Buffer.add_char escaped_buf '\\';
|
||||||
|
Buffer.add_char escaped_buf c;
|
||||||
|
Other
|
||||||
|
}
|
||||||
|
| eof
|
||||||
|
{ if mode <> In_block_comment then
|
||||||
|
error lexbuf "unterminated escape sequence" ~delta:(-1);
|
||||||
|
Other
|
||||||
|
}
|
|
@ -0,0 +1,4 @@
|
||||||
|
include Lexer0
|
||||||
|
|
||||||
|
let token = Dune_lexer.token
|
||||||
|
let jbuild_token = Jbuild_lexer.token
|
|
@ -0,0 +1,68 @@
|
||||||
|
|
||||||
|
module Token = struct
|
||||||
|
type t =
|
||||||
|
| Atom of Atom.t
|
||||||
|
| Quoted_string of string
|
||||||
|
| Lparen
|
||||||
|
| Rparen
|
||||||
|
| Sexp_comment
|
||||||
|
| Eof
|
||||||
|
end
|
||||||
|
|
||||||
|
type t = Lexing.lexbuf -> Token.t
|
||||||
|
|
||||||
|
module Error = struct
|
||||||
|
type t =
|
||||||
|
{ start : Lexing.position
|
||||||
|
; stop : Lexing.position
|
||||||
|
; message : string
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
exception Error of Error.t
|
||||||
|
|
||||||
|
let error ?(delta=0) lexbuf message =
|
||||||
|
let start = Lexing.lexeme_start_p lexbuf in
|
||||||
|
raise
|
||||||
|
(Error { start = { start with pos_cnum = start.pos_cnum + delta }
|
||||||
|
; stop = Lexing.lexeme_end_p lexbuf
|
||||||
|
; message
|
||||||
|
})
|
||||||
|
|
||||||
|
let escaped_buf = Buffer.create 256
|
||||||
|
|
||||||
|
(* The difference between the old and new syntax is that the old
|
||||||
|
syntax allows backslash following by any characters other than 'n',
|
||||||
|
'x', ... and interpret it as it. The new syntax is stricter in
|
||||||
|
order to allow introducing new escape sequence in the future if
|
||||||
|
needed. *)
|
||||||
|
type escape_mode =
|
||||||
|
| In_block_comment (* Inside #|...|# comments (old syntax) *)
|
||||||
|
| Old_syntax
|
||||||
|
| New_syntax
|
||||||
|
|
||||||
|
type escape_sequence =
|
||||||
|
| Newline
|
||||||
|
| Other
|
||||||
|
|
||||||
|
type block_string_line_kind =
|
||||||
|
| With_escape_sequences
|
||||||
|
| Raw
|
||||||
|
|
||||||
|
let eval_decimal_char c = Char.code c - Char.code '0'
|
||||||
|
|
||||||
|
let eval_decimal_escape c1 c2 c3 =
|
||||||
|
(eval_decimal_char c1) * 100 +
|
||||||
|
(eval_decimal_char c2) * 10 +
|
||||||
|
(eval_decimal_char c3)
|
||||||
|
|
||||||
|
let eval_hex_char c =
|
||||||
|
match c with
|
||||||
|
| '0'..'9' -> Char.code c - Char.code '0'
|
||||||
|
| 'a'..'f' -> Char.code c - Char.code 'a' + 10
|
||||||
|
| 'A'..'F' -> Char.code c - Char.code 'A' + 10
|
||||||
|
| _ -> -1
|
||||||
|
|
||||||
|
let eval_hex_escape c1 c2 =
|
||||||
|
(eval_hex_char c1) * 16 +
|
||||||
|
(eval_hex_char c2)
|
|
@ -0,0 +1,44 @@
|
||||||
|
module Token : sig
|
||||||
|
type t =
|
||||||
|
| Atom of Atom.t
|
||||||
|
| Quoted_string of string
|
||||||
|
| Lparen
|
||||||
|
| Rparen
|
||||||
|
| Sexp_comment
|
||||||
|
| Eof
|
||||||
|
end
|
||||||
|
|
||||||
|
type t = Lexing.lexbuf -> Token.t
|
||||||
|
|
||||||
|
module Error : sig
|
||||||
|
type t =
|
||||||
|
{ start : Lexing.position
|
||||||
|
; stop : Lexing.position
|
||||||
|
; message : string
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
val error : ?delta:int -> Lexing.lexbuf -> string -> 'a
|
||||||
|
|
||||||
|
val escaped_buf : Buffer.t
|
||||||
|
|
||||||
|
exception Error of Error.t
|
||||||
|
|
||||||
|
type escape_mode =
|
||||||
|
| In_block_comment (* Inside #|...|# comments (old syntax) *)
|
||||||
|
| Old_syntax
|
||||||
|
| New_syntax
|
||||||
|
|
||||||
|
type escape_sequence =
|
||||||
|
| Newline
|
||||||
|
| Other
|
||||||
|
|
||||||
|
type block_string_line_kind =
|
||||||
|
| With_escape_sequences
|
||||||
|
| Raw
|
||||||
|
|
||||||
|
val eval_decimal_char : char -> int
|
||||||
|
|
||||||
|
val eval_decimal_escape : char -> char -> char -> int
|
||||||
|
|
||||||
|
val eval_hex_escape : char -> char -> int
|
Loading…
Reference in New Issue