2016-11-03 16:44:09 +00:00
|
|
|
{
|
|
|
|
type stack =
|
|
|
|
| Empty
|
|
|
|
| Open of Lexing.position * stack
|
|
|
|
| Sexp of Sexp.t * Sexp.Locs.t * stack
|
|
|
|
|
2016-12-02 13:54:32 +00:00
|
|
|
let error = Loc.fail_lex
|
2016-11-03 16:44:09 +00:00
|
|
|
|
|
|
|
let make_list =
|
|
|
|
let rec loop lexbuf acc acc_locs = function
|
|
|
|
| Empty ->
|
|
|
|
error lexbuf "right parenthesis without matching left parenthesis"
|
|
|
|
| Open (start, stack) ->
|
|
|
|
Sexp (List acc,
|
|
|
|
List ({ start; stop = Lexing.lexeme_end_p lexbuf }, acc_locs),
|
|
|
|
stack)
|
|
|
|
| Sexp (sexp, locs, stack) -> loop lexbuf (sexp :: acc) (locs :: acc_locs) stack
|
|
|
|
in
|
|
|
|
fun lexbuf stack -> loop lexbuf [] [] stack
|
|
|
|
|
|
|
|
let new_sexp loop stack lexbuf =
|
|
|
|
match stack with
|
|
|
|
| Sexp (sexp, locs, Empty) -> Some (sexp, locs)
|
|
|
|
| _ -> loop stack lexbuf
|
|
|
|
|
|
|
|
let atom_loc lexbuf : Sexp.Locs.t =
|
|
|
|
Atom
|
|
|
|
{ start = Lexing.lexeme_start_p lexbuf
|
|
|
|
; stop = Lexing.lexeme_end_p lexbuf
|
|
|
|
}
|
2016-12-02 13:54:32 +00:00
|
|
|
|
|
|
|
let char_for_backslash = function
|
|
|
|
| 'n' -> '\010'
|
|
|
|
| 'r' -> '\013'
|
|
|
|
| 'b' -> '\008'
|
|
|
|
| 't' -> '\009'
|
|
|
|
| c -> c
|
|
|
|
|
|
|
|
let dec_code c1 c2 c3 =
|
|
|
|
100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48)
|
|
|
|
|
|
|
|
let hex_code c1 c2 =
|
|
|
|
let d1 = Char.code c1 in
|
|
|
|
let val1 =
|
|
|
|
if d1 >= 97 then d1 - 87
|
|
|
|
else if d1 >= 65 then d1 - 55
|
|
|
|
else d1 - 48 in
|
|
|
|
let d2 = Char.code c2 in
|
|
|
|
let val2 =
|
|
|
|
if d2 >= 97 then d2 - 87
|
|
|
|
else if d2 >= 65 then d2 - 55
|
|
|
|
else d2 - 48 in
|
|
|
|
val1 * 16 + val2
|
|
|
|
|
|
|
|
let escaped_buf = Buffer.create 256
|
2016-11-03 16:44:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
let lf = '\010'
|
|
|
|
let lf_cr = ['\010' '\013']
|
|
|
|
let dos_newline = "\013\010"
|
|
|
|
let blank = [' ' '\009' '\012']
|
|
|
|
let unquoted = [^ ';' '(' ')' '"'] # blank # lf_cr
|
2016-12-02 13:54:32 +00:00
|
|
|
let digit = ['0'-'9']
|
|
|
|
let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
|
2016-11-03 16:44:09 +00:00
|
|
|
|
|
|
|
rule main stack = parse
|
|
|
|
| lf | dos_newline
|
|
|
|
{ Lexing.new_line lexbuf; main stack lexbuf }
|
|
|
|
| blank+
|
|
|
|
{ main stack lexbuf }
|
|
|
|
| (';' (_ # lf_cr)*)
|
|
|
|
{ main stack lexbuf }
|
|
|
|
| '('
|
|
|
|
{ main (Open (Lexing.lexeme_start_p lexbuf, stack)) lexbuf }
|
|
|
|
| ')'
|
|
|
|
{ new_sexp main (make_list lexbuf stack) lexbuf }
|
2016-12-02 13:54:32 +00:00
|
|
|
| '"'
|
|
|
|
{ Buffer.clear escaped_buf;
|
|
|
|
scan_string escaped_buf (Lexing.lexeme_start_p lexbuf) stack lexbuf
|
|
|
|
}
|
2016-11-03 16:44:09 +00:00
|
|
|
| unquoted* as s
|
|
|
|
{ new_sexp main (Sexp (Atom s, atom_loc lexbuf, stack)) lexbuf }
|
|
|
|
| eof
|
|
|
|
{ match stack with
|
|
|
|
| Empty -> None
|
|
|
|
| _ -> error lexbuf "unterminated s-expression" }
|
|
|
|
| _
|
|
|
|
{ error lexbuf "syntax error" }
|
|
|
|
|
2016-12-02 13:54:32 +00:00
|
|
|
and scan_string buf start stack = parse
|
|
|
|
| '"'
|
|
|
|
{ new_sexp main
|
|
|
|
(Sexp (Atom (Buffer.contents buf),
|
|
|
|
Atom { start; stop = Lexing.lexeme_end_p lexbuf },
|
|
|
|
stack))
|
|
|
|
lexbuf
|
|
|
|
}
|
|
|
|
| '\\' lf
|
|
|
|
{
|
|
|
|
Lexing.new_line lexbuf;
|
|
|
|
scan_string_after_escaped_newline buf start stack lexbuf
|
|
|
|
}
|
|
|
|
| '\\' dos_newline
|
|
|
|
{
|
|
|
|
Lexing.new_line lexbuf;
|
|
|
|
scan_string_after_escaped_newline buf start stack lexbuf
|
|
|
|
}
|
|
|
|
| '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c)
|
|
|
|
{
|
|
|
|
Buffer.add_char buf (char_for_backslash c);
|
|
|
|
scan_string buf start stack lexbuf
|
|
|
|
}
|
|
|
|
| '\\' (digit as c1) (digit as c2) (digit as c3)
|
|
|
|
{
|
|
|
|
let v = dec_code c1 c2 c3 in
|
|
|
|
if v > 255 then error lexbuf "illegal escape";
|
|
|
|
Buffer.add_char buf (Char.chr v);
|
|
|
|
scan_string buf start stack lexbuf
|
|
|
|
}
|
|
|
|
| '\\' 'x' (hexdigit as c1) (hexdigit as c2)
|
|
|
|
{
|
|
|
|
let v = hex_code c1 c2 in
|
|
|
|
Buffer.add_char buf (Char.chr v);
|
|
|
|
scan_string buf start stack lexbuf
|
|
|
|
}
|
|
|
|
| '\\' (_ as c)
|
|
|
|
{
|
|
|
|
Buffer.add_char buf '\\';
|
|
|
|
Buffer.add_char buf c;
|
|
|
|
scan_string buf start stack lexbuf
|
|
|
|
}
|
|
|
|
| lf
|
|
|
|
{
|
|
|
|
Lexing.new_line lexbuf;
|
|
|
|
Buffer.add_char buf '\n';
|
|
|
|
scan_string buf start stack lexbuf
|
|
|
|
}
|
|
|
|
| ([^ '\\' '"'] # lf)+ as s
|
|
|
|
{
|
|
|
|
Buffer.add_string buf s;
|
|
|
|
scan_string buf start stack lexbuf
|
|
|
|
}
|
|
|
|
| eof
|
|
|
|
{
|
|
|
|
error lexbuf "unterminated string"
|
|
|
|
}
|
|
|
|
|
|
|
|
and scan_string_after_escaped_newline buf start stack = parse
|
|
|
|
| [' ' '\t']*
|
|
|
|
{ scan_string buf start stack lexbuf }
|
|
|
|
| ""
|
|
|
|
{ scan_string buf start stack lexbuf }
|
|
|
|
|
2016-11-03 16:44:09 +00:00
|
|
|
and trailing = parse
|
|
|
|
| lf | dos_newline
|
|
|
|
{ Lexing.new_line lexbuf; trailing lexbuf }
|
|
|
|
| blank+
|
|
|
|
{ trailing lexbuf }
|
|
|
|
| (';' (_ # lf_cr)*)
|
|
|
|
{ trailing lexbuf }
|
|
|
|
| eof
|
|
|
|
{ () }
|
|
|
|
| _
|
|
|
|
{ error lexbuf "garbage after s-expression" }
|
|
|
|
|
|
|
|
{
|
|
|
|
let single lexbuf =
|
|
|
|
match main Empty lexbuf with
|
|
|
|
| None -> error lexbuf "no s-expression found"
|
|
|
|
| Some sexp -> trailing lexbuf; sexp
|
|
|
|
|
|
|
|
let many lexbuf =
|
|
|
|
let rec loop acc =
|
|
|
|
match main Empty lexbuf with
|
|
|
|
| None -> List.rev acc
|
|
|
|
| Some sexp -> loop (sexp :: acc)
|
|
|
|
in
|
|
|
|
loop []
|
|
|
|
}
|