Vendored usexp and switch to it

This commit is contained in:
Jeremie Dimino 2017-12-12 10:16:17 +00:00 committed by Jeremie Dimino
parent bbb6108924
commit 3e525d8eec
26 changed files with 2677 additions and 462 deletions

View File

@ -27,15 +27,33 @@ module String = struct
sub s ~pos ~len:(String.length s - pos))
end
(* Directories with library names *)
let dirs =
[ "vendor/boot"
; "src"
[ ("vendor/boot" , None)
; ("vendor/usexp/src" , Some "Usexp")
; ("src" , None)
]
open Printf
module String_set = Set.Make(String)
module String_map = Map.Make(String)
module Make_map(Key : Map.OrderedType) = struct
include Map.Make(Key)
let of_alist_multi l =
List.fold_left (List.rev l) ~init:empty ~f:(fun acc (k, v) ->
let l =
try
find k acc
with Not_found ->
[]
in
add k (v :: l) acc)
end
module String_map = Make_map(String)
module String_option_map = Make_map(struct type t = string option let compare = compare end)
let () =
match Sys.getenv "OCAMLPARAM" with
@ -136,21 +154,30 @@ let run_ocamllex src =
at_exit (fun () -> try Sys.remove dst with _ -> ());
dst
type module_files =
{ impl : string
; intf : string option
type module_info =
{ impl : string
; intf : string option
; name : string
; libname : string option
; fqn : string (** Fully qualified name *)
}
let fqn libname mod_name =
match libname with
| None -> mod_name
| Some s -> if s = mod_name then s else s ^ "." ^ mod_name
(* Map from module names to ml/mli filenames *)
let modules =
let files_of dir =
Sys.readdir dir |> Array.to_list |> List.map ~f:(Filename.concat dir)
let files_of (dir, libname) =
Sys.readdir dir |> Array.to_list |> List.map ~f:(fun fn ->
(Filename.concat dir fn, libname))
in
let impls, intfs =
List.map dirs ~f:files_of
|> List.concat
|> List.fold_left ~init:(String_map.empty, String_map.empty)
~f:(fun ((impls, intfs) as acc) fn ->
~f:(fun ((impls, intfs) as acc) (fn, libname) ->
let base = Filename.basename fn in
match String.index base '.' with
| exception Not_found -> acc
@ -173,22 +200,31 @@ let modules =
let fn =
if ext = ".mll" then lazy (run_ocamllex fn) else lazy fn
in
(String_map.add mod_name fn impls, intfs)
let fqn = fqn libname mod_name in
(String_map.add fqn (libname, mod_name, fn) impls, intfs)
else
acc
| ".mli" ->
let mod_name = String.capitalize_ascii base in
if is_boot || not (String_map.mem mod_name intfs) then
(impls, String_map.add mod_name fn intfs)
let fqn = fqn libname mod_name in
(impls, String_map.add fqn fn intfs)
else
acc
| _ -> acc)
in
String_map.merge
(fun _ impl intf ->
(fun fqn impl intf ->
match impl with
| None -> None
| Some impl -> Some { impl = Lazy.force impl; intf })
| Some (libname, name, impl) ->
let impl = Lazy.force impl in
Some { impl
; intf
; name
; libname
; fqn
})
impls intfs
let split_words s =
@ -220,28 +256,44 @@ let read_lines fn =
close_in ic;
lines
let read_deps files =
let read_deps files_by_lib =
let out_fn = "boot-depends.txt" in
at_exit (fun () -> Sys.remove out_fn);
let n =
exec "%s -modules %s > %s"
(Filename.quote ocamldep) (String.concat ~sep:" " files) out_fn
in
if n <> 0 then exit n;
List.map (read_lines out_fn) ~f:(fun line ->
let i = String.index line ':' in
let unit =
String.sub line ~pos:0 ~len:i
|> Filename.basename
|> (fun s -> String.sub s ~pos:0 ~len:(String.index s '.'))
|> String.capitalize_ascii
List.map files_by_lib ~f:(fun (libname, files) ->
let n =
exec "%s -modules %s > %s"
(Filename.quote ocamldep) (String.concat ~sep:" " files) out_fn
in
let deps =
split_words (String.sub line ~pos:(i + 1)
~len:(String.length line - (i + 1)))
|> List.filter ~f:(fun m -> String_map.mem m modules)
in
(unit, deps))
if n <> 0 then exit n;
List.map (read_lines out_fn) ~f:(fun line ->
let i = String.index line ':' in
let unit =
String.sub line ~pos:0 ~len:i
|> Filename.basename
|> (fun s -> String.sub s ~pos:0 ~len:(String.index s '.'))
|> String.capitalize_ascii
in
let deps =
split_words (String.sub line ~pos:(i + 1)
~len:(String.length line - (i + 1)))
in
let rec resolve deps acc =
match deps with
| [] -> List.rev acc
| dep :: deps ->
let fqn = fqn libname dep in
let acc =
if String_map.mem fqn modules then
fqn :: acc
else if String_map.mem dep modules then
dep :: acc
else
acc
in
resolve deps acc
in
(fqn libname unit, resolve deps [])))
|> List.concat
let topsort deps =
let n = List.length deps in
@ -263,10 +315,12 @@ let topsort deps =
List.rev !res
let topsorted_module_names =
let files =
List.map (String_map.bindings modules) ~f:(fun (_, x) -> x.impl)
let files_by_lib =
List.map (String_map.bindings modules) ~f:(fun (_, x) -> (x.libname, x.impl))
|> String_option_map.of_alist_multi
|> String_option_map.bindings
in
topsort (read_deps files)
topsort (read_deps files_by_lib)
let count_newlines s =
let newlines = ref 0 in
@ -308,24 +362,57 @@ let generate_file_with_all_the_sources () =
pos_in_generated_file := !pos_in_generated_file + newlines;
pr "# %d %S" (!pos_in_generated_file + 1) generated_file
in
List.iter topsorted_module_names ~f:(fun m ->
let { impl; intf } =
try
String_map.find m modules
with Not_found ->
Printf.ksprintf failwith "module not found: %s" m
in
match intf with
| Some intf ->
pr "module %s : sig" m;
dump intf;
pr "end = struct";
dump impl;
pr "end"
| None ->
pr "module %s = struct" m;
dump impl;
pr "end");
let modules_by_lib =
List.map topsorted_module_names ~f:(fun m ->
let info = String_map.find m modules in
(info.libname, info))
|> String_option_map.of_alist_multi
in
let lib_order =
List.fold_left topsorted_module_names ~init:(String_set.empty, [])
~f:(fun ((seen, rev_order) as acc) m ->
match (String_map.find m modules).libname with
| None -> acc
| Some lib ->
if String_set.mem lib seen then
acc
else
(String_set.add lib seen, lib :: rev_order))
|> snd
|> List.rev_map ~f:(fun lib -> Some lib)
in
let lib_order = lib_order @ [None] in
List.iter lib_order ~f:(fun libname ->
let modules = String_option_map.find libname modules_by_lib in
(match libname with
| None -> ()
| Some s -> pr "module %s = struct" s);
List.iter modules ~f:(fun { name; intf; impl; _ } ->
if Some name = libname then
match intf with
| Some intf ->
pr "include (struct";
dump impl;
pr "end : sig";
dump intf;
pr "end)"
| None ->
dump impl;
else
match intf with
| Some intf ->
pr "module %s : sig" name;
dump intf;
pr "end = struct";
dump impl;
pr "end"
| None ->
pr "module %s = struct" name;
dump impl;
pr "end");
(match libname with
| None -> ()
| Some _ -> pr "end"));
output_string oc "let () = Main.bootstrap ()\n";
close_out oc

View File

@ -147,14 +147,7 @@ let strings p =
let read_sexp p =
contents p
>>^ fun s ->
let lb = Lexing.from_string s in
lb.lex_curr_p <-
{ pos_fname = Path.to_string p
; pos_lnum = 1
; pos_bol = 0
; pos_cnum = 0
};
Sexp_lexer.single lb
Usexp.parse_string s ~fname:(Path.to_string p) ~mode:Single
let if_file_exists p ~then_ ~else_ =
If_file_exists (p, ref (Undecided (then_, else_)))

View File

@ -585,7 +585,7 @@ module Trace = struct
Utils.Cached_digest.load ();
let trace = Hashtbl.create 1024 in
if Sys.file_exists file then begin
let sexp = Sexp_lexer.Load.single file in
let sexp = Sexp.load ~fname:file ~mode:Single in
let bindings =
let open Sexp.Of_sexp in
list (pair Path.t (fun s -> Digest.from_hex (string s))) sexp

View File

@ -406,7 +406,7 @@ let create_for_opam ?root ~switch ~name ?(merlin=false) () =
["config"; "env"; "--root"; root; "--switch"; switch; "--sexp"]
>>= fun s ->
let vars =
Sexp_lexer.single (Lexing.from_string s)
Usexp.parse_string ~fname:"<opam output>" ~mode:Single s
|> Sexp.Of_sexp.(list (pair string string))
|> Env_var_map.of_alist_multi
|> Env_var_map.mapi ~f:(fun var values ->

View File

@ -725,8 +725,9 @@ Add it to your jbuild file to remove this warning.
action_str;
let dir = Path.append ctx.build_dir dir in
let action =
Lexing.from_string action_str
|> Sexp_lexer.single
Usexp.parse_string action_str
~fname:"<internal action for mli to ml>"
~mode:Single
|> Action.Unexpanded.t
in
SC.add_rule sctx

View File

@ -5,7 +5,8 @@
;; (public_name jbuilder)
(libraries (unix
jbuilder_re
jbuilder_opam_file_format))
jbuilder_opam_file_format
usexp))
(synopsis "Internal Jbuilder library, do not use!")))
(ocamllex (sexp_lexer meta_lexer glob_lexer))
(ocamllex (meta_lexer glob_lexer))

View File

@ -144,7 +144,7 @@ end
die "@{<error>Error:@} %s failed to produce a valid jbuild file.\n\
Did you forgot to call [Jbuild_plugin.V*.send]?"
(Path.to_string file);
let sexps = Sexp_lexer.Load.many (Path.to_string generated_jbuild) in
let sexps = Sexp.load ~fname:(Path.to_string generated_jbuild) ~mode:Many in
return (dir, scope, Stanzas.parse scope sexps))
|> Future.all
end
@ -158,7 +158,7 @@ type conf =
let load ~dir ~scope =
let file = Path.relative dir "jbuild" in
match Sexp_lexer.Load.many_or_ocaml_script (Path.to_string file) with
match Sexp.load_many_or_ocaml_script (Path.to_string file) with
| Sexps sexps ->
Jbuilds.Literal (dir, scope, Stanzas.parse scope sexps)
| Ocaml_script ->

View File

@ -1,6 +1,6 @@
open Import
type t =
type t = Usexp.Loc.t =
{ start : Lexing.position
; stop : Lexing.position
}

View File

@ -1,4 +1,4 @@
type t =
type t = Usexp.Loc.t =
{ start : Lexing.position
; stop : Lexing.position
}

View File

@ -1,119 +1,7 @@
open Import
type t =
| Atom of string
| List of t list
type sexp = t
let must_escape str =
let len = String.length str in
len = 0 ||
let rec loop ix =
match str.[ix] with
| '"' | '(' | ')' | ';' | '\\' -> true
| '|' -> ix > 0 && let next = ix - 1 in str.[next] = '#' || loop next
| '#' -> ix > 0 && let next = ix - 1 in str.[next] = '|' || loop next
| '\000' .. '\032' | '\127' .. '\255' -> true
| _ -> ix > 0 && loop (ix - 1)
in
loop (len - 1)
let rec to_string = function
| Atom s -> if must_escape s then sprintf "%S" s else s
| List l -> sprintf "(%s)" (List.map l ~f:to_string |> String.concat ~sep:" ")
let rec pp ppf = function
| Atom s ->
if must_escape s then
Format.fprintf ppf "%S" s
else
Format.pp_print_string ppf s
| List [] ->
Format.pp_print_string ppf "()"
| List (first :: rest) ->
Format.pp_open_box ppf 1;
Format.pp_print_string ppf "(";
Format.pp_open_hvbox ppf 0;
pp ppf first;
List.iter rest ~f:(fun sexp ->
Format.pp_print_space ppf ();
pp ppf sexp);
Format.pp_close_box ppf ();
Format.pp_print_string ppf ")";
Format.pp_close_box ppf ()
let rec pp_split_strings ppf = function
| Atom s ->
if must_escape s then begin
if String.contains s '\n' then begin
match String.split s ~on:'\n' with
| [] -> Format.fprintf ppf "%S" s
| first :: rest ->
Format.fprintf ppf "@[<hv 1>\"@{<atom>%s" (String.escaped first);
List.iter rest ~f:(fun s ->
Format.fprintf ppf "@,\\n%s" (String.escaped s));
Format.fprintf ppf "@}\"@]"
end else
Format.fprintf ppf "%S" s
end else
Format.pp_print_string ppf s
| List [] ->
Format.pp_print_string ppf "()"
| List (first :: rest) ->
Format.pp_open_box ppf 1;
Format.pp_print_string ppf "(";
Format.pp_open_hvbox ppf 0;
pp_split_strings ppf first;
List.iter rest ~f:(fun sexp ->
Format.pp_print_space ppf ();
pp_split_strings ppf sexp);
Format.pp_close_box ppf ();
Format.pp_print_string ppf ")";
Format.pp_close_box ppf ()
type formatter_state =
| In_atom
| In_makefile_action
| In_makefile_stuff
let prepare_formatter ppf =
let state = ref [] in
Format.pp_set_mark_tags ppf true;
let ofuncs = Format.pp_get_formatter_out_functions ppf () in
let tfuncs = Format.pp_get_formatter_tag_functions ppf () in
Format.pp_set_formatter_tag_functions ppf
{ tfuncs with
mark_open_tag = (function
| "atom" -> state := In_atom :: !state; ""
| "makefile-action" -> state := In_makefile_action :: !state; ""
| "makefile-stuff" -> state := In_makefile_stuff :: !state; ""
| s -> tfuncs.mark_open_tag s)
; mark_close_tag = (function
| "atom" | "makefile-action" | "makefile-stuff" -> state := List.tl !state; ""
| s -> tfuncs.mark_close_tag s)
};
Format.pp_set_formatter_out_functions ppf
{ ofuncs with
out_newline = (fun () ->
match !state with
| [In_atom; In_makefile_action] ->
ofuncs.out_string "\\\n\t" 0 3
| [In_atom] ->
ofuncs.out_string "\\\n" 0 2
| [In_makefile_action] ->
ofuncs.out_string " \\\n\t" 0 4
| [In_makefile_stuff] ->
ofuncs.out_string " \\\n" 0 3
| [] ->
ofuncs.out_string "\n" 0 1
| _ -> assert false)
; out_spaces = (fun n ->
ofuncs.out_spaces
(match !state with
| In_atom :: _ -> max 0 (n - 2)
| _ -> n))
}
include (Usexp : module type of struct include Usexp end
with module Loc := Usexp.Loc)
let code_error message vars =
code_errorf "%s"
@ -122,27 +10,53 @@ let code_error message vars =
:: List.map vars ~f:(fun (name, value) ->
List [Atom name; value]))))
let buf_len = 65_536
module Ast = struct
type t =
| Atom of Loc.t * string
| List of Loc.t * t list
let load ~fname ~mode =
Io.with_file_in fname ~f:(fun ic ->
let state = Parser.create ~fname ~mode in
let buf = Bytes.create buf_len in
let rec loop stack =
match input ic buf 0 buf_len with
| 0 -> Parser.feed_eoi state stack
| n -> loop (Parser.feed_subbytes state buf ~pos:0 ~len:n stack)
in
loop Parser.Stack.empty)
let loc = function
| Atom (loc, _) -> loc
| List (loc, _) -> loc
let ocaml_script_prefix = "(* -*- tuareg -*- *)"
let ocaml_script_prefix_len = String.length ocaml_script_prefix
let rec remove_locs : t -> sexp = function
| Atom (_, s) -> Atom s
| List (_, l) -> List (List.map l ~f:remove_locs)
type sexps_or_ocaml_script =
| Sexps of Ast.t list
| Ocaml_script
let to_string t = to_string (remove_locs t)
end
let rec add_loc t ~loc : Ast.t =
match t with
| Atom s -> Atom (loc, s)
| List l -> List (loc, List.map l ~f:(add_loc ~loc))
let load_many_or_ocaml_script fname =
Io.with_file_in fname ~f:(fun ic ->
let state = Parser.create ~fname ~mode:Many in
let buf = Bytes.create buf_len in
let rec loop stack =
match input ic buf 0 buf_len with
| 0 -> Parser.feed_eoi state stack
| n -> loop (Parser.feed_subbytes state buf ~pos:0 ~len:n stack)
in
let rec loop0 stack i =
match input ic buf i (buf_len - i) with
| 0 ->
let stack = Parser.feed_subbytes state buf ~pos:0 ~len:i stack in
Sexps (Parser.feed_eoi state stack)
| n ->
let i = i + n in
if i < ocaml_script_prefix_len then
loop0 stack i
else if Bytes.sub_string buf 0 ocaml_script_prefix_len
[@warning "-6"]
= ocaml_script_prefix then
Ocaml_script
else
let stack = Parser.feed_subbytes state buf ~pos:0 ~len:i stack in
Sexps (loop stack)
in
loop0 Parser.Stack.empty 0)
module type Combinators = sig
type 'a t

View File

@ -1,37 +1,16 @@
open Import
type t =
| Atom of string
| List of t list
module Ast : sig
type sexp = t
type t =
| Atom of Loc.t * string
| List of Loc.t * t list
val loc : t -> Loc.t
val remove_locs : t -> sexp
val to_string : t -> string
end with type sexp := t
val add_loc : t -> loc:Loc.t -> Ast.t
include module type of struct include Usexp end with module Loc := Usexp.Loc
val code_error : string -> (string * t) list -> _
val to_string : t -> string
val load : fname:string -> mode:'a Parser.Mode.t -> 'a
val pp : Format.formatter -> t -> unit
type sexps_or_ocaml_script =
| Sexps of Ast.t list
| Ocaml_script
(** Same as [pp], but split long strings. The formatter must have been
prepared with [prepare_formatter]. *)
val pp_split_strings : Format.formatter -> t -> unit
(** Prepare a formatter for [pp_split_strings]. Additionaly the
formatter escape newlines when the tags "makefile-action" or
"makefile-stuff" are active. *)
val prepare_formatter : Format.formatter -> unit
val load_many_or_ocaml_script : string -> sexps_or_ocaml_script
module type Combinators = sig
type 'a t

View File

@ -1,14 +0,0 @@
val single : Lexing.lexbuf -> Sexp.Ast.t
val many : Lexing.lexbuf -> Sexp.Ast.t list
type sexps_or_ocaml_script =
| Sexps of Sexp.Ast.t list
| Ocaml_script
val many_or_ocaml_script : Lexing.lexbuf -> sexps_or_ocaml_script
module Load : sig
val single : string -> Sexp.Ast.t
val many : string -> Sexp.Ast.t list
val many_or_ocaml_script : string -> sexps_or_ocaml_script
end

View File

@ -1,216 +0,0 @@
{
type sexps_or_ocaml_script =
| Sexps of Sexp.Ast.t list
| Ocaml_script
type stack =
| Empty
| Open of Lexing.position * stack
| Sexp of Sexp.Ast.t * stack
let error = Loc.fail_lex
let make_list =
let rec loop lexbuf acc = function
| Empty ->
error lexbuf "right parenthesis without matching left parenthesis"
| Open (start, stack) ->
Sexp (List ({ start; stop = Lexing.lexeme_end_p lexbuf }, acc),
stack)
| Sexp (sexp, stack) -> loop lexbuf (sexp :: acc) stack
in
fun lexbuf stack -> loop lexbuf [] stack
let new_sexp loop stack lexbuf =
match stack with
| Sexp (sexp, Empty) -> Some sexp
| _ -> loop stack lexbuf
let atom_loc lexbuf : Loc.t =
{ start = Lexing.lexeme_start_p lexbuf
; stop = Lexing.lexeme_end_p lexbuf
}
let char_for_backslash = function
| 'n' -> '\010'
| 'r' -> '\013'
| 'b' -> '\008'
| 't' -> '\009'
| c -> c
let dec_code c1 c2 c3 =
100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48)
let hex_code c1 c2 =
let d1 = Char.code c1 in
let val1 =
if d1 >= 97 then d1 - 87
else if d1 >= 65 then d1 - 55
else d1 - 48 in
let d2 = Char.code c2 in
let val2 =
if d2 >= 97 then d2 - 87
else if d2 >= 65 then d2 - 55
else d2 - 48 in
val1 * 16 + val2
let escaped_buf = Buffer.create 256
}
let lf = '\010'
let lf_cr = ['\010' '\013']
let dos_newline = "\013\010"
let blank = [' ' '\009' '\012']
let unquoted = [^ ';' '(' ')' '"'] # blank # lf_cr
let digit = ['0'-'9']
let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
rule main stack = parse
| lf | dos_newline
{ Lexing.new_line lexbuf; main stack lexbuf }
| blank+
{ main stack lexbuf }
| (';' (_ # lf_cr)*)
{ main stack lexbuf }
| '('
{ main (Open (Lexing.lexeme_start_p lexbuf, stack)) lexbuf }
| ')'
{ new_sexp main (make_list lexbuf stack) lexbuf }
| '"'
{ Buffer.clear escaped_buf;
scan_string escaped_buf (Lexing.lexeme_start_p lexbuf) stack lexbuf
}
| "#|"
{ block_comment 0 stack lexbuf }
| unquoted* as s
{ new_sexp main (Sexp (Atom (atom_loc lexbuf, s), stack)) lexbuf }
| eof
{ match stack with
| Empty -> None
| _ -> error lexbuf "unterminated s-expression" }
| _
{ error lexbuf "syntax error" }
and block_comment depth stack = parse
| "#|"
{ block_comment (depth + 1) stack lexbuf }
| "|#"
{ if depth = 0 then
main stack lexbuf
else
block_comment (depth - 1) stack lexbuf }
| _
{ block_comment depth stack lexbuf }
| eof
{ error lexbuf "unterminated block comment" }
and scan_string buf start stack = parse
| '"'
{ new_sexp main
(Sexp (Atom ({ start; stop = Lexing.lexeme_end_p lexbuf },
Buffer.contents buf),
stack))
lexbuf
}
| '\\' lf
{
Lexing.new_line lexbuf;
scan_string_after_escaped_newline buf start stack lexbuf
}
| '\\' dos_newline
{
Lexing.new_line lexbuf;
scan_string_after_escaped_newline buf start stack lexbuf
}
| '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c)
{
Buffer.add_char buf (char_for_backslash c);
scan_string buf start stack lexbuf
}
| '\\' (digit as c1) (digit as c2) (digit as c3)
{
let v = dec_code c1 c2 c3 in
if v > 255 then error lexbuf "illegal escape";
Buffer.add_char buf (Char.chr v);
scan_string buf start stack lexbuf
}
| '\\' 'x' (hexdigit as c1) (hexdigit as c2)
{
let v = hex_code c1 c2 in
Buffer.add_char buf (Char.chr v);
scan_string buf start stack lexbuf
}
| '\\' (_ as c)
{
Buffer.add_char buf '\\';
Buffer.add_char buf c;
scan_string buf start stack lexbuf
}
| lf
{
Lexing.new_line lexbuf;
Buffer.add_char buf '\n';
scan_string buf start stack lexbuf
}
| ([^ '\\' '"'] # lf)+ as s
{
Buffer.add_string buf s;
scan_string buf start stack lexbuf
}
| eof
{
error lexbuf "unterminated string"
}
and scan_string_after_escaped_newline buf start stack = parse
| [' ' '\t']*
{ scan_string buf start stack lexbuf }
| ""
{ scan_string buf start stack lexbuf }
and trailing = parse
| lf | dos_newline
{ Lexing.new_line lexbuf; trailing lexbuf }
| blank+
{ trailing lexbuf }
| (';' (_ # lf_cr)*)
{ trailing lexbuf }
| eof
{ () }
| _
{ error lexbuf "garbage after s-expression" }
and is_ocaml_script = parse
| "(* -*- tuareg -*- *)" { true }
| "" { false }
{
let single lexbuf =
match main Empty lexbuf with
| None -> error lexbuf "no s-expression found"
| Some sexp -> trailing lexbuf; sexp
let many lexbuf =
let rec loop acc =
match main Empty lexbuf with
| None -> List.rev acc
| Some sexp -> loop (sexp :: acc)
in
loop []
let many_or_ocaml_script lexbuf =
match is_ocaml_script lexbuf with
| true -> Ocaml_script
| false -> Sexps (many lexbuf)
module Load = struct
let single fn =
Io.with_lexbuf_from_file fn ~f:single
let many fn =
Io.with_lexbuf_from_file fn ~f:many
let many_or_ocaml_script fn =
Io.with_lexbuf_from_file fn ~f:many_or_ocaml_script
end
}

View File

@ -191,7 +191,7 @@ module Cached_digest = struct
let load () =
if Sys.file_exists db_file then begin
let sexp = Sexp_lexer.Load.single db_file in
let sexp = Sexp.load ~fname:db_file ~mode:Single in
let bindings =
let open Sexp.Of_sexp in
list

View File

@ -55,7 +55,7 @@ struct
let to_string path x = To_sexp.t path x |> Sexp.to_string
let load path =
Of_sexp.t path (Sexp_lexer.Load.single (Path.to_string path))
Of_sexp.t path (Sexp.load ~fname:(Path.to_string path) ~mode:Single)
end

View File

@ -83,4 +83,4 @@ let t sexps =
; contexts = List.rev contexts
}
let load fn = t (Sexp_lexer.Load.many fn)
let load fname = t (Sexp.load ~fname ~mode:Many)

20
vendor/update-usexp.sh vendored Executable file
View File

@ -0,0 +1,20 @@
#!/bin/bash
set -e -o pipefail
TMP="$(mktemp -d)"
trap "rm -rf $TMP" EXIT
rm -rf usexp
mkdir -p usexp/src
(cd $TMP && git clone https://github.com/janestreet/usexp.git)
SRC=$TMP/usexp
cp -v $SRC/LICENSE.md usexp
cp -v $SRC/src/*.{ml,mli} usexp/src
git checkout usexp/src/jbuild
git add -A .

202
vendor/usexp/LICENSE.md vendored Normal file
View File

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

3
vendor/usexp/src/jbuild vendored Normal file
View File

@ -0,0 +1,3 @@
(jbuild_version 1)
(library ((name usexp)))

View File

@ -0,0 +1,380 @@
type stack =
| Empty
| Open of Lexing.position * stack
| Sexp of Sexp_ast.t * stack
let empty_stack = Empty
type 'a mode =
| Single : Sexp_ast.t mode
| Many : Sexp_ast.t list mode
type 'a state =
{ mutable automaton_state : int
; mutable depth : int
; (* Number of opened #| when parsing a block comment *)
mutable block_comment_depth : int
; (* Number of full sexp to ignore. This correspond to the number of consecutive #;.
We don't have to track nested #; because they do not change the result. *)
mutable ignoring : int
; (* Only meaningful when [ignoring] > 0. Number of opened parentheses of the
outermost sexp comment. *)
mutable ignoring_depth : int
; (* When parsing an escape sequence of the form "\\NNN" or "\\XX", this accumulates
the computed number *)
mutable escaped_value : int
; (* Buffer for accumulating atoms *)
atom_buffer : Buffer.t
; fname : string
; mutable full_sexps : int
; mutable offset : int (* global offset *)
; mutable line_number : int
; mutable bol_offset : int (* offset of beginning of line *)
; (* Starting positions of the current token *)
mutable token_start_pos : Lexing.position
; mode : 'a mode
}
(* these magic numbers are checked in gen_parser_automaton.ml:
let () = assert (initial = 0)
let () = assert (to_int Error = 1) *)
let initial_state = 0
let error_state = 1
let new_state ~fname mode =
{ depth = 0
; automaton_state = initial_state
; block_comment_depth = 0
; ignoring = 0
; ignoring_depth = 0
; escaped_value = 0
; atom_buffer = Buffer.create 128
; mode = mode
; full_sexps = 0
; offset = 0
; line_number = 1
; bol_offset = 0
; fname = fname
; token_start_pos = { pos_fname = fname; pos_cnum = 0; pos_lnum = 1; pos_bol = 0 }
}
let mode t = t.mode
let offset t = t.offset
let line t = t.line_number
let column t = t.offset - t.bol_offset
let position t =
{ Lexing.
pos_fname = t.fname
; pos_cnum = t.offset
; pos_lnum = t.line_number
; pos_bol = t.bol_offset
}
let has_unclosed_paren state = state.depth > 0
let set_error_state state = state.automaton_state <- error_state
let sexp_of_stack : stack -> Sexp_ast.t = function
| Sexp (sexp, Empty) -> sexp
| _ -> failwith "Parser_automaton.sexp_of_stack"
let sexps_of_stack =
let rec loop acc : stack -> Sexp_ast.t list = function
| Empty -> acc
| Open _ -> failwith "Parser_automaton.sexps_of_stack"
| Sexp (sexp, stack) -> loop (sexp :: acc) stack
in
fun stack -> loop [] stack
let automaton_state state = state.automaton_state
module Error = struct
type t =
{ position : Lexing.position
; message : string
}
exception Parse_error of t
module Reason = struct
(* To be kept in sync with the Error module in gen/gen_parser_automaton.ml *)
type t =
| Unexpected_char_parsing_hex_escape
| Unexpected_char_parsing_dec_escape
| Unterminated_quoted_string
| Unterminated_block_comment
| Escape_sequence_out_of_range
| Unclosed_paren
| Too_many_sexps
| Closed_paren_without_opened
| Comment_token_in_unquoted_atom
| Sexp_comment_without_sexp
| Unexpected_character_after_cr
| No_sexp_found_in_input
| Automaton_in_error_state
end
let raise state ~at_eof (reason : Reason.t) =
set_error_state state;
let message =
(* These messages where choosen such that we can build the various Sexplib parsing
functions on top of Parsexp and keep the same exceptions.
At the time of writing this, a simple layer on top of parsexp to implement the
sexplib API is passing all the sexplib tests.
Note that parsexp matches the semantic of Sexp.parse which is slightly
different from the ocamllex/ocamlyacc based parser of Sexplib. The latter one
is less tested and assumed to be less used. *)
match reason with
| Unexpected_char_parsing_hex_escape ->
"unterminated hexadecimal escape sequence"
| Unexpected_char_parsing_dec_escape ->
"unterminated decimal escape sequence"
| Unterminated_quoted_string ->
"unterminated quoted string"
| Unterminated_block_comment ->
"unterminated block comment"
| Escape_sequence_out_of_range ->
"escape sequence in quoted string out of range"
| Unclosed_paren ->
"unclosed parentheses at end of input"
| Too_many_sexps ->
"s-expression followed by data"
| Closed_paren_without_opened ->
"unexpected character: ')'"
| Comment_token_in_unquoted_atom ->
if Buffer.contents state.atom_buffer = "|" then
"illegal end of comment"
else
"comment tokens in unquoted atom"
| Sexp_comment_without_sexp ->
"unterminated sexp comment"
| Unexpected_character_after_cr ->
if at_eof then
"unexpected end of input after carriage return"
else
"unexpected character after carriage return"
| No_sexp_found_in_input ->
"no s-expression found in input"
| Automaton_in_error_state ->
failwith "Parsexp.Parser_automaton: parser is dead"
in
let position = position state in
raise (Parse_error { position; message })
let position t = t.position
let message t = t.message
end
exception Parse_error = Error.Parse_error
type 'a action = 'a state -> char -> stack -> stack
type 'a epsilon_action = 'a state -> stack -> stack
let current_pos ?(delta=0) state : Lexing.position =
let offset = state.offset + delta in
{ pos_fname = state.fname
; pos_lnum = state.line_number
; pos_cnum = offset
; pos_bol = state.bol_offset
}
let set_automaton_state state x = state.automaton_state <- x
let advance state = state.offset <- state.offset + 1
let advance_eol state =
let newline_offset = state.offset in
state.offset <- newline_offset + 1;
state.bol_offset <- state.offset;
state.line_number <- state.line_number + 1
let block_comment_depth state = state.block_comment_depth
let add_token_char _state _char stack = stack
let add_atom_char state c stack =
Buffer.add_char state.atom_buffer c;
stack
let add_quoted_atom_char state c stack =
Buffer.add_char state.atom_buffer c;
add_token_char state c stack
let check_new_sexp_allowed : type a. a state -> unit = fun state ->
let is_single = match state.mode with Single -> true | _ -> false in
if is_single && state.full_sexps > 0 && state.ignoring = 0 then
Error.raise state ~at_eof:false Too_many_sexps
let add_first_char state char stack =
check_new_sexp_allowed state;
Buffer.add_char state.atom_buffer char;
(* For non-quoted atoms, we save both positions at the end. We can always determine the
start position from the end position and the atom length for non-quoted atoms.
Doing it this way allows us to detect single characater atoms for which we need to
save the position twice. *)
stack
let eps_add_first_char_hash state stack =
check_new_sexp_allowed state;
Buffer.add_char state.atom_buffer '#';
stack
let start_quoted_string state _char stack =
check_new_sexp_allowed state;
state.token_start_pos <- current_pos state;
stack
let add_escaped state c stack =
let c' =
match c with
| 'n' -> '\n'
| 'r' -> '\r'
| 'b' -> '\b'
| 't' -> '\t'
| '\\' | '\'' | '"' -> c
| _ -> Buffer.add_char state.atom_buffer '\\'; c
in
Buffer.add_char state.atom_buffer c';
add_token_char state c stack
let eps_add_escaped_cr state stack =
Buffer.add_char state.atom_buffer '\r';
stack
let dec_val c = Char.code c - Char.code '0'
let hex_val c =
match c with
| '0'..'9' -> Char.code c - Char.code '0'
| 'a'..'f' -> Char.code c - Char.code 'a' + 10
| _ -> Char.code c - Char.code 'A' + 10
let add_dec_escape_char state c stack =
state.escaped_value <- state.escaped_value * 10 + dec_val c;
add_token_char state c stack
let add_last_dec_escape_char state c stack =
let value = state.escaped_value * 10 + dec_val c in
state.escaped_value <- 0;
if value > 255 then Error.raise state ~at_eof:false Escape_sequence_out_of_range;
Buffer.add_char state.atom_buffer (Char.unsafe_chr value);
add_token_char state c stack
let comment_add_last_dec_escape_char state c stack =
let value = state.escaped_value * 10 + dec_val c in
state.escaped_value <- 0;
if value > 255 then Error.raise state ~at_eof:false Escape_sequence_out_of_range;
add_token_char state c stack
let add_hex_escape_char state c stack =
state.escaped_value <- (state.escaped_value lsl 4) lor hex_val c;
add_token_char state c stack
let add_last_hex_escape_char state c stack =
let value = (state.escaped_value lsl 4) lor hex_val c in
state.escaped_value <- 0;
Buffer.add_char state.atom_buffer (Char.unsafe_chr value);
add_token_char state c stack
let opening state _char stack =
check_new_sexp_allowed state;
state.depth <- state.depth + 1;
if state.ignoring = 0 then
Open (current_pos state, stack)
else
stack
let sexp_or_comment_added ~is_comment state stack ~delta:_ =
if state.ignoring lor state.depth <> 0 then
stack
else begin
if not is_comment then state.full_sexps <- state.full_sexps + 1;
stack
end
let sexp_added state stack ~delta =
let stack = sexp_or_comment_added ~is_comment:false state stack ~delta in
if state.ignoring <> 0 && state.ignoring_depth = state.depth then begin
state.ignoring <- state.ignoring - 1;
stack
end else stack
let rec make_list stop acc : stack -> stack = function
| Empty -> assert false
| Open (start, stack) -> Sexp (List ({ start; stop }, acc), stack)
| Sexp (sexp, stack) -> make_list stop (sexp :: acc) stack
let closing state _char stack =
if state.depth > 0 then begin
let stack =
if state.ignoring = 0 then
make_list (current_pos state ~delta:1) [] stack
else
stack
in
if state.ignoring <> 0 && state.ignoring_depth = state.depth then
Error.raise state ~at_eof:false Sexp_comment_without_sexp;
state.depth <- state.depth - 1;
sexp_added state stack ~delta:1
end else
Error.raise state ~at_eof:false Closed_paren_without_opened
let make_loc ?(delta=0) state : Sexp_ast.Loc.t =
{ start = state.token_start_pos
; stop = current_pos state ~delta
}
let eps_push_atom state stack =
let str = Buffer.contents state.atom_buffer in
Buffer.clear state.atom_buffer;
let stack =
if state.ignoring = 0 then
Sexp (Atom (make_loc state, str), stack)
else
stack
in
sexp_added state stack ~delta:0
let push_quoted_atom state _char stack =
let str = Buffer.contents state.atom_buffer in
Buffer.clear state.atom_buffer;
let stack =
if state.ignoring = 0 then
Sexp (Atom (make_loc state ~delta:1, str), stack)
else
stack
in
sexp_added state stack ~delta:1
let start_sexp_comment state _char stack =
if state.ignoring = 0 || state.ignoring_depth = state.depth then begin
state.ignoring <- state.ignoring + 1;
state.ignoring_depth <- state.depth
end;
stack
let start_block_comment state _char stack =
state.block_comment_depth <- state.block_comment_depth + 1;
stack
let end_block_comment state _char stack =
state.block_comment_depth <- state.block_comment_depth - 1;
stack
let start_line_comment _state _char stack = stack
let end_line_comment _state stack = stack
let eps_eoi_check : type a. a state -> stack -> stack = fun state stack ->
if state.depth > 0 then Error.raise state ~at_eof:true Unclosed_paren;
if state.ignoring > 0 then Error.raise state ~at_eof:true Sexp_comment_without_sexp;
if state.full_sexps = 0 then (
match state.mode with
| Many -> ()
| Single ->
Error.raise state ~at_eof:true No_sexp_found_in_input
);
stack

View File

@ -0,0 +1,132 @@
(** Internal bits used by the generated automaton, not part of the public API *)
(** Internal state of the automaton *)
type 'a state
type 'a mode =
| Single : Sexp_ast.t mode
| Many : Sexp_ast.t list mode
type stack
val empty_stack : stack
val new_state
: fname:string
-> 'a mode
-> 'a state
val mode : 'a state -> 'a mode
(** Number of characters fed to the parser *)
val offset : _ state -> int
(** Position in the text *)
val line : _ state -> int
val column : _ state -> int
(** Whether there are some unclosed parentheses *)
val has_unclosed_paren : _ state -> bool
val set_error_state : _ state -> unit
val sexp_of_stack : stack -> Sexp_ast.t
val sexps_of_stack : stack -> Sexp_ast.t list
val automaton_state : _ state -> int
module Error : sig
type t
val position : t -> Lexing.position
val message : t -> string
module Reason : sig
type t =
| Unexpected_char_parsing_hex_escape
| Unexpected_char_parsing_dec_escape
| Unterminated_quoted_string
| Unterminated_block_comment
| Escape_sequence_out_of_range
| Unclosed_paren
| Too_many_sexps
| Closed_paren_without_opened
| Comment_token_in_unquoted_atom
| Sexp_comment_without_sexp
| Unexpected_character_after_cr
| No_sexp_found_in_input
| Automaton_in_error_state
end
val raise : _ state -> at_eof:bool -> Reason.t -> _
end
exception Parse_error of Error.t
val set_automaton_state : _ state -> int -> unit
(** Advance the position counters. [advance_eol] is for when we read a newline
character. *)
val advance : _ state -> unit
val advance_eol : _ state -> unit
(** Number of opened #| *)
val block_comment_depth : _ state -> int
type 'a action = 'a state -> char -> stack -> stack
type 'a epsilon_action = 'a state -> stack -> stack
(** Add a character to the atom buffer. [add_quoted_atom_char] does the same for quoted
atoms *)
val add_atom_char : _ action
val add_quoted_atom_char : _ action
(** Add a character that just follows a '\\' and the '\\' itself if necessary. *)
val add_escaped : _ action
(** [escaped_value <- escaped_value * 10 + (char - '0')]
These functions make the assumption that [char] is between '0' and '9'.
[add_dec_escape_char] also assumes the result doesn't overflow. The automaton
definition must make sure this is the case.
[add_last_dec_escape_char] also adds the resulting character to the atom buffer.
*)
val add_dec_escape_char : _ action
val add_last_dec_escape_char : _ action
(** Same but for quoted strings inside comments. Useful because it can fail. *)
val comment_add_last_dec_escape_char : _ action
(** Same as [add_dec_escape_char] but for hexadicemal escape sequences *)
val add_hex_escape_char : _ action
val add_last_hex_escape_char : _ action
(** Ignore one more full sexp to come *)
val start_sexp_comment : _ action
(** Add the first char of an unquoted atom. *)
val add_first_char : _ action
val start_quoted_string : _ action
(** Takes note of a control character in quoted atoms or the uninterpreted characters of
comments, for which there is no corresponding [add_*] call (a backslash and the x in
"\xff" or any character in a line comment). This does not get called for the opening
([start_quoted_string]) or closing ([push_quoted_atom]) quotes themselves.
*)
val add_token_char : _ action
val opening : _ action
val closing : _ action
val push_quoted_atom : _ action
val start_block_comment : _ action
val end_block_comment : _ action
val start_line_comment : _ action
val end_line_comment : _ epsilon_action
val eps_push_atom : _ epsilon_action
val eps_add_first_char_hash : _ epsilon_action
val eps_eoi_check : _ epsilon_action
val eps_add_escaped_cr : _ epsilon_action

10
vendor/usexp/src/sexp_ast.ml vendored Normal file
View File

@ -0,0 +1,10 @@
module Loc = struct
type t =
{ start : Lexing.position
; stop : Lexing.position
}
end
type t =
| Atom of Loc.t * string
| List of Loc.t * t list

1319
vendor/usexp/src/table.ml vendored Normal file

File diff suppressed because it is too large Load Diff

2
vendor/usexp/src/table.mli vendored Normal file
View File

@ -0,0 +1,2 @@
val transitions : Obj.t Parser_automaton_internal.action array
val transitions_eoi : Obj.t Parser_automaton_internal.epsilon_action array

290
vendor/usexp/src/usexp.ml vendored Normal file
View File

@ -0,0 +1,290 @@
open StdLabels
module A = Parser_automaton_internal
module Atom = struct
type t = string
let escaped_length s =
let n = ref 0 in
for i = 0 to String.length s - 1 do
n := !n +
(match String.unsafe_get s i with
| '\"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2
| ' ' .. '~' -> 1
| _ -> 4)
done;
!n
let must_escape s = escaped_length s > String.length s
let escaped_internal s ~with_double_quotes =
let n = escaped_length s in
if n = String.length s then s else begin
let s' = Bytes.create (n + if with_double_quotes then 2 else 0) in
let n = ref 0 in
if with_double_quotes then begin
Bytes.unsafe_set s' 0 '"';
n := 1
end;
for i = 0 to String.length s - 1 do
begin match String.unsafe_get s i with
| ('\"' | '\\') as c ->
Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n c
| '\n' ->
Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 'n'
| '\t' ->
Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 't'
| '\r' ->
Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 'r'
| '\b' ->
Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 'b'
| (' ' .. '~') as c -> Bytes.unsafe_set s' !n c
| c ->
let a = Char.code c in
Bytes.unsafe_set s' !n '\\';
incr n;
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + a / 100));
incr n;
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + (a / 10) mod 10));
incr n;
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + a mod 10));
end;
incr n
done;
if with_double_quotes then Bytes.unsafe_set s' !n '"';
Bytes.unsafe_to_string s'
end
let escaped s = escaped_internal s ~with_double_quotes:false
let serialize s = escaped_internal s ~with_double_quotes:true
end
type t =
| Atom of string
| List of t list
type sexp = t
let rec to_string = function
| Atom s -> Atom.serialize s
| List l -> Printf.sprintf "(%s)" (List.map l ~f:to_string |> String.concat ~sep:" ")
let rec pp ppf = function
| Atom s ->
Format.pp_print_string ppf (Atom.serialize s)
| List [] ->
Format.pp_print_string ppf "()"
| List (first :: rest) ->
Format.pp_open_box ppf 1;
Format.pp_print_string ppf "(";
Format.pp_open_hvbox ppf 0;
pp ppf first;
List.iter rest ~f:(fun sexp ->
Format.pp_print_space ppf ();
pp ppf sexp);
Format.pp_close_box ppf ();
Format.pp_print_string ppf ")";
Format.pp_close_box ppf ()
let split_string s ~on =
let rec loop i j =
if j = String.length s then
[String.sub s ~pos:i ~len:(j - i)]
else if s.[j] = on then
String.sub s ~pos:i ~len:(j - i) :: loop (j + 1) (j + 1)
else
loop i (j + 1)
in
loop 0 0
let rec pp_split_strings ppf = function
| Atom s ->
if Atom.must_escape s then begin
if String.contains s '\n' then begin
match split_string s ~on:'\n' with
| [] -> Format.pp_print_string ppf (Atom.serialize s)
| first :: rest ->
Format.fprintf ppf "@[<hv 1>\"@{<atom>%s" (String.escaped first);
List.iter rest ~f:(fun s ->
Format.fprintf ppf "@,\\n%s" (String.escaped s));
Format.fprintf ppf "@}\"@]"
end else
Format.fprintf ppf "%S" s
end else
Format.pp_print_string ppf s
| List [] ->
Format.pp_print_string ppf "()"
| List (first :: rest) ->
Format.pp_open_box ppf 1;
Format.pp_print_string ppf "(";
Format.pp_open_hvbox ppf 0;
pp_split_strings ppf first;
List.iter rest ~f:(fun sexp ->
Format.pp_print_space ppf ();
pp_split_strings ppf sexp);
Format.pp_close_box ppf ();
Format.pp_print_string ppf ")";
Format.pp_close_box ppf ()
type formatter_state =
| In_atom
| In_makefile_action
| In_makefile_stuff
let prepare_formatter ppf =
let state = ref [] in
Format.pp_set_mark_tags ppf true;
let ofuncs = Format.pp_get_formatter_out_functions ppf () in
let tfuncs = Format.pp_get_formatter_tag_functions ppf () in
Format.pp_set_formatter_tag_functions ppf
{ tfuncs with
mark_open_tag = (function
| "atom" -> state := In_atom :: !state; ""
| "makefile-action" -> state := In_makefile_action :: !state; ""
| "makefile-stuff" -> state := In_makefile_stuff :: !state; ""
| s -> tfuncs.mark_open_tag s)
; mark_close_tag = (function
| "atom" | "makefile-action" | "makefile-stuff" -> state := List.tl !state; ""
| s -> tfuncs.mark_close_tag s)
};
Format.pp_set_formatter_out_functions ppf
{ ofuncs with
out_newline = (fun () ->
match !state with
| [In_atom; In_makefile_action] ->
ofuncs.out_string "\\\n\t" 0 3
| [In_atom] ->
ofuncs.out_string "\\\n" 0 2
| [In_makefile_action] ->
ofuncs.out_string " \\\n\t" 0 4
| [In_makefile_stuff] ->
ofuncs.out_string " \\\n" 0 3
| [] ->
ofuncs.out_string "\n" 0 1
| _ -> assert false)
; out_spaces = (fun n ->
ofuncs.out_spaces
(match !state with
| In_atom :: _ -> max 0 (n - 2)
| _ -> n))
}
module Loc = Sexp_ast.Loc
module Ast = struct
type t = Sexp_ast.t =
| Atom of Loc.t * string
| List of Loc.t * t list
let loc (Atom (loc, _) | List (loc, _)) = loc
let rec remove_locs : t -> sexp = function
| Atom (_, s) -> Atom s
| List (_, l) -> List (List.map l ~f:remove_locs)
module Token = struct
type t =
| Atom of Loc.t * string
| Lparen of Loc.t
| Rparen of Loc.t
end
let tokenize =
let rec loop acc t =
match t with
| Atom (loc, s) -> Token.Atom (loc, s) :: acc
| List (loc, l) ->
let shift (pos : Lexing.position) delta =
{ pos with pos_cnum = pos.pos_cnum + delta }
in
let l_loc = { loc with stop = shift loc.start 1 } in
let r_loc = { loc with start = shift loc.stop (-1) } in
let acc = Token.Lparen l_loc :: acc in
let acc = List.fold_left l ~init:acc ~f:loop in
let acc = Token.Rparen r_loc :: acc in
acc
in
fun t -> loop [] t |> List.rev
end
let rec add_loc t ~loc : Ast.t =
match t with
| Atom s -> Atom (loc, s)
| List l -> List (loc, List.map l ~f:(add_loc ~loc))
module Parser = struct
module Error = A.Error
exception Error = A.Parse_error
module Mode = struct
type 'a t = 'a A.mode =
| Single : Ast.t t
| Many : Ast.t list t
end
module Stack = struct
type t = A.stack
let empty = A.empty_stack
end
type 'a t = 'a A.state
let create ~fname ~mode = A.new_state ~fname mode
let feed : type a. a A.action = fun state char stack ->
let idx = (A.automaton_state state lsl 8) lor (Char.code char) in
(* We need an Obj.magic as the type of the array can't be generalized.
This problem will go away when we get immutable arrays. *)
(Obj.magic (Table.transitions.(idx) : Obj.t A.action) : a A.action) state char stack
[@@inline always]
let feed_eoi : type a. a t -> Stack.t -> a = fun state stack ->
let stack =
(Obj.magic (Table.transitions_eoi.(A.automaton_state state)
: Obj.t A.epsilon_action)
: a A.epsilon_action) state stack
in
A.set_error_state state;
match A.mode state with
| Mode.Single -> A.sexp_of_stack stack
| Mode.Many -> A.sexps_of_stack stack
let rec feed_substring_unsafe str state stack i stop =
if i < stop then
let c = String.unsafe_get str i in
let stack = feed state c stack in
feed_substring_unsafe str state stack (i + 1) stop
else
stack
let rec feed_subbytes_unsafe str state stack i stop =
if i < stop then
let c = Bytes.unsafe_get str i in
let stack = feed state c stack in
feed_subbytes_unsafe str state stack (i + 1) stop
else
stack
let feed_substring state str ~pos ~len stack =
let str_len = String.length str in
if pos < 0 || len < 0 || pos > str_len - len then
invalid_arg "Jbuilder_sexp.feed_substring";
feed_substring_unsafe str state stack pos (pos + len)
let feed_subbytes state str ~pos ~len stack =
let str_len = Bytes.length str in
if pos < 0 || len < 0 || pos > str_len - len then
invalid_arg "Jbuilder_sexp.feed_subbytes";
feed_subbytes_unsafe str state stack pos (pos + len)
let feed_string state str stack =
feed_substring_unsafe str state stack 0 (String.length str)
let feed_bytes state str stack =
feed_subbytes_unsafe str state stack 0 (Bytes.length str)
end
let parse_string ~fname ~mode str =
let p = Parser.create ~fname ~mode in
let stack = Parser.feed_string p str Parser.Stack.empty in
Parser.feed_eoi p stack

112
vendor/usexp/src/usexp.mli vendored Normal file
View File

@ -0,0 +1,112 @@
(** Parsing of s-expressions *)
module Atom : sig
type t = string
(** Whether the atom must be escaped when serialized *)
val must_escape : t -> bool
(** Escape all special characters in the atom *)
val escaped : t -> string
(** [serialize t] is the serialized representation of [t], so either
[t] either [escaped t] surrounded by double quotes. *)
val serialize : t -> string
end
module Loc : sig
type t =
{ start : Lexing.position
; stop : Lexing.position
}
end
(** The S-expression type *)
type t =
| Atom of string
| List of t list
(** Serialize a S-expression *)
val to_string : t -> string
(** Serialize a S-expression using indentation to improve readability *)
val pp : Format.formatter -> t -> unit
(** Same as [pp], but split long strings. The formatter must have been
prepared with [prepare_formatter]. *)
val pp_split_strings : Format.formatter -> t -> unit
(** Prepare a formatter for [pp_split_strings]. Additionaly the
formatter escape newlines when the tags "makefile-action" or
"makefile-stuff" are active. *)
val prepare_formatter : Format.formatter -> unit
(** Abstract syntax tree *)
module Ast : sig
type sexp = t
type t =
| Atom of Loc.t * Atom.t
| List of Loc.t * t list
val loc : t -> Loc.t
val remove_locs : t -> sexp
module Token : sig
type t =
| Atom of Loc.t * string
| Lparen of Loc.t
| Rparen of Loc.t
end
val tokenize : t -> Token.t list
end with type sexp := t
val add_loc : t -> loc:Loc.t -> Ast.t
module Parser : sig
module Error : sig
type t
val position : t -> Lexing.position
val message : t -> string
end
(** Exception raised in case of a parsing error *)
exception Error of Error.t
module Mode : sig
type sexp = t
type 'a t =
| Single : Ast.t t
| Many : Ast.t list t
end with type sexp := t
module Stack : sig
(** Parser stack. The stack is not in [state] for optimization purposes. *)
type t
val empty : t
end
type 'a t
(** Create a new parser state. [fname] is the filename the input is from. *)
val create : fname:string -> mode:'a Mode.t -> 'a t
(** Feed one character to the parser. In case of error, it raises [Parse_error] *)
val feed : _ t -> char -> Stack.t -> Stack.t
(** Instruct the parser that the end of input was reached. In case of error, it raises
[Parse_error] *)
val feed_eoi : 'a t -> Stack.t -> 'a
(** {3 Convenience functions} *)
val feed_string : _ t -> string -> Stack.t -> Stack.t
val feed_substring : _ t -> string -> pos:int -> len:int -> Stack.t -> Stack.t
val feed_bytes : _ t -> bytes -> Stack.t -> Stack.t
val feed_subbytes : _ t -> bytes -> pos:int -> len:int -> Stack.t -> Stack.t
end
val parse_string : fname:string -> mode:'a Parser.Mode.t -> string -> 'a