Vendored usexp and switch to it
This commit is contained in:
parent
bbb6108924
commit
3e525d8eec
137
bootstrap.ml
137
bootstrap.ml
|
@ -27,15 +27,33 @@ module String = struct
|
|||
sub s ~pos ~len:(String.length s - pos))
|
||||
end
|
||||
|
||||
(* Directories with library names *)
|
||||
let dirs =
|
||||
[ "vendor/boot"
|
||||
; "src"
|
||||
[ ("vendor/boot" , None)
|
||||
; ("vendor/usexp/src" , Some "Usexp")
|
||||
; ("src" , None)
|
||||
]
|
||||
|
||||
open Printf
|
||||
|
||||
module String_set = Set.Make(String)
|
||||
module String_map = Map.Make(String)
|
||||
|
||||
module Make_map(Key : Map.OrderedType) = struct
|
||||
include Map.Make(Key)
|
||||
|
||||
let of_alist_multi l =
|
||||
List.fold_left (List.rev l) ~init:empty ~f:(fun acc (k, v) ->
|
||||
let l =
|
||||
try
|
||||
find k acc
|
||||
with Not_found ->
|
||||
[]
|
||||
in
|
||||
add k (v :: l) acc)
|
||||
end
|
||||
|
||||
module String_map = Make_map(String)
|
||||
module String_option_map = Make_map(struct type t = string option let compare = compare end)
|
||||
|
||||
let () =
|
||||
match Sys.getenv "OCAMLPARAM" with
|
||||
|
@ -136,21 +154,30 @@ let run_ocamllex src =
|
|||
at_exit (fun () -> try Sys.remove dst with _ -> ());
|
||||
dst
|
||||
|
||||
type module_files =
|
||||
type module_info =
|
||||
{ impl : string
|
||||
; intf : string option
|
||||
; name : string
|
||||
; libname : string option
|
||||
; fqn : string (** Fully qualified name *)
|
||||
}
|
||||
|
||||
let fqn libname mod_name =
|
||||
match libname with
|
||||
| None -> mod_name
|
||||
| Some s -> if s = mod_name then s else s ^ "." ^ mod_name
|
||||
|
||||
(* Map from module names to ml/mli filenames *)
|
||||
let modules =
|
||||
let files_of dir =
|
||||
Sys.readdir dir |> Array.to_list |> List.map ~f:(Filename.concat dir)
|
||||
let files_of (dir, libname) =
|
||||
Sys.readdir dir |> Array.to_list |> List.map ~f:(fun fn ->
|
||||
(Filename.concat dir fn, libname))
|
||||
in
|
||||
let impls, intfs =
|
||||
List.map dirs ~f:files_of
|
||||
|> List.concat
|
||||
|> List.fold_left ~init:(String_map.empty, String_map.empty)
|
||||
~f:(fun ((impls, intfs) as acc) fn ->
|
||||
~f:(fun ((impls, intfs) as acc) (fn, libname) ->
|
||||
let base = Filename.basename fn in
|
||||
match String.index base '.' with
|
||||
| exception Not_found -> acc
|
||||
|
@ -173,22 +200,31 @@ let modules =
|
|||
let fn =
|
||||
if ext = ".mll" then lazy (run_ocamllex fn) else lazy fn
|
||||
in
|
||||
(String_map.add mod_name fn impls, intfs)
|
||||
let fqn = fqn libname mod_name in
|
||||
(String_map.add fqn (libname, mod_name, fn) impls, intfs)
|
||||
else
|
||||
acc
|
||||
| ".mli" ->
|
||||
let mod_name = String.capitalize_ascii base in
|
||||
if is_boot || not (String_map.mem mod_name intfs) then
|
||||
(impls, String_map.add mod_name fn intfs)
|
||||
let fqn = fqn libname mod_name in
|
||||
(impls, String_map.add fqn fn intfs)
|
||||
else
|
||||
acc
|
||||
| _ -> acc)
|
||||
in
|
||||
String_map.merge
|
||||
(fun _ impl intf ->
|
||||
(fun fqn impl intf ->
|
||||
match impl with
|
||||
| None -> None
|
||||
| Some impl -> Some { impl = Lazy.force impl; intf })
|
||||
| Some (libname, name, impl) ->
|
||||
let impl = Lazy.force impl in
|
||||
Some { impl
|
||||
; intf
|
||||
; name
|
||||
; libname
|
||||
; fqn
|
||||
})
|
||||
impls intfs
|
||||
|
||||
let split_words s =
|
||||
|
@ -220,9 +256,10 @@ let read_lines fn =
|
|||
close_in ic;
|
||||
lines
|
||||
|
||||
let read_deps files =
|
||||
let read_deps files_by_lib =
|
||||
let out_fn = "boot-depends.txt" in
|
||||
at_exit (fun () -> Sys.remove out_fn);
|
||||
List.map files_by_lib ~f:(fun (libname, files) ->
|
||||
let n =
|
||||
exec "%s -modules %s > %s"
|
||||
(Filename.quote ocamldep) (String.concat ~sep:" " files) out_fn
|
||||
|
@ -239,9 +276,24 @@ let read_deps files =
|
|||
let deps =
|
||||
split_words (String.sub line ~pos:(i + 1)
|
||||
~len:(String.length line - (i + 1)))
|
||||
|> List.filter ~f:(fun m -> String_map.mem m modules)
|
||||
in
|
||||
(unit, deps))
|
||||
let rec resolve deps acc =
|
||||
match deps with
|
||||
| [] -> List.rev acc
|
||||
| dep :: deps ->
|
||||
let fqn = fqn libname dep in
|
||||
let acc =
|
||||
if String_map.mem fqn modules then
|
||||
fqn :: acc
|
||||
else if String_map.mem dep modules then
|
||||
dep :: acc
|
||||
else
|
||||
acc
|
||||
in
|
||||
resolve deps acc
|
||||
in
|
||||
(fqn libname unit, resolve deps [])))
|
||||
|> List.concat
|
||||
|
||||
let topsort deps =
|
||||
let n = List.length deps in
|
||||
|
@ -263,10 +315,12 @@ let topsort deps =
|
|||
List.rev !res
|
||||
|
||||
let topsorted_module_names =
|
||||
let files =
|
||||
List.map (String_map.bindings modules) ~f:(fun (_, x) -> x.impl)
|
||||
let files_by_lib =
|
||||
List.map (String_map.bindings modules) ~f:(fun (_, x) -> (x.libname, x.impl))
|
||||
|> String_option_map.of_alist_multi
|
||||
|> String_option_map.bindings
|
||||
in
|
||||
topsort (read_deps files)
|
||||
topsort (read_deps files_by_lib)
|
||||
|
||||
let count_newlines s =
|
||||
let newlines = ref 0 in
|
||||
|
@ -308,24 +362,57 @@ let generate_file_with_all_the_sources () =
|
|||
pos_in_generated_file := !pos_in_generated_file + newlines;
|
||||
pr "# %d %S" (!pos_in_generated_file + 1) generated_file
|
||||
in
|
||||
List.iter topsorted_module_names ~f:(fun m ->
|
||||
let { impl; intf } =
|
||||
try
|
||||
String_map.find m modules
|
||||
with Not_found ->
|
||||
Printf.ksprintf failwith "module not found: %s" m
|
||||
let modules_by_lib =
|
||||
List.map topsorted_module_names ~f:(fun m ->
|
||||
let info = String_map.find m modules in
|
||||
(info.libname, info))
|
||||
|> String_option_map.of_alist_multi
|
||||
in
|
||||
let lib_order =
|
||||
List.fold_left topsorted_module_names ~init:(String_set.empty, [])
|
||||
~f:(fun ((seen, rev_order) as acc) m ->
|
||||
match (String_map.find m modules).libname with
|
||||
| None -> acc
|
||||
| Some lib ->
|
||||
if String_set.mem lib seen then
|
||||
acc
|
||||
else
|
||||
(String_set.add lib seen, lib :: rev_order))
|
||||
|> snd
|
||||
|> List.rev_map ~f:(fun lib -> Some lib)
|
||||
in
|
||||
let lib_order = lib_order @ [None] in
|
||||
List.iter lib_order ~f:(fun libname ->
|
||||
let modules = String_option_map.find libname modules_by_lib in
|
||||
(match libname with
|
||||
| None -> ()
|
||||
| Some s -> pr "module %s = struct" s);
|
||||
List.iter modules ~f:(fun { name; intf; impl; _ } ->
|
||||
if Some name = libname then
|
||||
match intf with
|
||||
| Some intf ->
|
||||
pr "module %s : sig" m;
|
||||
pr "include (struct";
|
||||
dump impl;
|
||||
pr "end : sig";
|
||||
dump intf;
|
||||
pr "end)"
|
||||
| None ->
|
||||
dump impl;
|
||||
else
|
||||
match intf with
|
||||
| Some intf ->
|
||||
pr "module %s : sig" name;
|
||||
dump intf;
|
||||
pr "end = struct";
|
||||
dump impl;
|
||||
pr "end"
|
||||
| None ->
|
||||
pr "module %s = struct" m;
|
||||
pr "module %s = struct" name;
|
||||
dump impl;
|
||||
pr "end");
|
||||
(match libname with
|
||||
| None -> ()
|
||||
| Some _ -> pr "end"));
|
||||
output_string oc "let () = Main.bootstrap ()\n";
|
||||
close_out oc
|
||||
|
||||
|
|
|
@ -147,14 +147,7 @@ let strings p =
|
|||
let read_sexp p =
|
||||
contents p
|
||||
>>^ fun s ->
|
||||
let lb = Lexing.from_string s in
|
||||
lb.lex_curr_p <-
|
||||
{ pos_fname = Path.to_string p
|
||||
; pos_lnum = 1
|
||||
; pos_bol = 0
|
||||
; pos_cnum = 0
|
||||
};
|
||||
Sexp_lexer.single lb
|
||||
Usexp.parse_string s ~fname:(Path.to_string p) ~mode:Single
|
||||
|
||||
let if_file_exists p ~then_ ~else_ =
|
||||
If_file_exists (p, ref (Undecided (then_, else_)))
|
||||
|
|
|
@ -585,7 +585,7 @@ module Trace = struct
|
|||
Utils.Cached_digest.load ();
|
||||
let trace = Hashtbl.create 1024 in
|
||||
if Sys.file_exists file then begin
|
||||
let sexp = Sexp_lexer.Load.single file in
|
||||
let sexp = Sexp.load ~fname:file ~mode:Single in
|
||||
let bindings =
|
||||
let open Sexp.Of_sexp in
|
||||
list (pair Path.t (fun s -> Digest.from_hex (string s))) sexp
|
||||
|
|
|
@ -406,7 +406,7 @@ let create_for_opam ?root ~switch ~name ?(merlin=false) () =
|
|||
["config"; "env"; "--root"; root; "--switch"; switch; "--sexp"]
|
||||
>>= fun s ->
|
||||
let vars =
|
||||
Sexp_lexer.single (Lexing.from_string s)
|
||||
Usexp.parse_string ~fname:"<opam output>" ~mode:Single s
|
||||
|> Sexp.Of_sexp.(list (pair string string))
|
||||
|> Env_var_map.of_alist_multi
|
||||
|> Env_var_map.mapi ~f:(fun var values ->
|
||||
|
|
|
@ -725,8 +725,9 @@ Add it to your jbuild file to remove this warning.
|
|||
action_str;
|
||||
let dir = Path.append ctx.build_dir dir in
|
||||
let action =
|
||||
Lexing.from_string action_str
|
||||
|> Sexp_lexer.single
|
||||
Usexp.parse_string action_str
|
||||
~fname:"<internal action for mli to ml>"
|
||||
~mode:Single
|
||||
|> Action.Unexpanded.t
|
||||
in
|
||||
SC.add_rule sctx
|
||||
|
|
|
@ -5,7 +5,8 @@
|
|||
;; (public_name jbuilder)
|
||||
(libraries (unix
|
||||
jbuilder_re
|
||||
jbuilder_opam_file_format))
|
||||
jbuilder_opam_file_format
|
||||
usexp))
|
||||
(synopsis "Internal Jbuilder library, do not use!")))
|
||||
|
||||
(ocamllex (sexp_lexer meta_lexer glob_lexer))
|
||||
(ocamllex (meta_lexer glob_lexer))
|
||||
|
|
|
@ -144,7 +144,7 @@ end
|
|||
die "@{<error>Error:@} %s failed to produce a valid jbuild file.\n\
|
||||
Did you forgot to call [Jbuild_plugin.V*.send]?"
|
||||
(Path.to_string file);
|
||||
let sexps = Sexp_lexer.Load.many (Path.to_string generated_jbuild) in
|
||||
let sexps = Sexp.load ~fname:(Path.to_string generated_jbuild) ~mode:Many in
|
||||
return (dir, scope, Stanzas.parse scope sexps))
|
||||
|> Future.all
|
||||
end
|
||||
|
@ -158,7 +158,7 @@ type conf =
|
|||
|
||||
let load ~dir ~scope =
|
||||
let file = Path.relative dir "jbuild" in
|
||||
match Sexp_lexer.Load.many_or_ocaml_script (Path.to_string file) with
|
||||
match Sexp.load_many_or_ocaml_script (Path.to_string file) with
|
||||
| Sexps sexps ->
|
||||
Jbuilds.Literal (dir, scope, Stanzas.parse scope sexps)
|
||||
| Ocaml_script ->
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
open Import
|
||||
|
||||
type t =
|
||||
type t = Usexp.Loc.t =
|
||||
{ start : Lexing.position
|
||||
; stop : Lexing.position
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
type t =
|
||||
type t = Usexp.Loc.t =
|
||||
{ start : Lexing.position
|
||||
; stop : Lexing.position
|
||||
}
|
||||
|
|
176
src/sexp.ml
176
src/sexp.ml
|
@ -1,119 +1,7 @@
|
|||
open Import
|
||||
|
||||
type t =
|
||||
| Atom of string
|
||||
| List of t list
|
||||
|
||||
type sexp = t
|
||||
|
||||
let must_escape str =
|
||||
let len = String.length str in
|
||||
len = 0 ||
|
||||
let rec loop ix =
|
||||
match str.[ix] with
|
||||
| '"' | '(' | ')' | ';' | '\\' -> true
|
||||
| '|' -> ix > 0 && let next = ix - 1 in str.[next] = '#' || loop next
|
||||
| '#' -> ix > 0 && let next = ix - 1 in str.[next] = '|' || loop next
|
||||
| '\000' .. '\032' | '\127' .. '\255' -> true
|
||||
| _ -> ix > 0 && loop (ix - 1)
|
||||
in
|
||||
loop (len - 1)
|
||||
|
||||
let rec to_string = function
|
||||
| Atom s -> if must_escape s then sprintf "%S" s else s
|
||||
| List l -> sprintf "(%s)" (List.map l ~f:to_string |> String.concat ~sep:" ")
|
||||
|
||||
let rec pp ppf = function
|
||||
| Atom s ->
|
||||
if must_escape s then
|
||||
Format.fprintf ppf "%S" s
|
||||
else
|
||||
Format.pp_print_string ppf s
|
||||
| List [] ->
|
||||
Format.pp_print_string ppf "()"
|
||||
| List (first :: rest) ->
|
||||
Format.pp_open_box ppf 1;
|
||||
Format.pp_print_string ppf "(";
|
||||
Format.pp_open_hvbox ppf 0;
|
||||
pp ppf first;
|
||||
List.iter rest ~f:(fun sexp ->
|
||||
Format.pp_print_space ppf ();
|
||||
pp ppf sexp);
|
||||
Format.pp_close_box ppf ();
|
||||
Format.pp_print_string ppf ")";
|
||||
Format.pp_close_box ppf ()
|
||||
|
||||
let rec pp_split_strings ppf = function
|
||||
| Atom s ->
|
||||
if must_escape s then begin
|
||||
if String.contains s '\n' then begin
|
||||
match String.split s ~on:'\n' with
|
||||
| [] -> Format.fprintf ppf "%S" s
|
||||
| first :: rest ->
|
||||
Format.fprintf ppf "@[<hv 1>\"@{<atom>%s" (String.escaped first);
|
||||
List.iter rest ~f:(fun s ->
|
||||
Format.fprintf ppf "@,\\n%s" (String.escaped s));
|
||||
Format.fprintf ppf "@}\"@]"
|
||||
end else
|
||||
Format.fprintf ppf "%S" s
|
||||
end else
|
||||
Format.pp_print_string ppf s
|
||||
| List [] ->
|
||||
Format.pp_print_string ppf "()"
|
||||
| List (first :: rest) ->
|
||||
Format.pp_open_box ppf 1;
|
||||
Format.pp_print_string ppf "(";
|
||||
Format.pp_open_hvbox ppf 0;
|
||||
pp_split_strings ppf first;
|
||||
List.iter rest ~f:(fun sexp ->
|
||||
Format.pp_print_space ppf ();
|
||||
pp_split_strings ppf sexp);
|
||||
Format.pp_close_box ppf ();
|
||||
Format.pp_print_string ppf ")";
|
||||
Format.pp_close_box ppf ()
|
||||
|
||||
type formatter_state =
|
||||
| In_atom
|
||||
| In_makefile_action
|
||||
| In_makefile_stuff
|
||||
|
||||
let prepare_formatter ppf =
|
||||
let state = ref [] in
|
||||
Format.pp_set_mark_tags ppf true;
|
||||
let ofuncs = Format.pp_get_formatter_out_functions ppf () in
|
||||
let tfuncs = Format.pp_get_formatter_tag_functions ppf () in
|
||||
Format.pp_set_formatter_tag_functions ppf
|
||||
{ tfuncs with
|
||||
mark_open_tag = (function
|
||||
| "atom" -> state := In_atom :: !state; ""
|
||||
| "makefile-action" -> state := In_makefile_action :: !state; ""
|
||||
| "makefile-stuff" -> state := In_makefile_stuff :: !state; ""
|
||||
| s -> tfuncs.mark_open_tag s)
|
||||
; mark_close_tag = (function
|
||||
| "atom" | "makefile-action" | "makefile-stuff" -> state := List.tl !state; ""
|
||||
| s -> tfuncs.mark_close_tag s)
|
||||
};
|
||||
Format.pp_set_formatter_out_functions ppf
|
||||
{ ofuncs with
|
||||
out_newline = (fun () ->
|
||||
match !state with
|
||||
| [In_atom; In_makefile_action] ->
|
||||
ofuncs.out_string "\\\n\t" 0 3
|
||||
| [In_atom] ->
|
||||
ofuncs.out_string "\\\n" 0 2
|
||||
| [In_makefile_action] ->
|
||||
ofuncs.out_string " \\\n\t" 0 4
|
||||
| [In_makefile_stuff] ->
|
||||
ofuncs.out_string " \\\n" 0 3
|
||||
| [] ->
|
||||
ofuncs.out_string "\n" 0 1
|
||||
| _ -> assert false)
|
||||
; out_spaces = (fun n ->
|
||||
ofuncs.out_spaces
|
||||
(match !state with
|
||||
| In_atom :: _ -> max 0 (n - 2)
|
||||
| _ -> n))
|
||||
}
|
||||
include (Usexp : module type of struct include Usexp end
|
||||
with module Loc := Usexp.Loc)
|
||||
|
||||
let code_error message vars =
|
||||
code_errorf "%s"
|
||||
|
@ -122,27 +10,53 @@ let code_error message vars =
|
|||
:: List.map vars ~f:(fun (name, value) ->
|
||||
List [Atom name; value]))))
|
||||
|
||||
let buf_len = 65_536
|
||||
|
||||
module Ast = struct
|
||||
type t =
|
||||
| Atom of Loc.t * string
|
||||
| List of Loc.t * t list
|
||||
let load ~fname ~mode =
|
||||
Io.with_file_in fname ~f:(fun ic ->
|
||||
let state = Parser.create ~fname ~mode in
|
||||
let buf = Bytes.create buf_len in
|
||||
let rec loop stack =
|
||||
match input ic buf 0 buf_len with
|
||||
| 0 -> Parser.feed_eoi state stack
|
||||
| n -> loop (Parser.feed_subbytes state buf ~pos:0 ~len:n stack)
|
||||
in
|
||||
loop Parser.Stack.empty)
|
||||
|
||||
let loc = function
|
||||
| Atom (loc, _) -> loc
|
||||
| List (loc, _) -> loc
|
||||
let ocaml_script_prefix = "(* -*- tuareg -*- *)"
|
||||
let ocaml_script_prefix_len = String.length ocaml_script_prefix
|
||||
|
||||
let rec remove_locs : t -> sexp = function
|
||||
| Atom (_, s) -> Atom s
|
||||
| List (_, l) -> List (List.map l ~f:remove_locs)
|
||||
type sexps_or_ocaml_script =
|
||||
| Sexps of Ast.t list
|
||||
| Ocaml_script
|
||||
|
||||
let to_string t = to_string (remove_locs t)
|
||||
end
|
||||
|
||||
let rec add_loc t ~loc : Ast.t =
|
||||
match t with
|
||||
| Atom s -> Atom (loc, s)
|
||||
| List l -> List (loc, List.map l ~f:(add_loc ~loc))
|
||||
let load_many_or_ocaml_script fname =
|
||||
Io.with_file_in fname ~f:(fun ic ->
|
||||
let state = Parser.create ~fname ~mode:Many in
|
||||
let buf = Bytes.create buf_len in
|
||||
let rec loop stack =
|
||||
match input ic buf 0 buf_len with
|
||||
| 0 -> Parser.feed_eoi state stack
|
||||
| n -> loop (Parser.feed_subbytes state buf ~pos:0 ~len:n stack)
|
||||
in
|
||||
let rec loop0 stack i =
|
||||
match input ic buf i (buf_len - i) with
|
||||
| 0 ->
|
||||
let stack = Parser.feed_subbytes state buf ~pos:0 ~len:i stack in
|
||||
Sexps (Parser.feed_eoi state stack)
|
||||
| n ->
|
||||
let i = i + n in
|
||||
if i < ocaml_script_prefix_len then
|
||||
loop0 stack i
|
||||
else if Bytes.sub_string buf 0 ocaml_script_prefix_len
|
||||
[@warning "-6"]
|
||||
= ocaml_script_prefix then
|
||||
Ocaml_script
|
||||
else
|
||||
let stack = Parser.feed_subbytes state buf ~pos:0 ~len:i stack in
|
||||
Sexps (loop stack)
|
||||
in
|
||||
loop0 Parser.Stack.empty 0)
|
||||
|
||||
module type Combinators = sig
|
||||
type 'a t
|
||||
|
|
33
src/sexp.mli
33
src/sexp.mli
|
@ -1,37 +1,16 @@
|
|||
open Import
|
||||
|
||||
type t =
|
||||
| Atom of string
|
||||
| List of t list
|
||||
|
||||
module Ast : sig
|
||||
type sexp = t
|
||||
type t =
|
||||
| Atom of Loc.t * string
|
||||
| List of Loc.t * t list
|
||||
|
||||
val loc : t -> Loc.t
|
||||
|
||||
val remove_locs : t -> sexp
|
||||
val to_string : t -> string
|
||||
end with type sexp := t
|
||||
|
||||
val add_loc : t -> loc:Loc.t -> Ast.t
|
||||
include module type of struct include Usexp end with module Loc := Usexp.Loc
|
||||
|
||||
val code_error : string -> (string * t) list -> _
|
||||
|
||||
val to_string : t -> string
|
||||
val load : fname:string -> mode:'a Parser.Mode.t -> 'a
|
||||
|
||||
val pp : Format.formatter -> t -> unit
|
||||
type sexps_or_ocaml_script =
|
||||
| Sexps of Ast.t list
|
||||
| Ocaml_script
|
||||
|
||||
(** Same as [pp], but split long strings. The formatter must have been
|
||||
prepared with [prepare_formatter]. *)
|
||||
val pp_split_strings : Format.formatter -> t -> unit
|
||||
|
||||
(** Prepare a formatter for [pp_split_strings]. Additionaly the
|
||||
formatter escape newlines when the tags "makefile-action" or
|
||||
"makefile-stuff" are active. *)
|
||||
val prepare_formatter : Format.formatter -> unit
|
||||
val load_many_or_ocaml_script : string -> sexps_or_ocaml_script
|
||||
|
||||
module type Combinators = sig
|
||||
type 'a t
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
val single : Lexing.lexbuf -> Sexp.Ast.t
|
||||
val many : Lexing.lexbuf -> Sexp.Ast.t list
|
||||
|
||||
type sexps_or_ocaml_script =
|
||||
| Sexps of Sexp.Ast.t list
|
||||
| Ocaml_script
|
||||
|
||||
val many_or_ocaml_script : Lexing.lexbuf -> sexps_or_ocaml_script
|
||||
|
||||
module Load : sig
|
||||
val single : string -> Sexp.Ast.t
|
||||
val many : string -> Sexp.Ast.t list
|
||||
val many_or_ocaml_script : string -> sexps_or_ocaml_script
|
||||
end
|
|
@ -1,216 +0,0 @@
|
|||
{
|
||||
type sexps_or_ocaml_script =
|
||||
| Sexps of Sexp.Ast.t list
|
||||
| Ocaml_script
|
||||
|
||||
type stack =
|
||||
| Empty
|
||||
| Open of Lexing.position * stack
|
||||
| Sexp of Sexp.Ast.t * stack
|
||||
|
||||
let error = Loc.fail_lex
|
||||
|
||||
let make_list =
|
||||
let rec loop lexbuf acc = function
|
||||
| Empty ->
|
||||
error lexbuf "right parenthesis without matching left parenthesis"
|
||||
| Open (start, stack) ->
|
||||
Sexp (List ({ start; stop = Lexing.lexeme_end_p lexbuf }, acc),
|
||||
stack)
|
||||
| Sexp (sexp, stack) -> loop lexbuf (sexp :: acc) stack
|
||||
in
|
||||
fun lexbuf stack -> loop lexbuf [] stack
|
||||
|
||||
let new_sexp loop stack lexbuf =
|
||||
match stack with
|
||||
| Sexp (sexp, Empty) -> Some sexp
|
||||
| _ -> loop stack lexbuf
|
||||
|
||||
let atom_loc lexbuf : Loc.t =
|
||||
{ start = Lexing.lexeme_start_p lexbuf
|
||||
; stop = Lexing.lexeme_end_p lexbuf
|
||||
}
|
||||
|
||||
let char_for_backslash = function
|
||||
| 'n' -> '\010'
|
||||
| 'r' -> '\013'
|
||||
| 'b' -> '\008'
|
||||
| 't' -> '\009'
|
||||
| c -> c
|
||||
|
||||
let dec_code c1 c2 c3 =
|
||||
100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48)
|
||||
|
||||
let hex_code c1 c2 =
|
||||
let d1 = Char.code c1 in
|
||||
let val1 =
|
||||
if d1 >= 97 then d1 - 87
|
||||
else if d1 >= 65 then d1 - 55
|
||||
else d1 - 48 in
|
||||
let d2 = Char.code c2 in
|
||||
let val2 =
|
||||
if d2 >= 97 then d2 - 87
|
||||
else if d2 >= 65 then d2 - 55
|
||||
else d2 - 48 in
|
||||
val1 * 16 + val2
|
||||
|
||||
let escaped_buf = Buffer.create 256
|
||||
}
|
||||
|
||||
let lf = '\010'
|
||||
let lf_cr = ['\010' '\013']
|
||||
let dos_newline = "\013\010"
|
||||
let blank = [' ' '\009' '\012']
|
||||
let unquoted = [^ ';' '(' ')' '"'] # blank # lf_cr
|
||||
let digit = ['0'-'9']
|
||||
let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
|
||||
|
||||
rule main stack = parse
|
||||
| lf | dos_newline
|
||||
{ Lexing.new_line lexbuf; main stack lexbuf }
|
||||
| blank+
|
||||
{ main stack lexbuf }
|
||||
| (';' (_ # lf_cr)*)
|
||||
{ main stack lexbuf }
|
||||
| '('
|
||||
{ main (Open (Lexing.lexeme_start_p lexbuf, stack)) lexbuf }
|
||||
| ')'
|
||||
{ new_sexp main (make_list lexbuf stack) lexbuf }
|
||||
| '"'
|
||||
{ Buffer.clear escaped_buf;
|
||||
scan_string escaped_buf (Lexing.lexeme_start_p lexbuf) stack lexbuf
|
||||
}
|
||||
| "#|"
|
||||
{ block_comment 0 stack lexbuf }
|
||||
| unquoted* as s
|
||||
{ new_sexp main (Sexp (Atom (atom_loc lexbuf, s), stack)) lexbuf }
|
||||
| eof
|
||||
{ match stack with
|
||||
| Empty -> None
|
||||
| _ -> error lexbuf "unterminated s-expression" }
|
||||
| _
|
||||
{ error lexbuf "syntax error" }
|
||||
|
||||
and block_comment depth stack = parse
|
||||
| "#|"
|
||||
{ block_comment (depth + 1) stack lexbuf }
|
||||
| "|#"
|
||||
{ if depth = 0 then
|
||||
main stack lexbuf
|
||||
else
|
||||
block_comment (depth - 1) stack lexbuf }
|
||||
| _
|
||||
{ block_comment depth stack lexbuf }
|
||||
| eof
|
||||
{ error lexbuf "unterminated block comment" }
|
||||
|
||||
and scan_string buf start stack = parse
|
||||
| '"'
|
||||
{ new_sexp main
|
||||
(Sexp (Atom ({ start; stop = Lexing.lexeme_end_p lexbuf },
|
||||
Buffer.contents buf),
|
||||
stack))
|
||||
lexbuf
|
||||
}
|
||||
| '\\' lf
|
||||
{
|
||||
Lexing.new_line lexbuf;
|
||||
scan_string_after_escaped_newline buf start stack lexbuf
|
||||
}
|
||||
| '\\' dos_newline
|
||||
{
|
||||
Lexing.new_line lexbuf;
|
||||
scan_string_after_escaped_newline buf start stack lexbuf
|
||||
}
|
||||
| '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c)
|
||||
{
|
||||
Buffer.add_char buf (char_for_backslash c);
|
||||
scan_string buf start stack lexbuf
|
||||
}
|
||||
| '\\' (digit as c1) (digit as c2) (digit as c3)
|
||||
{
|
||||
let v = dec_code c1 c2 c3 in
|
||||
if v > 255 then error lexbuf "illegal escape";
|
||||
Buffer.add_char buf (Char.chr v);
|
||||
scan_string buf start stack lexbuf
|
||||
}
|
||||
| '\\' 'x' (hexdigit as c1) (hexdigit as c2)
|
||||
{
|
||||
let v = hex_code c1 c2 in
|
||||
Buffer.add_char buf (Char.chr v);
|
||||
scan_string buf start stack lexbuf
|
||||
}
|
||||
| '\\' (_ as c)
|
||||
{
|
||||
Buffer.add_char buf '\\';
|
||||
Buffer.add_char buf c;
|
||||
scan_string buf start stack lexbuf
|
||||
}
|
||||
| lf
|
||||
{
|
||||
Lexing.new_line lexbuf;
|
||||
Buffer.add_char buf '\n';
|
||||
scan_string buf start stack lexbuf
|
||||
}
|
||||
| ([^ '\\' '"'] # lf)+ as s
|
||||
{
|
||||
Buffer.add_string buf s;
|
||||
scan_string buf start stack lexbuf
|
||||
}
|
||||
| eof
|
||||
{
|
||||
error lexbuf "unterminated string"
|
||||
}
|
||||
|
||||
and scan_string_after_escaped_newline buf start stack = parse
|
||||
| [' ' '\t']*
|
||||
{ scan_string buf start stack lexbuf }
|
||||
| ""
|
||||
{ scan_string buf start stack lexbuf }
|
||||
|
||||
and trailing = parse
|
||||
| lf | dos_newline
|
||||
{ Lexing.new_line lexbuf; trailing lexbuf }
|
||||
| blank+
|
||||
{ trailing lexbuf }
|
||||
| (';' (_ # lf_cr)*)
|
||||
{ trailing lexbuf }
|
||||
| eof
|
||||
{ () }
|
||||
| _
|
||||
{ error lexbuf "garbage after s-expression" }
|
||||
|
||||
and is_ocaml_script = parse
|
||||
| "(* -*- tuareg -*- *)" { true }
|
||||
| "" { false }
|
||||
|
||||
{
|
||||
let single lexbuf =
|
||||
match main Empty lexbuf with
|
||||
| None -> error lexbuf "no s-expression found"
|
||||
| Some sexp -> trailing lexbuf; sexp
|
||||
|
||||
let many lexbuf =
|
||||
let rec loop acc =
|
||||
match main Empty lexbuf with
|
||||
| None -> List.rev acc
|
||||
| Some sexp -> loop (sexp :: acc)
|
||||
in
|
||||
loop []
|
||||
|
||||
let many_or_ocaml_script lexbuf =
|
||||
match is_ocaml_script lexbuf with
|
||||
| true -> Ocaml_script
|
||||
| false -> Sexps (many lexbuf)
|
||||
|
||||
module Load = struct
|
||||
let single fn =
|
||||
Io.with_lexbuf_from_file fn ~f:single
|
||||
|
||||
let many fn =
|
||||
Io.with_lexbuf_from_file fn ~f:many
|
||||
|
||||
let many_or_ocaml_script fn =
|
||||
Io.with_lexbuf_from_file fn ~f:many_or_ocaml_script
|
||||
end
|
||||
}
|
|
@ -191,7 +191,7 @@ module Cached_digest = struct
|
|||
|
||||
let load () =
|
||||
if Sys.file_exists db_file then begin
|
||||
let sexp = Sexp_lexer.Load.single db_file in
|
||||
let sexp = Sexp.load ~fname:db_file ~mode:Single in
|
||||
let bindings =
|
||||
let open Sexp.Of_sexp in
|
||||
list
|
||||
|
|
|
@ -55,7 +55,7 @@ struct
|
|||
let to_string path x = To_sexp.t path x |> Sexp.to_string
|
||||
|
||||
let load path =
|
||||
Of_sexp.t path (Sexp_lexer.Load.single (Path.to_string path))
|
||||
Of_sexp.t path (Sexp.load ~fname:(Path.to_string path) ~mode:Single)
|
||||
end
|
||||
|
||||
|
||||
|
|
|
@ -83,4 +83,4 @@ let t sexps =
|
|||
; contexts = List.rev contexts
|
||||
}
|
||||
|
||||
let load fn = t (Sexp_lexer.Load.many fn)
|
||||
let load fname = t (Sexp.load ~fname ~mode:Many)
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -e -o pipefail
|
||||
|
||||
TMP="$(mktemp -d)"
|
||||
trap "rm -rf $TMP" EXIT
|
||||
|
||||
rm -rf usexp
|
||||
mkdir -p usexp/src
|
||||
|
||||
(cd $TMP && git clone https://github.com/janestreet/usexp.git)
|
||||
|
||||
SRC=$TMP/usexp
|
||||
|
||||
cp -v $SRC/LICENSE.md usexp
|
||||
|
||||
cp -v $SRC/src/*.{ml,mli} usexp/src
|
||||
|
||||
git checkout usexp/src/jbuild
|
||||
git add -A .
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,3 @@
|
|||
(jbuild_version 1)
|
||||
|
||||
(library ((name usexp)))
|
|
@ -0,0 +1,380 @@
|
|||
type stack =
|
||||
| Empty
|
||||
| Open of Lexing.position * stack
|
||||
| Sexp of Sexp_ast.t * stack
|
||||
let empty_stack = Empty
|
||||
|
||||
type 'a mode =
|
||||
| Single : Sexp_ast.t mode
|
||||
| Many : Sexp_ast.t list mode
|
||||
|
||||
type 'a state =
|
||||
{ mutable automaton_state : int
|
||||
; mutable depth : int
|
||||
; (* Number of opened #| when parsing a block comment *)
|
||||
mutable block_comment_depth : int
|
||||
; (* Number of full sexp to ignore. This correspond to the number of consecutive #;.
|
||||
We don't have to track nested #; because they do not change the result. *)
|
||||
mutable ignoring : int
|
||||
; (* Only meaningful when [ignoring] > 0. Number of opened parentheses of the
|
||||
outermost sexp comment. *)
|
||||
mutable ignoring_depth : int
|
||||
; (* When parsing an escape sequence of the form "\\NNN" or "\\XX", this accumulates
|
||||
the computed number *)
|
||||
mutable escaped_value : int
|
||||
; (* Buffer for accumulating atoms *)
|
||||
atom_buffer : Buffer.t
|
||||
; fname : string
|
||||
; mutable full_sexps : int
|
||||
; mutable offset : int (* global offset *)
|
||||
; mutable line_number : int
|
||||
; mutable bol_offset : int (* offset of beginning of line *)
|
||||
; (* Starting positions of the current token *)
|
||||
mutable token_start_pos : Lexing.position
|
||||
; mode : 'a mode
|
||||
}
|
||||
|
||||
(* these magic numbers are checked in gen_parser_automaton.ml:
|
||||
let () = assert (initial = 0)
|
||||
let () = assert (to_int Error = 1) *)
|
||||
let initial_state = 0
|
||||
let error_state = 1
|
||||
|
||||
let new_state ~fname mode =
|
||||
{ depth = 0
|
||||
; automaton_state = initial_state
|
||||
; block_comment_depth = 0
|
||||
; ignoring = 0
|
||||
; ignoring_depth = 0
|
||||
; escaped_value = 0
|
||||
; atom_buffer = Buffer.create 128
|
||||
; mode = mode
|
||||
; full_sexps = 0
|
||||
; offset = 0
|
||||
; line_number = 1
|
||||
; bol_offset = 0
|
||||
; fname = fname
|
||||
; token_start_pos = { pos_fname = fname; pos_cnum = 0; pos_lnum = 1; pos_bol = 0 }
|
||||
}
|
||||
|
||||
let mode t = t.mode
|
||||
|
||||
let offset t = t.offset
|
||||
let line t = t.line_number
|
||||
let column t = t.offset - t.bol_offset
|
||||
|
||||
let position t =
|
||||
{ Lexing.
|
||||
pos_fname = t.fname
|
||||
; pos_cnum = t.offset
|
||||
; pos_lnum = t.line_number
|
||||
; pos_bol = t.bol_offset
|
||||
}
|
||||
|
||||
let has_unclosed_paren state = state.depth > 0
|
||||
|
||||
let set_error_state state = state.automaton_state <- error_state
|
||||
|
||||
let sexp_of_stack : stack -> Sexp_ast.t = function
|
||||
| Sexp (sexp, Empty) -> sexp
|
||||
| _ -> failwith "Parser_automaton.sexp_of_stack"
|
||||
|
||||
let sexps_of_stack =
|
||||
let rec loop acc : stack -> Sexp_ast.t list = function
|
||||
| Empty -> acc
|
||||
| Open _ -> failwith "Parser_automaton.sexps_of_stack"
|
||||
| Sexp (sexp, stack) -> loop (sexp :: acc) stack
|
||||
in
|
||||
fun stack -> loop [] stack
|
||||
|
||||
let automaton_state state = state.automaton_state
|
||||
|
||||
module Error = struct
|
||||
type t =
|
||||
{ position : Lexing.position
|
||||
; message : string
|
||||
}
|
||||
|
||||
exception Parse_error of t
|
||||
|
||||
module Reason = struct
|
||||
(* To be kept in sync with the Error module in gen/gen_parser_automaton.ml *)
|
||||
type t =
|
||||
| Unexpected_char_parsing_hex_escape
|
||||
| Unexpected_char_parsing_dec_escape
|
||||
| Unterminated_quoted_string
|
||||
| Unterminated_block_comment
|
||||
| Escape_sequence_out_of_range
|
||||
| Unclosed_paren
|
||||
| Too_many_sexps
|
||||
| Closed_paren_without_opened
|
||||
| Comment_token_in_unquoted_atom
|
||||
| Sexp_comment_without_sexp
|
||||
| Unexpected_character_after_cr
|
||||
| No_sexp_found_in_input
|
||||
| Automaton_in_error_state
|
||||
end
|
||||
|
||||
let raise state ~at_eof (reason : Reason.t) =
|
||||
set_error_state state;
|
||||
let message =
|
||||
(* These messages where choosen such that we can build the various Sexplib parsing
|
||||
functions on top of Parsexp and keep the same exceptions.
|
||||
|
||||
At the time of writing this, a simple layer on top of parsexp to implement the
|
||||
sexplib API is passing all the sexplib tests.
|
||||
|
||||
Note that parsexp matches the semantic of Sexp.parse which is slightly
|
||||
different from the ocamllex/ocamlyacc based parser of Sexplib. The latter one
|
||||
is less tested and assumed to be less used. *)
|
||||
match reason with
|
||||
| Unexpected_char_parsing_hex_escape ->
|
||||
"unterminated hexadecimal escape sequence"
|
||||
| Unexpected_char_parsing_dec_escape ->
|
||||
"unterminated decimal escape sequence"
|
||||
| Unterminated_quoted_string ->
|
||||
"unterminated quoted string"
|
||||
| Unterminated_block_comment ->
|
||||
"unterminated block comment"
|
||||
| Escape_sequence_out_of_range ->
|
||||
"escape sequence in quoted string out of range"
|
||||
| Unclosed_paren ->
|
||||
"unclosed parentheses at end of input"
|
||||
| Too_many_sexps ->
|
||||
"s-expression followed by data"
|
||||
| Closed_paren_without_opened ->
|
||||
"unexpected character: ')'"
|
||||
| Comment_token_in_unquoted_atom ->
|
||||
if Buffer.contents state.atom_buffer = "|" then
|
||||
"illegal end of comment"
|
||||
else
|
||||
"comment tokens in unquoted atom"
|
||||
| Sexp_comment_without_sexp ->
|
||||
"unterminated sexp comment"
|
||||
| Unexpected_character_after_cr ->
|
||||
if at_eof then
|
||||
"unexpected end of input after carriage return"
|
||||
else
|
||||
"unexpected character after carriage return"
|
||||
| No_sexp_found_in_input ->
|
||||
"no s-expression found in input"
|
||||
| Automaton_in_error_state ->
|
||||
failwith "Parsexp.Parser_automaton: parser is dead"
|
||||
in
|
||||
let position = position state in
|
||||
raise (Parse_error { position; message })
|
||||
|
||||
let position t = t.position
|
||||
let message t = t.message
|
||||
end
|
||||
|
||||
exception Parse_error = Error.Parse_error
|
||||
|
||||
type 'a action = 'a state -> char -> stack -> stack
|
||||
type 'a epsilon_action = 'a state -> stack -> stack
|
||||
|
||||
let current_pos ?(delta=0) state : Lexing.position =
|
||||
let offset = state.offset + delta in
|
||||
{ pos_fname = state.fname
|
||||
; pos_lnum = state.line_number
|
||||
; pos_cnum = offset
|
||||
; pos_bol = state.bol_offset
|
||||
}
|
||||
|
||||
let set_automaton_state state x = state.automaton_state <- x
|
||||
|
||||
let advance state = state.offset <- state.offset + 1
|
||||
|
||||
let advance_eol state =
|
||||
let newline_offset = state.offset in
|
||||
state.offset <- newline_offset + 1;
|
||||
state.bol_offset <- state.offset;
|
||||
state.line_number <- state.line_number + 1
|
||||
|
||||
let block_comment_depth state = state.block_comment_depth
|
||||
|
||||
let add_token_char _state _char stack = stack
|
||||
|
||||
let add_atom_char state c stack =
|
||||
Buffer.add_char state.atom_buffer c;
|
||||
stack
|
||||
|
||||
let add_quoted_atom_char state c stack =
|
||||
Buffer.add_char state.atom_buffer c;
|
||||
add_token_char state c stack
|
||||
|
||||
let check_new_sexp_allowed : type a. a state -> unit = fun state ->
|
||||
let is_single = match state.mode with Single -> true | _ -> false in
|
||||
if is_single && state.full_sexps > 0 && state.ignoring = 0 then
|
||||
Error.raise state ~at_eof:false Too_many_sexps
|
||||
|
||||
let add_first_char state char stack =
|
||||
check_new_sexp_allowed state;
|
||||
Buffer.add_char state.atom_buffer char;
|
||||
(* For non-quoted atoms, we save both positions at the end. We can always determine the
|
||||
start position from the end position and the atom length for non-quoted atoms.
|
||||
|
||||
Doing it this way allows us to detect single characater atoms for which we need to
|
||||
save the position twice. *)
|
||||
stack
|
||||
|
||||
let eps_add_first_char_hash state stack =
|
||||
check_new_sexp_allowed state;
|
||||
Buffer.add_char state.atom_buffer '#';
|
||||
stack
|
||||
|
||||
let start_quoted_string state _char stack =
|
||||
check_new_sexp_allowed state;
|
||||
state.token_start_pos <- current_pos state;
|
||||
stack
|
||||
|
||||
let add_escaped state c stack =
|
||||
let c' =
|
||||
match c with
|
||||
| 'n' -> '\n'
|
||||
| 'r' -> '\r'
|
||||
| 'b' -> '\b'
|
||||
| 't' -> '\t'
|
||||
| '\\' | '\'' | '"' -> c
|
||||
| _ -> Buffer.add_char state.atom_buffer '\\'; c
|
||||
in
|
||||
Buffer.add_char state.atom_buffer c';
|
||||
add_token_char state c stack
|
||||
|
||||
let eps_add_escaped_cr state stack =
|
||||
Buffer.add_char state.atom_buffer '\r';
|
||||
stack
|
||||
|
||||
let dec_val c = Char.code c - Char.code '0'
|
||||
|
||||
let hex_val c =
|
||||
match c with
|
||||
| '0'..'9' -> Char.code c - Char.code '0'
|
||||
| 'a'..'f' -> Char.code c - Char.code 'a' + 10
|
||||
| _ -> Char.code c - Char.code 'A' + 10
|
||||
|
||||
let add_dec_escape_char state c stack =
|
||||
state.escaped_value <- state.escaped_value * 10 + dec_val c;
|
||||
add_token_char state c stack
|
||||
|
||||
let add_last_dec_escape_char state c stack =
|
||||
let value = state.escaped_value * 10 + dec_val c in
|
||||
state.escaped_value <- 0;
|
||||
if value > 255 then Error.raise state ~at_eof:false Escape_sequence_out_of_range;
|
||||
Buffer.add_char state.atom_buffer (Char.unsafe_chr value);
|
||||
add_token_char state c stack
|
||||
|
||||
let comment_add_last_dec_escape_char state c stack =
|
||||
let value = state.escaped_value * 10 + dec_val c in
|
||||
state.escaped_value <- 0;
|
||||
if value > 255 then Error.raise state ~at_eof:false Escape_sequence_out_of_range;
|
||||
add_token_char state c stack
|
||||
|
||||
let add_hex_escape_char state c stack =
|
||||
state.escaped_value <- (state.escaped_value lsl 4) lor hex_val c;
|
||||
add_token_char state c stack
|
||||
|
||||
let add_last_hex_escape_char state c stack =
|
||||
let value = (state.escaped_value lsl 4) lor hex_val c in
|
||||
state.escaped_value <- 0;
|
||||
Buffer.add_char state.atom_buffer (Char.unsafe_chr value);
|
||||
add_token_char state c stack
|
||||
|
||||
let opening state _char stack =
|
||||
check_new_sexp_allowed state;
|
||||
state.depth <- state.depth + 1;
|
||||
if state.ignoring = 0 then
|
||||
Open (current_pos state, stack)
|
||||
else
|
||||
stack
|
||||
|
||||
let sexp_or_comment_added ~is_comment state stack ~delta:_ =
|
||||
if state.ignoring lor state.depth <> 0 then
|
||||
stack
|
||||
else begin
|
||||
if not is_comment then state.full_sexps <- state.full_sexps + 1;
|
||||
stack
|
||||
end
|
||||
|
||||
let sexp_added state stack ~delta =
|
||||
let stack = sexp_or_comment_added ~is_comment:false state stack ~delta in
|
||||
if state.ignoring <> 0 && state.ignoring_depth = state.depth then begin
|
||||
state.ignoring <- state.ignoring - 1;
|
||||
stack
|
||||
end else stack
|
||||
|
||||
let rec make_list stop acc : stack -> stack = function
|
||||
| Empty -> assert false
|
||||
| Open (start, stack) -> Sexp (List ({ start; stop }, acc), stack)
|
||||
| Sexp (sexp, stack) -> make_list stop (sexp :: acc) stack
|
||||
|
||||
let closing state _char stack =
|
||||
if state.depth > 0 then begin
|
||||
let stack =
|
||||
if state.ignoring = 0 then
|
||||
make_list (current_pos state ~delta:1) [] stack
|
||||
else
|
||||
stack
|
||||
in
|
||||
if state.ignoring <> 0 && state.ignoring_depth = state.depth then
|
||||
Error.raise state ~at_eof:false Sexp_comment_without_sexp;
|
||||
state.depth <- state.depth - 1;
|
||||
sexp_added state stack ~delta:1
|
||||
end else
|
||||
Error.raise state ~at_eof:false Closed_paren_without_opened
|
||||
|
||||
let make_loc ?(delta=0) state : Sexp_ast.Loc.t =
|
||||
{ start = state.token_start_pos
|
||||
; stop = current_pos state ~delta
|
||||
}
|
||||
|
||||
let eps_push_atom state stack =
|
||||
let str = Buffer.contents state.atom_buffer in
|
||||
Buffer.clear state.atom_buffer;
|
||||
let stack =
|
||||
if state.ignoring = 0 then
|
||||
Sexp (Atom (make_loc state, str), stack)
|
||||
else
|
||||
stack
|
||||
in
|
||||
sexp_added state stack ~delta:0
|
||||
|
||||
let push_quoted_atom state _char stack =
|
||||
let str = Buffer.contents state.atom_buffer in
|
||||
Buffer.clear state.atom_buffer;
|
||||
let stack =
|
||||
if state.ignoring = 0 then
|
||||
Sexp (Atom (make_loc state ~delta:1, str), stack)
|
||||
else
|
||||
stack
|
||||
in
|
||||
sexp_added state stack ~delta:1
|
||||
|
||||
let start_sexp_comment state _char stack =
|
||||
if state.ignoring = 0 || state.ignoring_depth = state.depth then begin
|
||||
state.ignoring <- state.ignoring + 1;
|
||||
state.ignoring_depth <- state.depth
|
||||
end;
|
||||
stack
|
||||
|
||||
let start_block_comment state _char stack =
|
||||
state.block_comment_depth <- state.block_comment_depth + 1;
|
||||
stack
|
||||
|
||||
let end_block_comment state _char stack =
|
||||
state.block_comment_depth <- state.block_comment_depth - 1;
|
||||
stack
|
||||
|
||||
let start_line_comment _state _char stack = stack
|
||||
let end_line_comment _state stack = stack
|
||||
|
||||
let eps_eoi_check : type a. a state -> stack -> stack = fun state stack ->
|
||||
if state.depth > 0 then Error.raise state ~at_eof:true Unclosed_paren;
|
||||
if state.ignoring > 0 then Error.raise state ~at_eof:true Sexp_comment_without_sexp;
|
||||
if state.full_sexps = 0 then (
|
||||
match state.mode with
|
||||
| Many -> ()
|
||||
| Single ->
|
||||
Error.raise state ~at_eof:true No_sexp_found_in_input
|
||||
);
|
||||
stack
|
|
@ -0,0 +1,132 @@
|
|||
(** Internal bits used by the generated automaton, not part of the public API *)
|
||||
|
||||
(** Internal state of the automaton *)
|
||||
type 'a state
|
||||
|
||||
type 'a mode =
|
||||
| Single : Sexp_ast.t mode
|
||||
| Many : Sexp_ast.t list mode
|
||||
|
||||
type stack
|
||||
val empty_stack : stack
|
||||
|
||||
val new_state
|
||||
: fname:string
|
||||
-> 'a mode
|
||||
-> 'a state
|
||||
|
||||
val mode : 'a state -> 'a mode
|
||||
|
||||
(** Number of characters fed to the parser *)
|
||||
val offset : _ state -> int
|
||||
|
||||
(** Position in the text *)
|
||||
val line : _ state -> int
|
||||
val column : _ state -> int
|
||||
|
||||
(** Whether there are some unclosed parentheses *)
|
||||
val has_unclosed_paren : _ state -> bool
|
||||
|
||||
val set_error_state : _ state -> unit
|
||||
|
||||
val sexp_of_stack : stack -> Sexp_ast.t
|
||||
val sexps_of_stack : stack -> Sexp_ast.t list
|
||||
|
||||
val automaton_state : _ state -> int
|
||||
|
||||
module Error : sig
|
||||
type t
|
||||
|
||||
val position : t -> Lexing.position
|
||||
val message : t -> string
|
||||
|
||||
module Reason : sig
|
||||
type t =
|
||||
| Unexpected_char_parsing_hex_escape
|
||||
| Unexpected_char_parsing_dec_escape
|
||||
| Unterminated_quoted_string
|
||||
| Unterminated_block_comment
|
||||
| Escape_sequence_out_of_range
|
||||
| Unclosed_paren
|
||||
| Too_many_sexps
|
||||
| Closed_paren_without_opened
|
||||
| Comment_token_in_unquoted_atom
|
||||
| Sexp_comment_without_sexp
|
||||
| Unexpected_character_after_cr
|
||||
| No_sexp_found_in_input
|
||||
| Automaton_in_error_state
|
||||
end
|
||||
|
||||
val raise : _ state -> at_eof:bool -> Reason.t -> _
|
||||
end
|
||||
|
||||
exception Parse_error of Error.t
|
||||
|
||||
val set_automaton_state : _ state -> int -> unit
|
||||
|
||||
(** Advance the position counters. [advance_eol] is for when we read a newline
|
||||
character. *)
|
||||
val advance : _ state -> unit
|
||||
val advance_eol : _ state -> unit
|
||||
|
||||
(** Number of opened #| *)
|
||||
val block_comment_depth : _ state -> int
|
||||
|
||||
type 'a action = 'a state -> char -> stack -> stack
|
||||
type 'a epsilon_action = 'a state -> stack -> stack
|
||||
|
||||
(** Add a character to the atom buffer. [add_quoted_atom_char] does the same for quoted
|
||||
atoms *)
|
||||
val add_atom_char : _ action
|
||||
val add_quoted_atom_char : _ action
|
||||
|
||||
(** Add a character that just follows a '\\' and the '\\' itself if necessary. *)
|
||||
val add_escaped : _ action
|
||||
|
||||
(** [escaped_value <- escaped_value * 10 + (char - '0')]
|
||||
|
||||
These functions make the assumption that [char] is between '0' and '9'.
|
||||
[add_dec_escape_char] also assumes the result doesn't overflow. The automaton
|
||||
definition must make sure this is the case.
|
||||
|
||||
[add_last_dec_escape_char] also adds the resulting character to the atom buffer.
|
||||
*)
|
||||
val add_dec_escape_char : _ action
|
||||
val add_last_dec_escape_char : _ action
|
||||
|
||||
(** Same but for quoted strings inside comments. Useful because it can fail. *)
|
||||
val comment_add_last_dec_escape_char : _ action
|
||||
|
||||
(** Same as [add_dec_escape_char] but for hexadicemal escape sequences *)
|
||||
val add_hex_escape_char : _ action
|
||||
val add_last_hex_escape_char : _ action
|
||||
|
||||
(** Ignore one more full sexp to come *)
|
||||
val start_sexp_comment : _ action
|
||||
|
||||
(** Add the first char of an unquoted atom. *)
|
||||
val add_first_char : _ action
|
||||
val start_quoted_string : _ action
|
||||
|
||||
(** Takes note of a control character in quoted atoms or the uninterpreted characters of
|
||||
comments, for which there is no corresponding [add_*] call (a backslash and the x in
|
||||
"\xff" or any character in a line comment). This does not get called for the opening
|
||||
([start_quoted_string]) or closing ([push_quoted_atom]) quotes themselves.
|
||||
*)
|
||||
val add_token_char : _ action
|
||||
|
||||
val opening : _ action
|
||||
val closing : _ action
|
||||
val push_quoted_atom : _ action
|
||||
|
||||
val start_block_comment : _ action
|
||||
val end_block_comment : _ action
|
||||
|
||||
val start_line_comment : _ action
|
||||
val end_line_comment : _ epsilon_action
|
||||
|
||||
val eps_push_atom : _ epsilon_action
|
||||
val eps_add_first_char_hash : _ epsilon_action
|
||||
val eps_eoi_check : _ epsilon_action
|
||||
|
||||
val eps_add_escaped_cr : _ epsilon_action
|
|
@ -0,0 +1,10 @@
|
|||
module Loc = struct
|
||||
type t =
|
||||
{ start : Lexing.position
|
||||
; stop : Lexing.position
|
||||
}
|
||||
end
|
||||
|
||||
type t =
|
||||
| Atom of Loc.t * string
|
||||
| List of Loc.t * t list
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,2 @@
|
|||
val transitions : Obj.t Parser_automaton_internal.action array
|
||||
val transitions_eoi : Obj.t Parser_automaton_internal.epsilon_action array
|
|
@ -0,0 +1,290 @@
|
|||
open StdLabels
|
||||
|
||||
module A = Parser_automaton_internal
|
||||
|
||||
module Atom = struct
|
||||
type t = string
|
||||
|
||||
let escaped_length s =
|
||||
let n = ref 0 in
|
||||
for i = 0 to String.length s - 1 do
|
||||
n := !n +
|
||||
(match String.unsafe_get s i with
|
||||
| '\"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2
|
||||
| ' ' .. '~' -> 1
|
||||
| _ -> 4)
|
||||
done;
|
||||
!n
|
||||
|
||||
let must_escape s = escaped_length s > String.length s
|
||||
|
||||
let escaped_internal s ~with_double_quotes =
|
||||
let n = escaped_length s in
|
||||
if n = String.length s then s else begin
|
||||
let s' = Bytes.create (n + if with_double_quotes then 2 else 0) in
|
||||
let n = ref 0 in
|
||||
if with_double_quotes then begin
|
||||
Bytes.unsafe_set s' 0 '"';
|
||||
n := 1
|
||||
end;
|
||||
for i = 0 to String.length s - 1 do
|
||||
begin match String.unsafe_get s i with
|
||||
| ('\"' | '\\') as c ->
|
||||
Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n c
|
||||
| '\n' ->
|
||||
Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 'n'
|
||||
| '\t' ->
|
||||
Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 't'
|
||||
| '\r' ->
|
||||
Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 'r'
|
||||
| '\b' ->
|
||||
Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 'b'
|
||||
| (' ' .. '~') as c -> Bytes.unsafe_set s' !n c
|
||||
| c ->
|
||||
let a = Char.code c in
|
||||
Bytes.unsafe_set s' !n '\\';
|
||||
incr n;
|
||||
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + a / 100));
|
||||
incr n;
|
||||
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + (a / 10) mod 10));
|
||||
incr n;
|
||||
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + a mod 10));
|
||||
end;
|
||||
incr n
|
||||
done;
|
||||
if with_double_quotes then Bytes.unsafe_set s' !n '"';
|
||||
Bytes.unsafe_to_string s'
|
||||
end
|
||||
|
||||
let escaped s = escaped_internal s ~with_double_quotes:false
|
||||
let serialize s = escaped_internal s ~with_double_quotes:true
|
||||
end
|
||||
|
||||
type t =
|
||||
| Atom of string
|
||||
| List of t list
|
||||
|
||||
type sexp = t
|
||||
|
||||
let rec to_string = function
|
||||
| Atom s -> Atom.serialize s
|
||||
| List l -> Printf.sprintf "(%s)" (List.map l ~f:to_string |> String.concat ~sep:" ")
|
||||
|
||||
let rec pp ppf = function
|
||||
| Atom s ->
|
||||
Format.pp_print_string ppf (Atom.serialize s)
|
||||
| List [] ->
|
||||
Format.pp_print_string ppf "()"
|
||||
| List (first :: rest) ->
|
||||
Format.pp_open_box ppf 1;
|
||||
Format.pp_print_string ppf "(";
|
||||
Format.pp_open_hvbox ppf 0;
|
||||
pp ppf first;
|
||||
List.iter rest ~f:(fun sexp ->
|
||||
Format.pp_print_space ppf ();
|
||||
pp ppf sexp);
|
||||
Format.pp_close_box ppf ();
|
||||
Format.pp_print_string ppf ")";
|
||||
Format.pp_close_box ppf ()
|
||||
|
||||
let split_string s ~on =
|
||||
let rec loop i j =
|
||||
if j = String.length s then
|
||||
[String.sub s ~pos:i ~len:(j - i)]
|
||||
else if s.[j] = on then
|
||||
String.sub s ~pos:i ~len:(j - i) :: loop (j + 1) (j + 1)
|
||||
else
|
||||
loop i (j + 1)
|
||||
in
|
||||
loop 0 0
|
||||
|
||||
let rec pp_split_strings ppf = function
|
||||
| Atom s ->
|
||||
if Atom.must_escape s then begin
|
||||
if String.contains s '\n' then begin
|
||||
match split_string s ~on:'\n' with
|
||||
| [] -> Format.pp_print_string ppf (Atom.serialize s)
|
||||
| first :: rest ->
|
||||
Format.fprintf ppf "@[<hv 1>\"@{<atom>%s" (String.escaped first);
|
||||
List.iter rest ~f:(fun s ->
|
||||
Format.fprintf ppf "@,\\n%s" (String.escaped s));
|
||||
Format.fprintf ppf "@}\"@]"
|
||||
end else
|
||||
Format.fprintf ppf "%S" s
|
||||
end else
|
||||
Format.pp_print_string ppf s
|
||||
| List [] ->
|
||||
Format.pp_print_string ppf "()"
|
||||
| List (first :: rest) ->
|
||||
Format.pp_open_box ppf 1;
|
||||
Format.pp_print_string ppf "(";
|
||||
Format.pp_open_hvbox ppf 0;
|
||||
pp_split_strings ppf first;
|
||||
List.iter rest ~f:(fun sexp ->
|
||||
Format.pp_print_space ppf ();
|
||||
pp_split_strings ppf sexp);
|
||||
Format.pp_close_box ppf ();
|
||||
Format.pp_print_string ppf ")";
|
||||
Format.pp_close_box ppf ()
|
||||
|
||||
type formatter_state =
|
||||
| In_atom
|
||||
| In_makefile_action
|
||||
| In_makefile_stuff
|
||||
|
||||
let prepare_formatter ppf =
|
||||
let state = ref [] in
|
||||
Format.pp_set_mark_tags ppf true;
|
||||
let ofuncs = Format.pp_get_formatter_out_functions ppf () in
|
||||
let tfuncs = Format.pp_get_formatter_tag_functions ppf () in
|
||||
Format.pp_set_formatter_tag_functions ppf
|
||||
{ tfuncs with
|
||||
mark_open_tag = (function
|
||||
| "atom" -> state := In_atom :: !state; ""
|
||||
| "makefile-action" -> state := In_makefile_action :: !state; ""
|
||||
| "makefile-stuff" -> state := In_makefile_stuff :: !state; ""
|
||||
| s -> tfuncs.mark_open_tag s)
|
||||
; mark_close_tag = (function
|
||||
| "atom" | "makefile-action" | "makefile-stuff" -> state := List.tl !state; ""
|
||||
| s -> tfuncs.mark_close_tag s)
|
||||
};
|
||||
Format.pp_set_formatter_out_functions ppf
|
||||
{ ofuncs with
|
||||
out_newline = (fun () ->
|
||||
match !state with
|
||||
| [In_atom; In_makefile_action] ->
|
||||
ofuncs.out_string "\\\n\t" 0 3
|
||||
| [In_atom] ->
|
||||
ofuncs.out_string "\\\n" 0 2
|
||||
| [In_makefile_action] ->
|
||||
ofuncs.out_string " \\\n\t" 0 4
|
||||
| [In_makefile_stuff] ->
|
||||
ofuncs.out_string " \\\n" 0 3
|
||||
| [] ->
|
||||
ofuncs.out_string "\n" 0 1
|
||||
| _ -> assert false)
|
||||
; out_spaces = (fun n ->
|
||||
ofuncs.out_spaces
|
||||
(match !state with
|
||||
| In_atom :: _ -> max 0 (n - 2)
|
||||
| _ -> n))
|
||||
}
|
||||
|
||||
module Loc = Sexp_ast.Loc
|
||||
|
||||
module Ast = struct
|
||||
type t = Sexp_ast.t =
|
||||
| Atom of Loc.t * string
|
||||
| List of Loc.t * t list
|
||||
|
||||
let loc (Atom (loc, _) | List (loc, _)) = loc
|
||||
|
||||
let rec remove_locs : t -> sexp = function
|
||||
| Atom (_, s) -> Atom s
|
||||
| List (_, l) -> List (List.map l ~f:remove_locs)
|
||||
|
||||
module Token = struct
|
||||
type t =
|
||||
| Atom of Loc.t * string
|
||||
| Lparen of Loc.t
|
||||
| Rparen of Loc.t
|
||||
end
|
||||
|
||||
let tokenize =
|
||||
let rec loop acc t =
|
||||
match t with
|
||||
| Atom (loc, s) -> Token.Atom (loc, s) :: acc
|
||||
| List (loc, l) ->
|
||||
let shift (pos : Lexing.position) delta =
|
||||
{ pos with pos_cnum = pos.pos_cnum + delta }
|
||||
in
|
||||
let l_loc = { loc with stop = shift loc.start 1 } in
|
||||
let r_loc = { loc with start = shift loc.stop (-1) } in
|
||||
let acc = Token.Lparen l_loc :: acc in
|
||||
let acc = List.fold_left l ~init:acc ~f:loop in
|
||||
let acc = Token.Rparen r_loc :: acc in
|
||||
acc
|
||||
in
|
||||
fun t -> loop [] t |> List.rev
|
||||
end
|
||||
|
||||
let rec add_loc t ~loc : Ast.t =
|
||||
match t with
|
||||
| Atom s -> Atom (loc, s)
|
||||
| List l -> List (loc, List.map l ~f:(add_loc ~loc))
|
||||
|
||||
module Parser = struct
|
||||
module Error = A.Error
|
||||
exception Error = A.Parse_error
|
||||
|
||||
module Mode = struct
|
||||
type 'a t = 'a A.mode =
|
||||
| Single : Ast.t t
|
||||
| Many : Ast.t list t
|
||||
end
|
||||
|
||||
module Stack = struct
|
||||
type t = A.stack
|
||||
let empty = A.empty_stack
|
||||
end
|
||||
|
||||
type 'a t = 'a A.state
|
||||
let create ~fname ~mode = A.new_state ~fname mode
|
||||
|
||||
let feed : type a. a A.action = fun state char stack ->
|
||||
let idx = (A.automaton_state state lsl 8) lor (Char.code char) in
|
||||
(* We need an Obj.magic as the type of the array can't be generalized.
|
||||
This problem will go away when we get immutable arrays. *)
|
||||
(Obj.magic (Table.transitions.(idx) : Obj.t A.action) : a A.action) state char stack
|
||||
[@@inline always]
|
||||
|
||||
let feed_eoi : type a. a t -> Stack.t -> a = fun state stack ->
|
||||
let stack =
|
||||
(Obj.magic (Table.transitions_eoi.(A.automaton_state state)
|
||||
: Obj.t A.epsilon_action)
|
||||
: a A.epsilon_action) state stack
|
||||
in
|
||||
A.set_error_state state;
|
||||
match A.mode state with
|
||||
| Mode.Single -> A.sexp_of_stack stack
|
||||
| Mode.Many -> A.sexps_of_stack stack
|
||||
|
||||
let rec feed_substring_unsafe str state stack i stop =
|
||||
if i < stop then
|
||||
let c = String.unsafe_get str i in
|
||||
let stack = feed state c stack in
|
||||
feed_substring_unsafe str state stack (i + 1) stop
|
||||
else
|
||||
stack
|
||||
|
||||
let rec feed_subbytes_unsafe str state stack i stop =
|
||||
if i < stop then
|
||||
let c = Bytes.unsafe_get str i in
|
||||
let stack = feed state c stack in
|
||||
feed_subbytes_unsafe str state stack (i + 1) stop
|
||||
else
|
||||
stack
|
||||
|
||||
let feed_substring state str ~pos ~len stack =
|
||||
let str_len = String.length str in
|
||||
if pos < 0 || len < 0 || pos > str_len - len then
|
||||
invalid_arg "Jbuilder_sexp.feed_substring";
|
||||
feed_substring_unsafe str state stack pos (pos + len)
|
||||
|
||||
let feed_subbytes state str ~pos ~len stack =
|
||||
let str_len = Bytes.length str in
|
||||
if pos < 0 || len < 0 || pos > str_len - len then
|
||||
invalid_arg "Jbuilder_sexp.feed_subbytes";
|
||||
feed_subbytes_unsafe str state stack pos (pos + len)
|
||||
|
||||
let feed_string state str stack =
|
||||
feed_substring_unsafe str state stack 0 (String.length str)
|
||||
|
||||
let feed_bytes state str stack =
|
||||
feed_subbytes_unsafe str state stack 0 (Bytes.length str)
|
||||
end
|
||||
|
||||
let parse_string ~fname ~mode str =
|
||||
let p = Parser.create ~fname ~mode in
|
||||
let stack = Parser.feed_string p str Parser.Stack.empty in
|
||||
Parser.feed_eoi p stack
|
|
@ -0,0 +1,112 @@
|
|||
(** Parsing of s-expressions *)
|
||||
|
||||
module Atom : sig
|
||||
type t = string
|
||||
|
||||
(** Whether the atom must be escaped when serialized *)
|
||||
val must_escape : t -> bool
|
||||
|
||||
(** Escape all special characters in the atom *)
|
||||
val escaped : t -> string
|
||||
|
||||
(** [serialize t] is the serialized representation of [t], so either
|
||||
[t] either [escaped t] surrounded by double quotes. *)
|
||||
val serialize : t -> string
|
||||
end
|
||||
|
||||
module Loc : sig
|
||||
type t =
|
||||
{ start : Lexing.position
|
||||
; stop : Lexing.position
|
||||
}
|
||||
end
|
||||
|
||||
(** The S-expression type *)
|
||||
type t =
|
||||
| Atom of string
|
||||
| List of t list
|
||||
|
||||
(** Serialize a S-expression *)
|
||||
val to_string : t -> string
|
||||
|
||||
(** Serialize a S-expression using indentation to improve readability *)
|
||||
val pp : Format.formatter -> t -> unit
|
||||
|
||||
(** Same as [pp], but split long strings. The formatter must have been
|
||||
prepared with [prepare_formatter]. *)
|
||||
val pp_split_strings : Format.formatter -> t -> unit
|
||||
|
||||
(** Prepare a formatter for [pp_split_strings]. Additionaly the
|
||||
formatter escape newlines when the tags "makefile-action" or
|
||||
"makefile-stuff" are active. *)
|
||||
val prepare_formatter : Format.formatter -> unit
|
||||
|
||||
(** Abstract syntax tree *)
|
||||
module Ast : sig
|
||||
type sexp = t
|
||||
type t =
|
||||
| Atom of Loc.t * Atom.t
|
||||
| List of Loc.t * t list
|
||||
|
||||
val loc : t -> Loc.t
|
||||
|
||||
val remove_locs : t -> sexp
|
||||
|
||||
module Token : sig
|
||||
type t =
|
||||
| Atom of Loc.t * string
|
||||
| Lparen of Loc.t
|
||||
| Rparen of Loc.t
|
||||
end
|
||||
|
||||
val tokenize : t -> Token.t list
|
||||
end with type sexp := t
|
||||
|
||||
val add_loc : t -> loc:Loc.t -> Ast.t
|
||||
|
||||
module Parser : sig
|
||||
module Error : sig
|
||||
type t
|
||||
|
||||
val position : t -> Lexing.position
|
||||
val message : t -> string
|
||||
end
|
||||
|
||||
(** Exception raised in case of a parsing error *)
|
||||
exception Error of Error.t
|
||||
|
||||
module Mode : sig
|
||||
type sexp = t
|
||||
type 'a t =
|
||||
| Single : Ast.t t
|
||||
| Many : Ast.t list t
|
||||
end with type sexp := t
|
||||
|
||||
module Stack : sig
|
||||
(** Parser stack. The stack is not in [state] for optimization purposes. *)
|
||||
type t
|
||||
|
||||
val empty : t
|
||||
end
|
||||
|
||||
type 'a t
|
||||
|
||||
(** Create a new parser state. [fname] is the filename the input is from. *)
|
||||
val create : fname:string -> mode:'a Mode.t -> 'a t
|
||||
|
||||
(** Feed one character to the parser. In case of error, it raises [Parse_error] *)
|
||||
val feed : _ t -> char -> Stack.t -> Stack.t
|
||||
|
||||
(** Instruct the parser that the end of input was reached. In case of error, it raises
|
||||
[Parse_error] *)
|
||||
val feed_eoi : 'a t -> Stack.t -> 'a
|
||||
|
||||
(** {3 Convenience functions} *)
|
||||
|
||||
val feed_string : _ t -> string -> Stack.t -> Stack.t
|
||||
val feed_substring : _ t -> string -> pos:int -> len:int -> Stack.t -> Stack.t
|
||||
val feed_bytes : _ t -> bytes -> Stack.t -> Stack.t
|
||||
val feed_subbytes : _ t -> bytes -> pos:int -> len:int -> Stack.t -> Stack.t
|
||||
end
|
||||
|
||||
val parse_string : fname:string -> mode:'a Parser.Mode.t -> string -> 'a
|
Loading…
Reference in New Issue