From c9fddcbb77717f34ff81ce89f6aa0b71eefabcea Mon Sep 17 00:00:00 2001 From: Jeremie Dimino Date: Sun, 13 Nov 2016 10:01:32 +0000 Subject: [PATCH] meta parser --- .gitignore | 2 + src/import.ml | 36 +++++++++++++--- src/kind.ml | 6 +-- src/meta.ml | 100 +++++++++++++++++++++++++++++++++++++++------ src/meta_lexer.mli | 12 ++++++ src/meta_lexer.mll | 48 ++++++++++++++++------ 6 files changed, 172 insertions(+), 32 deletions(-) create mode 100644 .gitignore create mode 100644 src/meta_lexer.mli diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..49193b34 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +jbuild +jbuild.* diff --git a/src/import.ml b/src/import.ml index db7e2d3e..aa2c1996 100644 --- a/src/import.ml +++ b/src/import.ml @@ -28,13 +28,39 @@ let (^/) a b = a ^ "/" ^ b let sprintf = Printf.sprintf +let protectx x ~finally ~f = + match f x with + | y -> finally x; y + | exception e -> finally x; raise e + +let with_file_in fn ~f = + protectx (open_in fn) ~finally:close_in ~f + +let with_lexbuf_from_file fn ~f = + with_file_in fn ~f:(fun ic -> f (Lexing.from_channel ic)) + let lines_of_file fn = - let ic = open_in fn in - let rec loop acc = + let rec loop ic acc = match input_line ic with - | exception End_of_file -> close_in ic; List.rev acc - | line -> loop (line :: acc) + | exception End_of_file -> List.rev acc + | line -> loop ic (line :: acc) in - loop [] + with_file_in fn ~f:(fun ic -> loop ic []) +type location = + { start : Lexing.position + ; stop : Lexing.position + } +let lexeme_loc lb = + { start = Lexing.lexeme_start lb + ; stop = Lexing.lexeme_stop lb + } + +exception File_error of location * string + +let file_error ~loc fmt = + Printf.ksprintf (fun msg -> raise (File_error (loc, msg))) fmt + +let lex_error lb fmt = + file_error ~loc:(lexeme_loc lb) fmt diff --git a/src/kind.ml b/src/kind.ml index 0241efee..4c8c2744 100644 --- a/src/kind.ml +++ b/src/kind.ml @@ -44,7 +44,7 @@ let save kind ~filename x = close_out oc let load kind ~filename = - let ic = open_in filename in - let sexp, _locs = Sexp_lexer.single (Lexing.from_channel ic) in - close_in ic; + let sexp, _locs = + with_lexbuf_from_file filename ~f:Sexp_lexer.single lb + in of_sexp kind sexp diff --git a/src/meta.ml b/src/meta.ml index 7b28826d..cf8d9332 100644 --- a/src/meta.ml +++ b/src/meta.ml @@ -1,20 +1,96 @@ -(** META file representation *) +open Import type t = - { name : string - ; sub : t list - ; defs : def list + { name : string + ; entries : entry list } -and kind = Set | Add +and entry = + | Comment of string + | Var of string * predicate list * action * string + | Package of t -and def = - { kind : kind - ; var : string - ; predicates : predicate list - } +and action = Set | Add and predicate = - | Pos of string - | Neg of string + | P of string (* Present *) + | A of string (* Absent *) + +module Parse = struct + let error = lex_error + + let next = Meta_lexer.token + + let package_name lb = + match next lb with + | String s -> + if String.contains s '.' then + error lb "'.' not allowed in sub-package names"; + s + | _ -> error lb "package name expected" + + let string lb = + match lb with + | String s -> s + | _ -> error lb "string expected" + + let lparen lb = + match next lb with + | Lparen -> () + | _ -> error lb "'(' expected" + + let action lb = + match next lb with + | Equal -> Set + | Plus_equal -> Add + | _ -> error lb "'=' or '+=' expected" + + let comma lb = + match next lb with + | Comma -> () + | _ -> error lb "',' expected" + + let rec predicates_and_action lb acc = + match next lb with + | Rparen -> (List.rev acc, action lb) + | Name n -> comma lb; predicates_and_action lb (P n :: acc) + | Minus -> + let n = + match next lb with + | Name p -> p + | _ -> error lb "name expected" + in + comma lb; + predicates_and_action lb (A n :: acc) + | _ -> error lb "name, '-' or ')' expected" + + let rec entries lb depth acc = + match next lb with + | Rparen -> + if depth > 0 then + List.rev acc + else + error lb "closing parenthesis without matching opening one" + | Name "package" -> + let name = package_name lb in + lparen lb; + let entries = entries lb (depth + 1) [] in + entries lb depth (Package { name; entries } :: acc) + | Name var -> + let preds, action = + match next lb with + | Equal -> ([], Set) + | Plus_equal -> ([], Add) + | Lparen -> predicates_and_action lb [] + | _ -> error lb "'=', '+=' or '(' expected" + in + let value = string lb in + Var (var, preds, action, value) + | _ -> + error lb "'package' or variable name expected" +end + +let parse fn = + with_lexbuf_from_file fn ~f:(fun lb -> + Parse.entries lb 0 []) diff --git a/src/meta_lexer.mli b/src/meta_lexer.mli new file mode 100644 index 00000000..b3a41d6d --- /dev/null +++ b/src/meta_lexer.mli @@ -0,0 +1,12 @@ +type token = + | Name of string + | String of string + | Minus + | Lparen + | Rparen + | Comma + | Equal + | Plus_equal + | Eof + +val token : Lexing.lexbuf -> token diff --git a/src/meta_lexer.mll b/src/meta_lexer.mll index 0ab52225..a9f38984 100644 --- a/src/meta_lexer.mll +++ b/src/meta_lexer.mll @@ -1,15 +1,39 @@ -{ open Meta_parser } +{ +type token = + | Name of string + | String of string + | Minus + | Lparen + | Rparen + | Comma + | Equal + | Plus_equal + | Eof +} rule token = parse - | [' ' '\t']* { token lexbuf } - | '#' [^ '\r' '\n']* { token lexbuf } - | ("\n" | "\r\n") { Lexing.new_line lexbuf; token lexbuf } + | [' ' '\t' '\r']* { token lexbuf } + | '#' [^ '\n']* { token lexbuf } + | '\n' { Lexing.new_line lexbuf; token lexbuf } - | ['A'-'Z' 'a'-'z' '0'-'9' '_' '.']+ as s { NAME s } - | '"' ([^'"']* as s) '"' { STRING s } - | '-' { MINUS } - | '(' { LPAREN } - | ')' { RPAREN } - | ',' { COMMA } - | '=' { EQUAL } - | "+=" { PLUS_EQUAL } + | ['A'-'Z' 'a'-'z' '0'-'9' '_' '.']+ as s { Name s } + | '"' ([^ '\\' '"']* ( '\\' ['\\' '"'] [^ '\\' '"']* )* as s) '"' + { let len = String.length s in + let buf = Buffer.create len in + let rec loop i = + if i = len then + Buffer.contents buf + else + match s.[i] with + | '\\' -> Buffer.add_char s.[i + 1]; loop (i + 2) + | _ -> Buffer.add_char s.[i ]; loop (i + 1) + in + String (loop 0) } + | '-' { Minus } + | '(' { Lparen } + | ')' { Rparen } + | ',' { Comma } + | '=' { Equal } + | "+=" { Plus_equal } + | eof { Eof } + | _ { lex_error lexbuf "invalid character" }