| #pragma once |
|
|
| #include <nlohmann/json_fwd.hpp> |
|
|
| #include <memory> |
| #include <unordered_map> |
| #include <string> |
| #include <string_view> |
| #include <functional> |
| #include <vector> |
| #include <variant> |
|
|
| struct common_grammar_builder; |
|
|
| class common_peg_parser_builder; |
|
|
| using common_peg_parser_id = size_t; |
| constexpr common_peg_parser_id COMMON_PEG_INVALID_PARSER_ID = static_cast<common_peg_parser_id>(-1); |
|
|
| using common_peg_ast_id = size_t; |
| constexpr common_peg_ast_id COMMON_PEG_INVALID_AST_ID = static_cast<common_peg_ast_id>(-1); |
|
|
| |
| class common_peg_parser { |
| common_peg_parser_id id_; |
| common_peg_parser_builder & builder_; |
|
|
| public: |
| common_peg_parser(const common_peg_parser & other) : id_(other.id_), builder_(other.builder_) {} |
| common_peg_parser(common_peg_parser_id id, common_peg_parser_builder & builder) : id_(id), builder_(builder) {} |
|
|
| common_peg_parser & operator=(const common_peg_parser & other); |
| common_peg_parser & operator+=(const common_peg_parser & other); |
| common_peg_parser & operator|=(const common_peg_parser & other); |
|
|
| operator common_peg_parser_id() const { return id_; } |
| common_peg_parser_id id() const { return id_; } |
|
|
| common_peg_parser_builder & builder() const { return builder_; } |
|
|
| |
| common_peg_parser operator+(const common_peg_parser & other) const; |
|
|
| |
| common_peg_parser operator<<(const common_peg_parser & other) const; |
|
|
| |
| common_peg_parser operator|(const common_peg_parser & other) const; |
|
|
| common_peg_parser operator+(const char * str) const; |
| common_peg_parser operator+(const std::string & str) const; |
| common_peg_parser operator<<(const char * str) const; |
| common_peg_parser operator<<(const std::string & str) const; |
| common_peg_parser operator|(const char * str) const; |
| common_peg_parser operator|(const std::string & str) const; |
| }; |
|
|
| common_peg_parser operator+(const char * str, const common_peg_parser & p); |
| common_peg_parser operator+(const std::string & str, const common_peg_parser & p); |
| common_peg_parser operator<<(const char * str, const common_peg_parser & p); |
| common_peg_parser operator<<(const std::string & str, const common_peg_parser & p); |
| common_peg_parser operator|(const char * str, const common_peg_parser & p); |
| common_peg_parser operator|(const std::string & str, const common_peg_parser & p); |
|
|
| enum common_peg_parse_result_type { |
| COMMON_PEG_PARSE_RESULT_FAIL = 0, |
| COMMON_PEG_PARSE_RESULT_SUCCESS = 1, |
| COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT = 2, |
| }; |
|
|
| const char * common_peg_parse_result_type_name(common_peg_parse_result_type type); |
|
|
| struct common_peg_ast_node { |
| common_peg_ast_id id; |
| std::string rule; |
| std::string tag; |
| size_t start; |
| size_t end; |
| std::string_view text; |
| std::vector<common_peg_ast_id> children; |
|
|
| bool is_partial = false; |
| }; |
|
|
| struct common_peg_parse_result; |
|
|
| using common_peg_ast_visitor = std::function<void(const common_peg_ast_node & node)>; |
|
|
| class common_peg_ast_arena { |
| std::vector<common_peg_ast_node> nodes_; |
| public: |
| common_peg_ast_id add_node( |
| const std::string & rule, |
| const std::string & tag, |
| size_t start, |
| size_t end, |
| std::string_view text, |
| std::vector<common_peg_ast_id> children, |
| bool is_partial = false |
| ) { |
| common_peg_ast_id id = nodes_.size(); |
| nodes_.push_back({id, rule, tag, start, end, text, std::move(children), is_partial}); |
| return id; |
| } |
|
|
| const common_peg_ast_node & get(common_peg_ast_id id) const { return nodes_.at(id); } |
|
|
| size_t size() const { return nodes_.size(); } |
|
|
| void clear() { nodes_.clear(); } |
|
|
| void visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const; |
| void visit(const common_peg_parse_result & result, const common_peg_ast_visitor & visitor) const; |
| }; |
|
|
| struct common_peg_parse_result { |
| common_peg_parse_result_type type = COMMON_PEG_PARSE_RESULT_FAIL; |
| size_t start = 0; |
| size_t end = 0; |
|
|
| std::vector<common_peg_ast_id> nodes; |
|
|
| common_peg_parse_result() = default; |
|
|
| common_peg_parse_result(common_peg_parse_result_type type, size_t start) |
| : type(type), start(start), end(start) {} |
|
|
| common_peg_parse_result(common_peg_parse_result_type type, size_t start, size_t end) |
| : type(type), start(start), end(end) {} |
|
|
| common_peg_parse_result(common_peg_parse_result_type type, size_t start, size_t end, std::vector<common_peg_ast_id> nodes) |
| : type(type), start(start), end(end), nodes(std::move(nodes)) {} |
|
|
| bool fail() const { return type == COMMON_PEG_PARSE_RESULT_FAIL; } |
| bool need_more_input() const { return type == COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT; } |
| bool success() const { return type == COMMON_PEG_PARSE_RESULT_SUCCESS; } |
| }; |
|
|
| struct common_peg_parse_context { |
| std::string input; |
| bool is_partial; |
| common_peg_ast_arena ast; |
|
|
| int parse_depth; |
|
|
| common_peg_parse_context() |
| : is_partial(false), parse_depth(0) {} |
|
|
| common_peg_parse_context(const std::string & input) |
| : input(input), is_partial(false), parse_depth(0) {} |
|
|
| common_peg_parse_context(const std::string & input, bool is_partial) |
| : input(input), is_partial(is_partial), parse_depth(0) {} |
| }; |
|
|
| class common_peg_arena; |
|
|
| |
| struct common_peg_epsilon_parser {}; |
|
|
| struct common_peg_start_parser {}; |
|
|
| struct common_peg_end_parser {}; |
|
|
| struct common_peg_literal_parser { |
| std::string literal; |
| }; |
|
|
| struct common_peg_sequence_parser { |
| std::vector<common_peg_parser_id> children; |
| }; |
|
|
| struct common_peg_choice_parser { |
| std::vector<common_peg_parser_id> children; |
| }; |
|
|
| struct common_peg_repetition_parser { |
| common_peg_parser_id child; |
| int min_count; |
| int max_count; |
| }; |
|
|
| struct common_peg_and_parser { |
| common_peg_parser_id child; |
| }; |
|
|
| struct common_peg_not_parser { |
| common_peg_parser_id child; |
| }; |
|
|
| struct common_peg_any_parser {}; |
|
|
| struct common_peg_space_parser {}; |
|
|
| struct common_peg_chars_parser { |
| struct char_range { |
| uint32_t start; |
| uint32_t end; |
| bool contains(uint32_t codepoint) const { return codepoint >= start && codepoint <= end; } |
| }; |
|
|
| std::string pattern; |
| std::vector<char_range> ranges; |
| bool negated; |
| int min_count; |
| int max_count; |
| }; |
|
|
| struct common_peg_json_string_parser {}; |
|
|
| struct common_peg_until_parser { |
| std::vector<std::string> delimiters; |
| }; |
|
|
| struct common_peg_schema_parser { |
| common_peg_parser_id child; |
| std::string name; |
| std::shared_ptr<nlohmann::ordered_json> schema; |
|
|
| |
| bool raw; |
| }; |
|
|
| struct common_peg_rule_parser { |
| std::string name; |
| common_peg_parser_id child; |
| bool trigger; |
| }; |
|
|
| struct common_peg_ref_parser { |
| std::string name; |
| }; |
|
|
| struct common_peg_atomic_parser { |
| common_peg_parser_id child; |
| }; |
|
|
| struct common_peg_tag_parser { |
| common_peg_parser_id child; |
| std::string tag; |
| }; |
|
|
| |
| using common_peg_parser_variant = std::variant< |
| common_peg_epsilon_parser, |
| common_peg_start_parser, |
| common_peg_end_parser, |
| common_peg_literal_parser, |
| common_peg_sequence_parser, |
| common_peg_choice_parser, |
| common_peg_repetition_parser, |
| common_peg_and_parser, |
| common_peg_not_parser, |
| common_peg_any_parser, |
| common_peg_space_parser, |
| common_peg_chars_parser, |
| common_peg_json_string_parser, |
| common_peg_until_parser, |
| common_peg_schema_parser, |
| common_peg_rule_parser, |
| common_peg_ref_parser, |
| common_peg_atomic_parser, |
| common_peg_tag_parser |
| >; |
|
|
| class common_peg_arena { |
| std::vector<common_peg_parser_variant> parsers_; |
| std::unordered_map<std::string, common_peg_parser_id> rules_; |
| common_peg_parser_id root_ = COMMON_PEG_INVALID_PARSER_ID; |
|
|
| public: |
| const common_peg_parser_variant & get(common_peg_parser_id id) const { return parsers_.at(id); } |
| common_peg_parser_variant & get(common_peg_parser_id id) { return parsers_.at(id); } |
|
|
| size_t size() const { return parsers_.size(); } |
| bool empty() const { return parsers_.empty(); } |
|
|
| common_peg_parser_id get_rule(const std::string & name) const; |
| bool has_rule(const std::string & name) const { return rules_.find(name) != rules_.end(); } |
|
|
| common_peg_parser_id root() const { return root_; } |
| void set_root(common_peg_parser_id id) { root_ = id; } |
|
|
| common_peg_parse_result parse(common_peg_parse_context & ctx, size_t start = 0) const; |
| common_peg_parse_result parse(common_peg_parser_id id, common_peg_parse_context & ctx, size_t start) const; |
|
|
| void resolve_refs(); |
|
|
| void build_grammar(const common_grammar_builder & builder, bool lazy = false) const; |
|
|
| std::string dump(common_peg_parser_id id) const; |
|
|
| nlohmann::json to_json() const; |
| static common_peg_arena from_json(const nlohmann::json & j); |
|
|
| std::string save() const; |
| void load(const std::string & data); |
|
|
| friend class common_peg_parser_builder; |
|
|
| private: |
| common_peg_parser_id add_parser(common_peg_parser_variant parser); |
| void add_rule(const std::string & name, common_peg_parser_id id); |
|
|
| common_peg_parser_id resolve_ref(common_peg_parser_id id); |
| }; |
|
|
| class common_peg_parser_builder { |
| common_peg_arena arena_; |
|
|
| common_peg_parser wrap(common_peg_parser_id id) { return common_peg_parser(id, *this); } |
| common_peg_parser add(const common_peg_parser_variant & p) { return wrap(arena_.add_parser(p)); } |
|
|
| public: |
| common_peg_parser_builder(); |
|
|
| |
| |
| common_peg_parser eps() { return add(common_peg_epsilon_parser{}); } |
|
|
| |
| |
| common_peg_parser start() { return add(common_peg_start_parser{}); } |
|
|
| |
| |
| common_peg_parser end() { return add(common_peg_end_parser{}); } |
|
|
| |
| |
| common_peg_parser literal(const std::string & literal) { return add(common_peg_literal_parser{literal}); } |
|
|
| |
| |
| common_peg_parser sequence() { return add(common_peg_sequence_parser{}); } |
| common_peg_parser sequence(const std::vector<common_peg_parser_id> & parsers); |
| common_peg_parser sequence(const std::vector<common_peg_parser> & parsers); |
| common_peg_parser sequence(std::initializer_list<common_peg_parser> parsers); |
|
|
| |
| |
| common_peg_parser choice() { return add(common_peg_choice_parser{}); } |
| common_peg_parser choice(const std::vector<common_peg_parser_id> & parsers); |
| common_peg_parser choice(const std::vector<common_peg_parser> & parsers); |
| common_peg_parser choice(std::initializer_list<common_peg_parser> parsers); |
|
|
| |
| |
| common_peg_parser one_or_more(const common_peg_parser & p) { return repeat(p, 1, -1); } |
|
|
| |
| |
| common_peg_parser zero_or_more(const common_peg_parser & p) { return repeat(p, 0, -1); } |
|
|
| |
| |
| common_peg_parser optional(const common_peg_parser & p) { return repeat(p, 0, 1); } |
|
|
| |
| |
| common_peg_parser peek(const common_peg_parser & p) { return add(common_peg_and_parser{p}); } |
|
|
| |
| |
| common_peg_parser negate(const common_peg_parser & p) { return add(common_peg_not_parser{p}); } |
|
|
| |
| |
| common_peg_parser any() { return add(common_peg_any_parser{}); } |
|
|
| |
| |
| |
| |
| common_peg_parser chars(const std::string & classes, int min = 1, int max = -1); |
|
|
| |
| |
| |
| common_peg_parser ref(const std::string & name) { return add(common_peg_ref_parser{name}); } |
|
|
| |
| |
| common_peg_parser space() { return add(common_peg_space_parser{}); } |
|
|
| |
| |
| common_peg_parser until(const std::string & delimiter) { return add(common_peg_until_parser{{delimiter}}); } |
|
|
| |
| |
| common_peg_parser until_one_of(const std::vector<std::string> & delimiters) { return add(common_peg_until_parser{delimiters}); } |
|
|
| |
| |
| common_peg_parser rest() { return until_one_of({}); } |
|
|
| |
| |
| |
| common_peg_parser repeat(const common_peg_parser & p, int min, int max) { return add(common_peg_repetition_parser{p, min,max}); } |
|
|
| |
| |
| common_peg_parser repeat(const common_peg_parser & p, int n) { return repeat(p, n, n); } |
|
|
| |
| |
| common_peg_parser json(); |
| common_peg_parser json_object(); |
| common_peg_parser json_string(); |
| common_peg_parser json_array(); |
| common_peg_parser json_number(); |
| common_peg_parser json_bool(); |
| common_peg_parser json_null(); |
|
|
| |
| |
| common_peg_parser json_string_content(); |
|
|
| |
| |
| common_peg_parser json_member(const std::string & key, const common_peg_parser & p); |
|
|
| |
| |
| common_peg_parser schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw = false); |
|
|
| |
| |
| |
| common_peg_parser rule(const std::string & name, const common_peg_parser & p, bool trigger = false); |
|
|
| |
| |
| |
| common_peg_parser rule(const std::string & name, const std::function<common_peg_parser()> & builder, bool trigger = false); |
|
|
| |
| |
| common_peg_parser trigger_rule(const std::string & name, const common_peg_parser & p) { return rule(name, p, true); } |
| common_peg_parser trigger_rule(const std::string & name, const std::function<common_peg_parser()> & builder) { return rule(name, builder, true); } |
|
|
| |
| |
| |
| common_peg_parser atomic(const common_peg_parser & p) { return add(common_peg_atomic_parser{p}); } |
|
|
| |
| |
| common_peg_parser tag(const std::string & tag, const common_peg_parser & p) { return add(common_peg_tag_parser{p.id(), tag}); } |
|
|
| void set_root(const common_peg_parser & p); |
|
|
| common_peg_arena build(); |
| }; |
|
|
| |
| common_peg_arena build_peg_parser(const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn); |
|
|