#pragma once #include "bnf.h" #include "minicc.h" #include #include #include #include #include class GrammerTest; namespace Gram { struct NodePosition { index_t node_id{}; // 0-based index_t child_pos{}; // 0-based }; // TreeNodes are only intermediate. Terminal symbols don't get TreeNodes // token_id: index into token list // node_id: index into tree node list struct TreeNode { NodePosition pos; // position of this node in tree (i.e. parent node_id + child_pos in parent) index_t node_id{}; // this node's id std::string type; index_t variant; // bnf[type][variant] std::vector child_ids; // < 0: terminal: token_id; > 0: non-terminal: node_id; = 0: unset }; class Compiler { private: // The result std::vector nodes; // Input std::vector tokens; BNF m_bnf; // not const for access via operator[] const std::string m_top; std::unordered_map, PairHash>> reversedPosFirst; // possible parent types of first childs of a given type std::deque symbol_variants; decltype(symbol_variants)::iterator symbol_variants_it; // Tree specific void clear(); // Node specific std::string GetTypeOfNode(index_t node_id) const; index_t AddNode(const std::string& type, index_t variant, NodePosition pos = {}); // Adds actually used Non-Terminal Symbol Removes it on scope exit (RAII) class AddNodeGuard { Compiler& m_compiler; public: AddNodeGuard(Compiler& compiler, index_t variant); ~AddNodeGuard(); }; void IncNodePosition(NodePosition& pos); // top-down algorithm std::unordered_map m_min; // cache for MinimumSymbolsNeeded size_t minimumSymbolsNeeded(const std::string& symbol); size_t minimumSymbolsNeeded(const std::vector& symbol_list); bool match(std::string symbol, size_t begin, size_t end); bool match(std::vector symbol_list, size_t begin, size_t end); // start / end cache struct TupleHash { size_t operator()(const std::tuple& t) const noexcept { size_t h0 {std::hash{}(std::get<0>(t))}; size_t h1 {std::hash{}(std::get<1>(t))}; size_t h2 {std::hash{}(std::get<2>(t))}; return h0 ^ (h1 << 1) ^ (h2 << 2); } }; // map combination of non-terminal+terminal symbol combination to list of // possible BNF positions for the specified non-terminal symbol std::unordered_map, std::vector, PairHashSS> m_match_lut; std::unordered_map m_empty_lut; // list of non-terminal symbols that can lead to empty token list (maps to variant) void fillStartCache(); void constructTree(); public: Compiler(BNF& bnf, const std::string& top); std::vector compile(std::vector p_tokens); void DumpTree(); friend class ::GrammerTest; }; bool ChildIdIsEmpty(int32_t child_id); bool ChildIdIsToken(int32_t child_id); // negative values bool ChildIdIsNode(int32_t child_id); // identity index_t TokenIdFromChildId(int32_t child_id); int32_t ChildIdFromTokenId(index_t token_id); // only convenience functions index_t NodeIdFromChildId(int32_t child_id); int32_t ChildIdFromNodeId(index_t node_id); } // namespace Gram