#pragma once #include "bnf.h" #include "minicc.h" #include #include #include #include #include class GrammerTest; namespace Gram { struct NodePosition { index_t node_id{}; // 0-based index_t child_pos{}; // 0-based }; // TreeNodes are only intermediate. Terminal symbols don't get TreeNodes // token_id: index into token list // node_id: index into tree node list struct TreeNode { NodePosition pos; // position of this node in tree index_t node_id{}; std::string type; index_t variant; // bnf[type][variant] std::vector child_ids; // < 0: terminal: token_id; > 0: non-terminal: node_id; = 0: unset }; class Compiler { private: // The result std::vector nodes; // Input std::vector tokens; BNF &bnf; // not const for access via operator[] const std::string m_top; //std::unordered_map> ReverseBNF; // possible parent types of a given type; unused now: remove? //std::unordered_map> reversedFirst; // possible parent types of first childs of a given type std::unordered_map, PairHash>> reversedPosFirst; // possible parent types of first childs of a given type std::deque symbol_variants; decltype(symbol_variants)::iterator symbol_variants_it; // Tree specific void clear(); // Node specific std::string GetTypeOfNode(index_t node_id) const; index_t AddNode(const std::string& type, index_t variant, NodePosition pos = {}); // Adds actually used Non-Terminal Symbol Removes it on scope exit (RAII) class AddNodeGuard { Compiler& m_compiler; public: AddNodeGuard(Compiler& compiler, index_t variant); ~AddNodeGuard(); }; void IncNodePosition(NodePosition& pos); // top-down algorithm std::unordered_map m_min; // cache for MinimumSymbolsNeeded size_t minimumSymbolsNeeded(const std::string& symbol); size_t minimumSymbolsNeeded(const std::vector& symbol_list); bool match(std::string symbol, size_t begin, size_t end); bool match(std::vector symbol_list, size_t begin, size_t end); // start / end cache struct TupleHash { size_t operator()(const std::tuple& t) const noexcept { size_t h0 {std::hash{}(std::get<0>(t))}; size_t h1 {std::hash{}(std::get<1>(t))}; size_t h2 {std::hash{}(std::get<2>(t))}; return h0 ^ (h1 << 1) ^ (h2 << 2); } }; std::unordered_set, TupleHash> m_start_cache; bool canStartWith(const std::string& non_terminal, size_t variant, const std::string& terminal) const; void fillStartCache(); void constructTree(); std::vector m_symbol_list; index_t m_symbol_list_pos{}; public: Compiler(BNF& bnf, const std::string& top); std::vector compile(std::vector p_tokens); void DumpTree(); friend class ::GrammerTest; }; bool ChildIdIsEmpty(int32_t child_id); bool ChildIdIsToken(int32_t child_id); bool ChildIdIsNode(int32_t child_id); index_t TokenIdFromChildId(int32_t child_id); int32_t ChildIdFromTokenId(index_t token_id); } // namespace Gram