#pragma once #include "bnf.h" #include "minicc.h" #include #include #include class GrammerTest; namespace Gram { // TreeNodes are only intermediate. Terminal symbols don't get of TreeNodes // token_id: index into token list // node_id: index into tree node list struct TreeNode { index_t parent_node_id{}; // root if parent_node_id == node_id index_t node_id{}; std::string type; index_t variant; // bnf[type][variant] std::deque> alternatives; // [type][value] alternatives that we can consider on fail. Discard after parsing! std::vector child_ids; // < 0: terminal: token_id; >= 0: non-terminal: node_id; fill token by token }; class Compiler { private: // The result index_t root_node_id{}; std::vector nodes; // Input std::vector tokens; // Helper data index_t tokens_used{0}; // number of tokens used, this is also the next token index to use BNF &bnf; // not const for access via operator[] const std::string m_top; std::unordered_map> ReverseBNF; // possible parent types of a given type; unused now: remove? std::unordered_map> reversedFirst; // possible parent types of first childs of a given type // Tree specific void clear(); // Node specific std::string GetTypeOfNode(index_t node_id) const; bool IsRootNode(index_t node_id) const; bool rootIsStartSymbol() const; bool AllTokensUsed() const; bool treeIsComplete() const; std::vector& getNodeExpectedChilds(index_t node_id); bool subTreeIsComplete(index_t node_id, index_t& to_fill); size_t CommonPrefix(const std::vector& tokens, const std::vector& types); void AddFirstNode(); bool AddRootNode(); void removeTokensUpTo(index_t token_id); void removeTokensUpTo(index_t token_id, index_t node_id); void RemoveLastNode(); void ChangeNodeType(); void TrackBack(); void Validate() const; std::unordered_map traverse(const std::string& lower, const std::string& upper); std::vector GetPath(std::string upper, std::string lower); index_t AddNode(const std::string& child_type, index_t parent_index); void AddPath(const std::vector& path, index_t current_index); bool FillTree(); // top-down algorithm std::unordered_map m_min; // cache size_t minimumSymbolsNeeded(std::string symbol); size_t minimumSymbolsNeeded(std::vector symbol_list); bool match(std::string symbol, size_t begin, size_t end); bool match(std::vector symbol_list, size_t begin, size_t end); public: Compiler(BNF& bnf, const std::string& top); std::pair> compile(std::vector p_tokens); void DumpTree(); friend class ::GrammerTest; }; bool ChildIdIsToken(int32_t child_id); index_t TokenIdFromChildId(int32_t child_id); int32_t ChildIdFromTokenId(index_t token_id); } // namespace Gram