6 files changed, 47 insertions, 13 deletions
diff --git a/bnf.cpp b/bnf.cpp
index f240f29..7290962 100644
--- a/bnf.cpp
+++ b/bnf.cpp
@@ -19,6 +19,26 @@ std::map<std::string, std::set<std::string>> Reverse(BNF bnf)
  return result;
 }
 
+std::map<std::string, std::set<std::string>> reverseFirst(BNF bnf)
+{
+ std::map<std::string, std::set<std::string>> result;
+
+ for (const auto& [from, to] : bnf) {
+  for (const auto& list : to) {
+   if (list.size() > 0) {
+    const auto& element{list[0]};
+    auto i{result.find(element)};
+    if (i != result.end()) // already present
+     i->second.insert(from);
+    else // new element
+     result.emplace(element, std::set{from});
+   }
+  }
+ }
+
+ return result;
+}
+
 BNF SubBNF(const BNF& bnf, const std::string& top)
 {
  BNF result;
diff --git a/bnf.h b/bnf.h
index 56aacb0..b14ca58 100644
--- a/bnf.h
+++ b/bnf.h
@@ -11,6 +11,7 @@ using namespace std::string_literals;
 
 using BNF = std::map<std::string, std::vector<std::vector<std::string>>>;
 
-std::map<std::string, std::set<std::string>> Reverse(BNF bnf);
+std::map<std::string, std::set<std::string>> Reverse(BNF bnf); // unused now, remove?
+std::map<std::string, std::set<std::string>> reverseFirst(BNF bnf);
 
 BNF SubBNF(const BNF& bnf, const std::string& top);
diff --git a/cpp.cpp b/cpp.cpp
index f02b047..fc614be 100644
--- a/cpp.cpp
+++ b/cpp.cpp
@@ -82,6 +82,7 @@ std::pair<index_t, std::vector<TreeNode>> CPP::preprocessing_tokenize(const std:
   {" "}, {"\t"}, {"\n"}, {"\r"}
  };
  Gram::Compiler compiler(bnf, "file");
+ debug = true;
  std::pair<index_t, std::vector<TreeNode>> Tree = compiler.compile(m_charTokens);
 
  debug = true;
@@ -246,12 +247,21 @@ protected:
  }
 };
 
+#if 0
 TEST_F(CppTest, preprocessing_tokenize) {
  CPP cpp;
  auto ppTree = cpp.preprocessing_tokenize("int main() { return 1; }");
 
  cpp.tokens_from_pptokens(ppTree);
 }
+#endif
+
+TEST_F(CppTest, preprocessing_tokenize2) {
+ CPP cpp;
+ auto ppTree = cpp.preprocessing_tokenize("in ma");
+
+ cpp.tokens_from_pptokens(ppTree);
+}
 
 #if 0
 TEST(Cpp, translate) {
diff --git a/grammer.cpp b/grammer.cpp
index 8243fa8..be01adc 100644
--- a/grammer.cpp
+++ b/grammer.cpp
@@ -170,8 +170,8 @@ void Compiler::AddFirstNode()
  root_node_id = 0;
 
  const std::string& child_type = tokens[0].type;
- auto it = ReverseBNF.find(child_type);
- if (it == ReverseBNF.end())
+ auto it = reversedFirst.find(child_type);
+ if (it == reversedFirst.end())
   throw std::runtime_error("Illegal first token: "s + child_type + " ("s + tokens[0].value + ")"s);
 
  std::set<std::string>& alternatives_set {it->second};
@@ -212,8 +212,8 @@ bool Compiler::AddRootNode()
   AddFirstNode();
  } else {
   const std::string& child_type = nodes[root_node_id].type; // starting at old root
-  auto it = ReverseBNF.find(child_type);
-  if (it == ReverseBNF.end()) // this one doesn't have a parent, maybe a start symbol to discard?
+  auto it = reversedFirst.find(child_type);
+  if (it == reversedFirst.end()) // this one doesn't have a parent, maybe a start symbol to discard?
    return false;
 
   index_t old_root_node_id {root_node_id};
@@ -260,6 +260,7 @@ void Compiler::removeTokensUpTo(index_t token_id)
  removeTokensUpTo(token_id, root_node_id);
 }
 
+// operate on node_id
 void Compiler::removeTokensUpTo(index_t token_id, index_t node_id)
 {
  // token_id should be the new tokens_used
@@ -279,7 +280,7 @@ void Compiler::removeTokensUpTo(index_t token_id, index_t node_id)
   }
 
   // recurse from back, to remove tokens from end
-  for (auto i = child_ids.size() - 1; token_id < tokens_used && i >= 0; i--) {
+  for (int i = child_ids.size() - 1; token_id < tokens_used && i >= 0; i--) {
    if (!ChildIdIsToken(child_ids[i])) {
     removeTokensUpTo(token_id, child_ids[i]);
    }
@@ -376,9 +377,9 @@ std::map<std::string, std::string> Compiler::traverse(const std::string& lower,
 
   auto it {visited.find(current_node)};
   if (it == visited.end()) { // not visited, yet: visit now
-   auto parents_it {ReverseBNF.find(current_node)};
+   auto parents_it {reversedFirst.find(current_node)};
 
-   if (parents_it != ReverseBNF.end()) {
+   if (parents_it != reversedFirst.end()) {
     auto& parents {parents_it->second};
     
     visited[current_node] = current_child;
@@ -441,7 +442,7 @@ index_t Compiler::AddNode(const std::string& child_type, index_t parent_index)
  nodes.emplace_back(TreeNode{parent_index, index, child_type, variant, alternatives, std::vector<int32_t>{}});
  //root stays, tokens_used stays
 
- Debug("AddNode(): "s + parent.type + "->"s + child_type + ": "s + std::to_string(index));
+ Debug("AddNode(): "s + nodes[parent_index].type + "->"s + child_type + ": "s + std::to_string(index));
  DumpTree();
 
  return index;
@@ -490,7 +491,7 @@ bool Compiler::FillTree()
  return true;
 }
 
-Compiler::Compiler(BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}
+Compiler::Compiler(BNF& bnf, const std::string& Top): bnf(bnf), Top(Top), ReverseBNF{Reverse(bnf)}, reversedFirst{reverseFirst(bnf)}
 {
 }
 
diff --git a/grammer.h b/grammer.h
index bdb2718..c2eb705 100644
--- a/grammer.h
+++ b/grammer.h
@@ -38,8 +38,8 @@ private:
  BNF &bnf; // not const for access via operator[]
  const std::string& Top;
 
- std::map<std::string, std::set<std::string>> ReverseBNF; // possible parent types of a given type
-
+ std::map<std::string, std::set<std::string>> ReverseBNF; // possible parent types of a given type; unused now: remove?
+ std::map<std::string, std::set<std::string>> reversedFirst; // possible parent types of first childs of a given type
 
  // Tree specific
  void clear();
diff --git a/test-lexer.cpp b/test-lexer.cpp
index 79b9930..735f670 100644
--- a/test-lexer.cpp
+++ b/test-lexer.cpp
@@ -64,7 +64,7 @@ TEST_F(Test, BNF) {
  // implicit?
  //std::set<std::string> Terminals{"identifier", "=", ";"};
 
- std::string Code{"a = bc ; c = 123 ; esd = Ff ; 1 = XYZ ; "};
+ std::string Code{"a = bc ; c = 123 ; esd = Ff ; "};//1 = XYZ ; "};
  std::vector<Token> tokens_reference{
   {"identifier", "a", { 1, 1} },
   {"preprocessing-op-or-punc", "=", { 1, 3}},
@@ -78,10 +78,12 @@ TEST_F(Test, BNF) {
   {"preprocessing-op-or-punc", "=", { 1, 24}},
   {"identifier", "Ff", { 1, 26}},
   {"preprocessing-op-or-punc", ";", { 1, 29}},
+#if 0
   {"pp-number", "1", { 1, 31}},
   {"preprocessing-op-or-punc", "=", { 1, 33}},
   {"identifier", "XYZ", { 1, 35}},
   {"preprocessing-op-or-punc", ";", { 1, 39}},
+#endif
  };
 
  Lex::Lexer lexer(LexBNF, LexTop);