1 files changed, 456 insertions, 0 deletions
diff --git a/weblog.cpp b/weblog.cpp
new file mode 100644
index 0000000..cc2be34
--- /dev/null
+++ b/weblog.cpp
@@ -0,0 +1,456 @@
+#include "weblog.h"
+
+#include "libreichwein/mime.h"
+#include "libreichwein/stringhelper.h"
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/algorithm/string/replace.hpp>
+#include <boost/property_tree/ptree.hpp>
+#include <boost/property_tree/xml_parser.hpp>
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <regex>
+#include <string>
+
+using namespace std::string_literals;
+namespace fs = std::filesystem;
+namespace pt = boost::property_tree;
+using namespace Reichwein::Mime;
+using namespace Reichwein::Stringhelper;
+
+namespace {
+
+ const size_t number_of_articles_on_front_page {10}; // page size of the paged article list on the index page
+ const std::string article_filename{"article.data"}; // name of the file inside an article directory that holds header lines and body
+
+ // Reports a status (used for errors and redirects): sets the "status" and "content_type" response headers and returns "<status> <message>" as page text
+ std::string HttpStatus(std::string status, std::string message, std::function<plugin_interface_setter_type>& SetResponseHeader)
+ {
+  SetResponseHeader("status", status);
+  SetResponseHeader("content_type", "text/html");
+  return status.append(1, ' ').append(message);
+ }
+
+ // Returns the complete contents of file `filename`, read in binary mode.
+ // Throws std::runtime_error if the file can't be opened, measured or read completely.
+ std::string getFile(const fs::path& filename)
+ {
+  std::ifstream file(filename.string(), std::ios::in | std::ios::binary | std::ios::ate);
+  if (!file.is_open())
+   throw std::runtime_error("Opening "s + filename.string() + " for reading");
+  // opened at the end (ios::ate), so the position is the file size; tellg() reports failure as -1
+  const std::streamoff fileSize = file.tellg();
+  if (fileSize < 0)
+   throw std::runtime_error("Reading size of "s + filename.string());
+  std::string bytes(static_cast<size_t>(fileSize), ' ');
+  file.seekg(0, std::ios::beg);
+  if (!file.read(bytes.data(), fileSize)) // short read: don't return padded garbage
+   throw std::runtime_error("Reading "s + filename.string());
+  return bytes;
+ }
+
+ bool is_index_page(std::string& rel_target) // true for the weblog root: empty target or "/"
+ {
+  return rel_target.empty() || rel_target.compare("/") == 0;
+ }
+
+ bool is_index_file(std::string& rel_target, fs::path& path) // top-level file whose mime_type() is not "application/text"
+ {
+  const bool top_level{rel_target.find("/") == std::string::npos};
+  return top_level && mime_type(path.string()) != "application/text"; // && short-circuits as before: no MIME lookup for nested targets
+ }
+
+ bool is_article_page(std::string& rel_target, fs::path& path) // target of 2+ characters ending in '/', with `path` being a directory
+ {
+  return (rel_target.size() < 2 || rel_target.back() != '/') ? false : fs::is_directory(path);
+ }
+
+ bool is_article_file(std::string& rel_target, fs::path& path) // regular file other than the article source file; rel_target is not evaluated
+ {
+  return fs::is_regular_file(path) ? path.filename().string() != article_filename : false;
+ }
+
+ struct ArticleInfo // one entry of the article list shown on the index page
+ {
+  fs::path path; // article directory
+  std::string subject; // value of the "Subject" header line
+  std::string date; // value of the "Date" metadata entry
+ };
+
+ // Collects the metadata of the article in directory `path`:
+ //  - "Date": ISO date (YYYY-MM-DD) built from the first 8 characters of the directory name
+ //  - one entry per "Key: value" header line of the article file, up to the first empty line
+ // Header lines without ": " are reported on stderr and skipped.
+ // Throws std::runtime_error if the article file can't be opened.
+ std::unordered_map<std::string, std::string> getMetaData(fs::path path)
+ {
+  // drop a trailing slash, otherwise filename() below would be empty
+  const std::string given{path.string()};
+  if (!given.empty() && given.back() == '/')
+   path = given.substr(0, given.size() - 1);
+
+  std::unordered_map<std::string, std::string> metadata;
+
+  // directory name YYYYMMDD... -> YYYY-MM-DD
+  const std::string dirname{path.filename().string()};
+  metadata["Date"] = dirname.substr(0, 4) + "-"s + dirname.substr(4, 2) + "-"s + dirname.substr(6, 2);
+
+  const fs::path filepath{path / article_filename};
+  std::ifstream file(filepath.string(), std::ios::in);
+  if (!file.is_open())
+   throw std::runtime_error("Opening "s + filepath.string() + " for reading");
+
+  // the header ends at the first empty line, or with the file
+  for (std::string line; std::getline(file, line) && !line.empty(); ) {
+   const size_t separator{line.find(": ")};
+   if (separator == std::string::npos) {
+    std::cerr << "Warning: Found bad header line in " << filepath << ": " << line << std::endl;
+    continue;
+   }
+
+   // a header line replaces an existing entry of the same key, including the derived "Date"
+   const std::string key{line.substr(0, separator)};
+   metadata[key] = line.substr(separator + 2);
+  }
+
+  return metadata;
+ }
+
+ // Returns the articles of index page `page` (0-based), newest first (sorted backwards by path). Reads only that page's metadata.
+ std::vector<ArticleInfo> getArticleList(fs::path& path, size_t page)
+ {
+  std::vector<fs::path> dirs; // article directories of all years
+  for (const auto& year_entry: fs::directory_iterator(path)) {
+   if (!fs::is_directory(year_entry))
+    continue;
+   for (const auto& entry: fs::directory_iterator(year_entry.path()))
+    if (fs::is_directory(entry)) // a stray file must not break the whole index
+     dirs.push_back(entry.path());
+  }
+  const size_t index0{std::min(number_of_articles_on_front_page * page, dirs.size())};
+  const size_t index1{std::min(number_of_articles_on_front_page * (page + 1), dirs.size())};
+  std::partial_sort(dirs.begin(), dirs.begin() + index1, dirs.end(), [](const fs::path& a0, const fs::path& a1){ return a0.string() > a1.string();}); // only [0, index1) has to be in order
+  std::vector<ArticleInfo> result;
+  for (size_t i{index0}; i < index1; ++i) {
+   auto metaData{getMetaData(dirs[i])};
+   result.emplace_back(ArticleInfo{dirs[i], metaData.at("Subject"), metaData.at("Date")});
+  }
+  return result;
+ }
+
+ // Returns the concatenation of all "<xmltext>" nodes of `tree`, depth-first in document order.
+ std::string plainTextFromPTree(const pt::ptree& tree)
+ {
+  std::string result;
+  // by reference: iterating by value deep-copies every child subtree, on each level of the recursion
+  for (const auto& child: tree) {
+   if (child.first == "<xmltext>")
+    result += child.second.data();
+   else
+    result += plainTextFromPTree(child.second);
+  }
+  return result;
+ }
+
+ // Parses `text` as XML and returns only its text content, i.e. with all elements removed.
+ // Exceptions thrown by read_xml() for input it can't parse are passed on to the caller.
+ std::string plainTextFromHTML(const std::string& text)
+ {
+  const int parser_flags{pt::xml_parser::no_comments | pt::xml_parser::no_concat_text};
+  std::istringstream input{text};
+  pt::ptree document;
+  pt::read_xml(input, document, parser_flags);
+  return plainTextFromPTree(document);
+ }
+
+ std::string verbatimText(std::string text) // returns text escaped for HTML and wrapped in <pre>, so that it is displayed literally
+ {
+  boost::algorithm::replace_all(text, "&", "&amp;"); // must come first, otherwise the entities added below would be escaped again
+  boost::algorithm::replace_all(text, "<", "&lt;");
+  boost::algorithm::replace_all(text, ">", "&gt;");
+  return "<pre>"s + text + "</pre>";
+ }
+
+ // Matches the end of a sentence: a period followed by whitespace or a double quote
+ const std::regex re{"\\.[[:space:]\"]", std::regex::basic};
+
+ // Returns the teaser of the article in directory `path` as plain text: the body up to the end
+ // of its first sentence, but at most 1000 bytes. Returns "" if the article file has no body.
+ std::string shortVersion(const fs::path& path)
+ {
+  std::string article {getFile(path / article_filename)};
+  size_t pos0 {article.find("\n\n")}; // an empty line separates header and body
+  if (pos0 == article.npos)
+   return "";
+  article = article.substr(pos0 + 2);
+
+  auto metaData{getMetaData(path)};
+  auto it {metaData.find("Content-Type")};
+  // convert html to plaintext, if tagged as such (default: text/html). The <file> root element
+  // exists only for the XML parser, so it must not be added to other content types.
+  if (it == metaData.end() || it->second == "text/html")
+   article = plainTextFromHTML("<file>" + article + "</file>");
+
+  size_t pos{1000};
+  std::smatch match;
+  if (std::regex_search(article, match, re))
+   pos = std::min(pos, static_cast<size_t>(match.position() + match.length()));
+  // never cut inside a UTF-8 multi-byte sequence (continuation bytes are 10xxxxxx)
+  while (pos > 0 && pos < article.size() && (static_cast<unsigned char>(article[pos]) & 0xC0) == 0x80)
+   --pos;
+  return article.substr(0, pos);
+ }
+
+ class HtmlPage // complete HTML document: fixed header, appendable contents, fixed footer
+ {
+  std::function<std::string(const std::string& key)>& mGetRequestParam; // stored by reference; only called while constructing
+  std::string mContents; // markup placed between header and footer
+  std::string mHeader; // document start up to and including the opening <div class="page">
+  const std::string mFooter; // impressum link and the closing tags
+  // NOTE(review): the request parameter values are inserted into header and footer unescaped
+ public:
+  HtmlPage(std::function<std::string(const std::string& key)>& GetRequestParam,
+           std::string s = ""s) // s: initial contents; GetRequestParam supplies WEBLOG_NAME, WEBLOG_KEYWORDS and plugin_path
+   : mGetRequestParam(GetRequestParam)
+   , mContents(s)
+   , mHeader("<!DOCTYPE html><html><head>"
+             "<meta charset=\"utf-8\"/>"
+             "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">"
+             "<title>" + GetRequestParam("WEBLOG_NAME") + "</title>"
+             "<meta name=\"keywords\" content=\"" + GetRequestParam("WEBLOG_KEYWORDS") + "\"/>"
+             "<link rel=\"shortcut icon\" href=\"" + mGetRequestParam("plugin_path") + "/favicon.ico\" type=\"image/x-icon\"/>"
+	     "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + mGetRequestParam("plugin_path") + "/blog.css\"/>"
+             "</head><body><div class=\"page\">")
+   , mFooter("<br/><br/><br/><div class=\"impressum\"><a href=\"" + mGetRequestParam("plugin_path") + "/impressum.html\">Impressum, Datenschutzerklärung</a></div></div></body></html>")
+  {
+  }
+  // Appends s to the page contents.
+  HtmlPage& operator+=(const std::string& s)
+  {
+   mContents += s;
+   return *this;
+  }
+  // Returns the complete document: header, contents, footer.
+  operator std::string() const
+  {
+   return mHeader + mContents + mFooter;
+  }
+ };
+
+ // Generates index page `page` (0-based): heading, up to number_of_articles_on_front_page teasers (newest first) and paging links; failures yield a "500" page.
+ std::string generateIndexPage(fs::path& path,
+                               std::function<std::string(const std::string& key)>& GetRequestParam,
+                               std::function<plugin_interface_setter_type>& SetResponseHeader,
+                               size_t page)
+ {
+  try {
+   if (page > std::numeric_limits<int>::max())
+    throw std::runtime_error("Bad page index: "s + std::to_string(page));
+   HtmlPage htmlPage{GetRequestParam, "<h1>"s + GetRequestParam("WEBLOG_NAME") + "</h1>"s};
+   fs::path link{ GetRequestParam("plugin_path")};
+   auto list{getArticleList(path, page)};
+   if (list.empty())
+    htmlPage += "(no articles found.)";
+   else {
+    for (const auto& article: list) {
+     std::string linkstart{"<a href=\"" + (link / article.path.filename()).string() + "/\">"};
+     std::string linkend{"</a>"};
+     htmlPage += "<h2>"s + linkstart + article.subject + linkend + "</h2>"s;
+     htmlPage += "<div class=\"date\">" + article.date + "</div>"s;
+     auto sv{shortVersion(article.path)};
+     if (sv.size()) {
+      boost::algorithm::replace_all(sv, "&", "&amp;"); // the teaser is plain text: escape it for HTML, "&" first
+      boost::algorithm::replace_all(sv, "<", "&lt;");
+      boost::algorithm::replace_all(sv, ">", "&gt;");
+      htmlPage += sv + " "s + linkstart + "more..." + linkend;
+     }
+    }
+    htmlPage += "<br/><br/><br/>";
+    if (page > 0)
+     htmlPage += "<a href=\"?page="s + std::to_string(page - 1) + "\">&lt;&lt;newer</a> "s;
+    htmlPage += "page "s + std::to_string(page + 1);
+    if (list.size() == number_of_articles_on_front_page)
+     htmlPage += " <a href=\"?page="s + std::to_string(page + 1) + "\">older&gt;&gt;</a>"s;
+    htmlPage += "<br/>";
+   }
+   SetResponseHeader("cache_control", "no-store");
+   return htmlPage;
+  } catch (const std::exception& ex) {
+   return HttpStatus("500", "Reading Index page: "s + ex.what(), SetResponseHeader);
+  }
+ }
+
+ // Generates the page of the article in directory `path`: subject, date and body (the part of the
+ // article file after the first empty line; verbatim if tagged text/plain). Failures yield a "500" page.
+ std::string generateArticlePage(fs::path& path,
+                                 std::function<std::string(const std::string& key)>& GetRequestParam,
+                                 std::function<plugin_interface_setter_type>& SetResponseHeader)
+ {
+  try {
+   const auto metaData{getMetaData(path)};
+   const std::string file_contents{getFile(path / article_filename)};
+   // header and body are separated by an empty line
+   const size_t header_end{file_contents.find("\n\n")};
+   if (header_end == std::string::npos)
+    throw std::runtime_error("Error parsing article");
+   std::string body{file_contents.substr(header_end + 2)};
+
+   const auto content_type{metaData.find("Content-Type")};
+   const bool is_plain_text{content_type != metaData.end() && content_type->second == "text/plain"};
+   if (is_plain_text)
+    body = verbatimText(body);
+
+   std::string contents{"<h1>"s + metaData.at("Subject") + "</h1>"};
+   contents += "<div class=\"date\">" + metaData.at("Date") + "</div>";
+   contents += "<br/><br/>"s + body + "<br/>&squ;";
+   return HtmlPage{GetRequestParam, contents};
+  } catch (const std::exception& ex) {
+   return HttpStatus("500", "Reading Article: "s + ex.what(), SetResponseHeader);
+  }
+ }
+
+ std::string generateStaticFile(fs::path& path, std::function<plugin_interface_setter_type>& SetResponseHeader) // serves file `path` unmodified
+ {
+  try {
+   SetResponseHeader("content_type", mime_type(path.string())); // derived from the path; HttpStatus() below replaces it on failure
+   return getFile(path);
+  } catch (const std::exception& failure) {
+   return HttpStatus("500", "Reading Article file: "s + failure.what(), SetResponseHeader);
+  }
+ }
+
+ // Decodes application/x-www-form-urlencoded text: '+' becomes a space and "%XX" (two hex
+ // digits) the byte 0xXX. Malformed escapes are kept literally instead of being misdecoded.
+ std::string urlDecode(std::string s)
+ {
+  // value of a hex digit, or -1 if c is none (std::stoi would also accept signs, blanks and "0x")
+  const auto hexValue = [](char c) -> int {
+   if (c >= '0' && c <= '9')
+    return c - '0';
+   if (c >= 'a' && c <= 'f')
+    return c - 'a' + 10;
+   if (c >= 'A' && c <= 'F')
+    return c - 'A' + 10;
+   return -1;
+  };
+  std::string result;
+  result.reserve(s.size()); // decoding never makes the text longer
+  for (size_t pos = 0; pos < s.size(); ++pos) {
+   const char c {s[pos]};
+   const int hi {c == '%' && pos + 2 < s.size() ? hexValue(s[pos + 1]) : -1};
+   const int lo {hi >= 0 ? hexValue(s[pos + 2]) : -1};
+   if (lo >= 0) { // complete escape with two valid hex digits
+    result += static_cast<char>(hi * 16 + lo);
+    pos += 2;
+   } else {
+    result += ((c == '+') ? ' ' : c);
+   }
+  }
+  return result;
+ }
+
+ // Splits the query string off `s`: returns its url-decoded "key=value" pairs (parts without '='
+ // are ignored) and cuts `s` down to the part before the first '?'.
+ std::unordered_map<std::string, std::string> SplitQueryString(std::string& s)
+ {
+  std::unordered_map<std::string, std::string> params;
+  const size_t query_start{s.find('?')};
+  if (query_start == std::string::npos)
+   return params; // no query string: s stays as it is
+
+  const auto parts{split(s.substr(query_start + 1), "&")};
+  for (const auto& assignment: parts) {
+   const size_t equals{assignment.find('=')};
+   if (equals == std::string::npos)
+    continue;
+   params[urlDecode(assignment.substr(0, equals))] = urlDecode(assignment.substr(equals + 1));
+  }
+  s.erase(query_start);
+  return params;
+ }
+
+} // anonymous namespace
+
+std::string weblog_plugin::name() // returns the name of this plugin: "weblog"
+{
+ return "weblog";
+}
+
+weblog_plugin::weblog_plugin()
+{
+ // empty; only a disabled debug trace remains: std::cout << "Plugin constructor" << std::endl;
+}
+
+weblog_plugin::~weblog_plugin()
+{
+ // empty; only a disabled debug trace remains: std::cout << "Plugin destructor" << std::endl;
+}
+
+// Plugin entry point: serves the index page, an article page or a static file, selected by the request parameter "rel_target".
+// Returns the response body; status and headers are reported via SetResponseHeader. Only GET and HEAD are handled.
+std::string weblog_plugin::generate_page(
+  std::function<std::string(const std::string& key)>& GetServerParam,
+  std::function<std::string(const std::string& key)>& GetRequestParam, // request including body (POST...)
+  std::function<void(const std::string& key, const std::string& value)>& SetResponseHeader // to be added to result string
+)
+{
+ try {
+  // Make sure we can handle the method
+  std::string method {GetRequestParam("method")};
+  if (method != "GET" && method != "HEAD")
+   return HttpStatus("400", "Unknown HTTP method", SetResponseHeader);
+
+  // Request path must not contain "..".
+  std::string rel_target{GetRequestParam("rel_target")};
+  std::string target{GetRequestParam("target")};
+  if (rel_target.find("..") != std::string::npos) {
+   return HttpStatus("400", "Illegal request: "s + target, SetResponseHeader);
+  }
+  std::unordered_map<std::string, std::string> query { SplitQueryString(rel_target) };
+  // Build the path to the requested file: a target starting with 4 digits is located in the directory named by them
+  std::string doc_root{GetRequestParam("doc_root")};
+  // plain range check: isdigit() is undefined for negative char values, e.g. UTF-8 bytes
+  if (rel_target.size() >= 4 && std::all_of(rel_target.begin(), rel_target.begin() + 4, [](char c){ return c >= '0' && c <= '9'; })) {
+   rel_target = rel_target.substr(0, 4) + "/" + rel_target;
+  }
+  fs::path path {fs::path{doc_root} / rel_target};
+
+  // Directories need a trailing slash. Judge by the path part only: if target carries a query string,
+  // it would hide a present slash ("dir/?page=1") and the added slash would end up behind it.
+  const std::string target_path{target.substr(0, target.find('?'))};
+  if (target_path.size() && target_path.back() != '/' && fs::is_directory(path)) {
+   std::string location{GetRequestParam("location")};
+   location.insert(std::min(location.find('?'), location.size()), "/"); // before the query string, else at the end
+   SetResponseHeader("location", location);
+   return HttpStatus("301", "Correcting directory path", SetResponseHeader);
+  }
+  SetResponseHeader("content_type", "text/html");
+  size_t page {0};
+  auto it {query.find("page")};
+  if (it != query.end()) {
+   try {
+    page = stoul(it->second);
+   } catch(...) {
+    // ignore: keep default 0
+   }
+  }
+
+  if (is_index_page(rel_target))
+   return generateIndexPage(path, GetRequestParam, SetResponseHeader, page);
+  if (is_article_page(rel_target, path))
+   return generateArticlePage(path, GetRequestParam, SetResponseHeader);
+  if (is_index_file(rel_target, path) || is_article_file(rel_target, path))
+   return generateStaticFile(path, SetResponseHeader);
+  return HttpStatus("404", "Bad path specification: "s + rel_target, SetResponseHeader);
+ } catch (const std::exception& ex) {
+  return HttpStatus("500", "Unknown Error: "s + ex.what(), SetResponseHeader);
+ }
+}
+
+bool weblog_plugin::has_own_authentication() // always false; NOTE(review): presumably leaves authentication to the hosting server - confirm against the plugin interface
+{
+ return false;
+}