From 1b6682c78518228b705cab2afd5a9eb595a90bbd Mon Sep 17 00:00:00 2001
From: Roland Reichwein <mail@reichwein.it>
Date: Fri, 24 Apr 2020 11:57:28 +0200
Subject: HTML formatting

---
 TODO                      |  4 +++
 plugins/weblog/weblog.cpp | 82 ++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/TODO b/TODO
index d727ebe..9e0fa1e 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,7 @@
+weblog: mail procmail
+weblog: link consistency check (cron?)
+weblog: style: Zitate (quotations)
+
 Integrate into Debian
 Ubuntu version
 Request properties: Remote Address, e.g. [::1]:8081 -> ipv6 / ipv4
diff --git a/plugins/weblog/weblog.cpp b/plugins/weblog/weblog.cpp
index b24573f..92162db 100644
--- a/plugins/weblog/weblog.cpp
+++ b/plugins/weblog/weblog.cpp
@@ -9,6 +9,7 @@
 #include <filesystem>
 #include <fstream>
 #include <iostream>
+#include <regex>
 #include <string>
 
 using namespace std::string_literals;
@@ -172,15 +173,29 @@ namespace {
   return {result.begin(), result.begin() + size};
  }
 
+ // returns the concatenated data of all "<xmltext>" nodes below tree (recursive, in iteration order)
+ std::string plainTextFromPTree(const pt::ptree& tree)
+ {
+  std::string result;
+  for (const auto& child: tree) { // by reference: a by-value loop variable copies the child's whole subtree
+   if (child.first == "<xmltext>")
+    result += child.second.data();
+   else
+    result += plainTextFromPTree(child.second); // other nodes contribute only the text found below them
+  }
+
+  return result;
+ }
+
  // returns plain text of string (html xml elements removed)
- std::string plainText(const std::string& text)
+ std::string plainTextFromHTML(const std::string& text)
  {
   pt::ptree tree;
 
   std::istringstream ss{text};
-  pt::read_xml(ss, tree, pt::xml_parser::no_comments | pt::xml_parser::trim_whitespace);
+  pt::read_xml(ss, tree, pt::xml_parser::no_comments | pt::xml_parser::no_concat_text); // presumably no_concat_text stores each text run as its own "<xmltext>" child - confirm against the Boost docs
 
-  return tree.get<std::string>("file");
+  return plainTextFromPTree(tree); // text of all "<xmltext>" nodes, concatenated
  }
 
  std::string verbatimText(std::string text)
@@ -191,6 +206,8 @@ namespace {
   return "<pre>"s + text + "</pre>";
  }
 
+ std::regex re{"\\.[[:space:]\"]", std::regex::basic}; // a '.' followed by whitespace or '"'; shortVersion() cuts the teaser after the first match
+ 
  // returns teaser of article in plain text
  std::string shortVersion(const fs::path& path)
  {
@@ -207,39 +224,68 @@ namespace {
   // convert html to plaintext, if tagged as such
   // default: text/html
   if (it == metaData.end() || it->second == "text/html")
-   article = plainText(article);
+   article = plainTextFromHTML(article);
 
-  size_t pos1 {article.find(".")};
-  
-  size_t num {std::min(static_cast<size_t>(1000), pos1) + 1};
+  size_t pos{1000};
 
-  return article.substr(0, num);
+  std::smatch match;
+  if (std::regex_search(article, match, re)) {
+   pos = std::min(pos, static_cast<size_t>(match.position() + match.length()));
+  }
+  
+  return article.substr(0, pos);
  }
 
+ // Collects an HTML body fragment; conversion to std::string wraps it
+ // in the common page header and footer.
+ class HtmlPage
+ {
+  static const std::string header; // markup emitted before the body fragment
+  static const std::string footer; // markup emitted after the body fragment
+
+  std::string mContents; // body fragment collected so far
+
+ public:
+  // s: initial body fragment; taken by value and swapped in, so it is not copied a second time
+  HtmlPage(std::string s = ""s) { mContents.swap(s); }
+
+  // appends s to the body fragment and returns *this
+  HtmlPage& operator+=(const std::string& s)
+  {
+   mContents += s;
+   return *this;
+  }
+
+  // returns the complete page: header + body fragment + footer
+  operator std::string() const { return header + mContents + footer; }
+ };
+
+ const std::string HtmlPage::header{"<!DOCTYPE html><html><head><meta charset=\"utf-8\"/></head><body>"};
+ const std::string HtmlPage::footer{"<br/><br/><br/><a href=\"impressum.html\">Impressum, Datenschutzerklärung</a></body></html>"};
+
  std::string generateIndexPage(fs::path& path,
                                std::function<std::string(const std::string& key)>& GetRequestParam,
                                std::function<plugin_interface_setter_type>& SetResponseHeader)
  {
   try {
-   std::string result{"<!DOCTYPE html><html><head><meta charset=\"utf-8\"/></head><body><h1>"s + GetRequestParam("WEBLOG_NAME") + "</h1>"s};
-   
+   HtmlPage htmlPage{"<h1>"s + GetRequestParam("WEBLOG_NAME") + "</h1>"s};
+
    fs::path link{ GetRequestParam("rel_target")};
    
    auto list{getArticleList(path)};
+   if (list.empty())
+    htmlPage += "(no articles found.)";
    for (const auto& article: list) {
     std::string linkstart{"<a href=\"" + (link / article.path.filename()).string() + "/\">"};
     std::string linkend{"</a>"};
-    result += "<h2>"s + linkstart + article.subject + linkend + "</h2>"s + article.date + "<br/>"s;
+    htmlPage += "<h2>"s + linkstart + article.subject + linkend + "</h2>"s + article.date + "<br/>"s;
     
     auto sv{shortVersion(article.path)};
     if (sv.size()) {
-     result += sv + " "s + linkstart + "more..." + linkend;
+     htmlPage += sv + " "s + linkstart + "more..." + linkend;
     }
    }
-   result += "<br/><br/><br/>";
-   result += "<a href=\"impressum.html\">Impressum</a>";
-   result += "</body></html>";
-   return result;
+   return htmlPage;
   } catch (const std::exception& ex) {
    return HttpStatus("500", "Reading Index page: "s + ex.what(), SetResponseHeader);
   }
@@ -263,9 +309,9 @@ namespace {
    if (it != metaData.end() && it->second == "text/plain")
     data = verbatimText(data);
 
-   std::string result { "<!DOCTYPE html><html><head><meta charset=\"utf-8\"/></head><body><h1>"s + metaData.at("Subject") + "</h1>"s + metaData.at("Date") + "<br/><br/>"s + data + "</body></html>"s};
+   HtmlPage htmlPage{"<h1>"s + metaData.at("Subject") + "</h1>"s + metaData.at("Date") + "<br/><br/>"s + data};
 
-   return result;
+   return htmlPage;
   } catch (const std::exception& ex) {
    return HttpStatus("500", "Reading Article: "s + ex.what(), SetResponseHeader);
   }
-- 
cgit v1.2.3