summary refs log tree commit diff homepage
path: root/weblog.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'weblog.cpp')
-rw-r--r-- weblog.cpp 456
1 files changed, 456 insertions, 0 deletions
diff --git a/weblog.cpp b/weblog.cpp
new file mode 100644
index 0000000..cc2be34
--- /dev/null
+++ b/weblog.cpp
@@ -0,0 +1,456 @@
+#include "weblog.h"
+
+#include "libreichwein/mime.h"
+#include "libreichwein/stringhelper.h"
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/algorithm/string/replace.hpp>
+#include <boost/property_tree/ptree.hpp>
+#include <boost/property_tree/xml_parser.hpp>
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <regex>
+#include <string>
+
+using namespace std::string_literals;
+namespace fs = std::filesystem;
+namespace pt = boost::property_tree;
+using namespace Reichwein::Mime;
+using namespace Reichwein::Stringhelper;
+
+namespace {
+
// Number of article teasers rendered on one index page.
constexpr std::size_t number_of_articles_on_front_page = 10;

// Name of the file inside an article directory that holds the header lines
// followed by the article body.
const std::string article_filename = "article.data";
+
+ // Used to return errors by generating response page and HTTP status code
+ std::string HttpStatus(std::string status, std::string message, std::function<plugin_interface_setter_type>& SetResponseHeader)
+ {
+ SetResponseHeader("status", status);
+ SetResponseHeader("content_type", "text/html");
+ return status + " " + message;
+ }
+
// Reads the complete file `filename` in binary mode and returns its bytes.
//
// Throws std::runtime_error if the file cannot be opened, if its size cannot
// be determined, or if fewer bytes than the reported size could be read.
std::string getFile(const fs::path& filename)
{
  // Opened with std::ios::ate: the initial position is the end of the file,
  // so tellg() yields the file size.
  std::ifstream file(filename.string(), std::ios::in | std::ios::binary | std::ios::ate);
  if (!file.is_open())
    throw std::runtime_error("Opening "s + filename.string() + " for reading");

  // tellg() signals failure with -1; using that as a size would request an
  // absurdly large buffer.
  const std::streamoff fileSize = file.tellg();
  if (fileSize < 0)
    throw std::runtime_error("Determining size of "s + filename.string());

  file.seekg(0, std::ios::beg);

  std::string bytes(static_cast<std::string::size_type>(fileSize), '\0');
  file.read(bytes.data(), static_cast<std::streamsize>(fileSize));

  // A short read would otherwise be returned silently, padded with filler.
  if (file.gcount() != static_cast<std::streamsize>(fileSize))
    throw std::runtime_error("Reading "s + filename.string());

  return bytes;
}
+
// True if the relative request target addresses the front page of the
// weblog, i.e. it is empty or consists of a single slash.
bool is_index_page(std::string& rel_target)
{
  if (rel_target.empty())
    return true;

  return rel_target == "/";
}
+
+ bool is_index_file(std::string& rel_target, fs::path& path)
+ {
+ // must be top-level file, recognized as mime_type()
+ return rel_target.find("/") == rel_target.npos && mime_type(path.string()) != "application/text";
+ }
+
// True if the request addresses an article: the relative target has at
// least two characters, ends with '/', and `path` is an existing directory.
bool is_article_page(std::string& rel_target, fs::path& path)
{
  if (rel_target.size() < 2)
    return false;

  if (rel_target.back() != '/')
    return false;

  return fs::is_directory(path);
}
+
+ bool is_article_file(std::string& rel_target, fs::path& path)
+ {
+ return (fs::is_regular_file(path) && path.filename().string() != article_filename);
+ }
+
// Summary of one article as listed on an index page.
// Kept as a plain aggregate: it is brace-initialized in member order.
struct ArticleInfo
{
  fs::path path;        // directory of the article
  std::string subject;  // "Subject" metadata value
  std::string date;     // "Date" metadata value
};
+
+ // get article metadata from header lines
+ std::unordered_map<std::string, std::string> getMetaData(fs::path path)
+ {
+ if (path.string().size() > 0 && path.string().back() == '/') {
+ std::string s {path.string()};
+ path = s.substr(0, s.size() - 1);
+ }
+ std::unordered_map<std::string, std::string> result;
+
+ std::string pathname{path.filename().string()};
+ // ISO date
+ std::string date{pathname.substr(0, 4) + "-"s + pathname.substr(4, 2) + "-"s + pathname.substr(6, 2)};
+
+ result["Date"] = date;
+
+ fs::path filepath {path / article_filename};
+
+ std::ifstream file(filepath.string(), std::ios::in);
+
+ if (file.is_open()) {
+ std::string line;
+ while (!file.eof()) {
+ std::getline(file, line);
+ if (line.empty()) // found header end
+ break;
+ size_t pos {line.find(": ")};
+ if (pos == line.npos) {
+ std::cerr << "Warning: Found bad header line in " << filepath << ": " << line << std::endl;
+ continue;
+ }
+ result[line.substr(0, pos)] = line.substr(pos + 2);
+ }
+ return result;
+
+ } else {
+ throw std::runtime_error("Opening "s + filepath.string() + " for reading");
+ }
+ }
+
+ std::vector<ArticleInfo> getArticleList(fs::path& path, size_t page)
+ {
+ std::vector<ArticleInfo> result;
+
+ for (auto& year_entry: fs::directory_iterator(path)) {
+ std::string year_entry_filename{year_entry.path().filename().string()};
+ if (fs::is_directory(year_entry)) {
+ for (auto& entry: fs::directory_iterator(year_entry.path())) {
+ auto metaData{getMetaData(entry.path())};
+ result.emplace_back(ArticleInfo{entry.path(), metaData.at("Subject"), metaData.at("Date")});
+ }
+ }
+ }
+
+ size_t index0{std::min(number_of_articles_on_front_page * (page), result.size())};
+ size_t index1{std::min(number_of_articles_on_front_page * (page + 1), result.size())};
+ // sort backwards
+ std::partial_sort(result.begin(), result.begin() + index1, result.end(), [](const ArticleInfo& a0, const ArticleInfo& a1){ return a0.path.string() > a1.path.string();});
+
+ return {result.begin() + index0, result.begin() + index1};
+ }
+
+ std::string plainTextFromPTree(const pt::ptree& tree)
+ {
+ std::string result;
+
+ for (auto child: tree) {
+ if (child.first == "<xmltext>")
+ result += child.second.data();
+ else
+ result += plainTextFromPTree(child.second);
+ }
+
+ return result;
+ }
+
+ // returns plain text of string (html xml elements removed)
+ std::string plainTextFromHTML(const std::string& text)
+ {
+ pt::ptree tree;
+
+ std::istringstream ss{text};
+ pt::read_xml(ss, tree, pt::xml_parser::no_comments | pt::xml_parser::no_concat_text);
+
+ return plainTextFromPTree(tree);
+ }
+
// Wraps plain text in a <pre> element so it is shown verbatim in an HTML
// page. The characters '&', '<' and '>' are replaced by their entities.
// '&' has to be escaped as well: otherwise text that happens to contain
// e.g. "&lt;" would be displayed as "<".
std::string verbatimText(std::string text)
{
  std::string escaped;
  escaped.reserve(text.size());

  for (const char c : text) {
    switch (c) {
      case '&': escaped += "&amp;"; break;
      case '<': escaped += "&lt;";  break;
      case '>': escaped += "&gt;";  break;
      default:  escaped += c;       break;
    }
  }

  return "<pre>" + escaped + "</pre>";
}
+
// End of a sentence: a full stop followed by a whitespace character or a
// double quote (POSIX basic syntax). shortVersion() cuts the teaser here.
const std::regex re{"\\.[[:space:]\"]", std::regex::basic};
+
+ // returns teaser of article in plain text
+ std::string shortVersion(const fs::path& path)
+ {
+ std::string article {getFile(path / article_filename)};
+ size_t pos0 {article.find("\n\n")};
+ if (pos0 == article.npos)
+ return "";
+
+ article = "<file>" + article.substr(pos0 + 2) + "</file>";
+
+ auto metaData{getMetaData(path)};
+ auto it {metaData.find("Content-Type")};
+
+ // convert html to plaintext, if tagged as such
+ // default: text/html
+ if (it == metaData.end() || it->second == "text/html")
+ article = plainTextFromHTML(article);
+
+ size_t pos{1000};
+
+ std::smatch match;
+ if (std::regex_search(article, match, re)) {
+ pos = std::min(pos, static_cast<size_t>(match.position() + match.length()));
+ }
+
+ return article.substr(0, pos);
+ }
+
// A complete HTML document: a fixed header and footer around contents that
// can be extended with operator+=. Converting the object to std::string
// yields header + contents + footer.
//
// Header and footer are built once in the constructor from the request
// parameters "WEBLOG_NAME", "WEBLOG_KEYWORDS" and "plugin_path". The callback
// is only used during construction; no reference to it is kept, so the page
// does not depend on the callback's lifetime.
class HtmlPage
{
  std::string mContents;
  std::string mHeader;
  std::string mFooter;

public:
  // GetRequestParam: lookup of request parameters by key.
  // s: initial contents of the page body.
  HtmlPage(std::function<std::string(const std::string& key)>& GetRequestParam,
           std::string s = {})
    : mContents(s)
  {
    const std::string weblogName{GetRequestParam("WEBLOG_NAME")};
    const std::string keywords{GetRequestParam("WEBLOG_KEYWORDS")};
    // Used for three links; look it up once.
    const std::string pluginPath{GetRequestParam("plugin_path")};

    mHeader = "<!DOCTYPE html><html><head>"
              "<meta charset=\"utf-8\"/>"
              "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">"
              "<title>" + weblogName + "</title>"
              "<meta name=\"keywords\" content=\"" + keywords + "\"/>"
              "<link rel=\"shortcut icon\" href=\"" + pluginPath + "/favicon.ico\" type=\"image/x-icon\"/>"
              "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + pluginPath + "/blog.css\"/>"
              "</head><body><div class=\"page\">";

    mFooter = "<br/><br/><br/><div class=\"impressum\"><a href=\"" + pluginPath + "/impressum.html\">Impressum, Datenschutzerklärung</a></div></div></body></html>";
  }

  // Appends `s` to the page contents.
  HtmlPage& operator+=(const std::string& s)
  {
    mContents += s;
    return *this;
  }

  // The complete document. Intentionally implicit: pages are returned from
  // functions whose return type is std::string.
  operator std::string() const
  {
    return mHeader + mContents + mFooter;
  }
};
+
+ std::string generateIndexPage(fs::path& path,
+ std::function<std::string(const std::string& key)>& GetRequestParam,
+ std::function<plugin_interface_setter_type>& SetResponseHeader,
+ size_t page)
+ {
+ try {
+ if (page > std::numeric_limits<int>::max())
+ throw std::runtime_error("Bad page index: "s + std::to_string(page));
+
+ HtmlPage htmlPage{GetRequestParam, "<h1>"s + GetRequestParam("WEBLOG_NAME") + "</h1>"s};
+
+ fs::path link{ GetRequestParam("plugin_path")};
+
+ auto list{getArticleList(path, page)};
+ if (list.empty())
+ htmlPage += "(no articles found.)";
+ else {
+ for (const auto& article: list) {
+ std::string linkstart{"<a href=\"" + (link / article.path.filename()).string() + "/\">"};
+ std::string linkend{"</a>"};
+ htmlPage += "<h2>"s + linkstart + article.subject + linkend + "</h2>"s;
+ htmlPage += "<div class=\"date\">" + article.date + "</div>"s;
+
+ auto sv{shortVersion(article.path)};
+ if (sv.size()) {
+ htmlPage += sv + " "s + linkstart + "more..." + linkend;
+ }
+ }
+ htmlPage += "<br/><br/><br/>";
+ if (page > 0)
+ htmlPage += "<a href=\"?page="s + std::to_string(page - 1) + "\">&lt;&lt;newer</a> "s;
+ htmlPage += "page "s + std::to_string(page + 1);
+ if (list.size() == number_of_articles_on_front_page)
+ htmlPage += " <a href=\"?page="s + std::to_string(page + 1) + "\">older&gt;&gt;</a>"s;
+ htmlPage += "<br/>";
+ }
+ SetResponseHeader("cache_control", "no-store");
+ return htmlPage;
+ } catch (const std::exception& ex) {
+ return HttpStatus("500", "Reading Index page: "s + ex.what(), SetResponseHeader);
+ }
+ }
+
+ std::string generateArticlePage(fs::path& path,
+ std::function<std::string(const std::string& key)>& GetRequestParam,
+ std::function<plugin_interface_setter_type>& SetResponseHeader)
+ {
+ try {
+ auto metaData{getMetaData(path)};
+
+ std::string data { getFile(path / article_filename)};
+
+ size_t pos {data.find("\n\n")};
+ if (pos == data.npos)
+ throw std::runtime_error("Error parsing article");
+
+ data = data.substr(pos + 2);
+
+ auto it {metaData.find("Content-Type")};
+ if (it != metaData.end() && it->second == "text/plain")
+ data = verbatimText(data);
+
+ HtmlPage htmlPage{GetRequestParam, "<h1>"s + metaData.at("Subject") + "</h1>"
+ "<div class=\"date\">" + metaData.at("Date") + "</div>"
+ "<br/><br/>"s + data + "<br/>&squ;"};
+
+ return htmlPage;
+ } catch (const std::exception& ex) {
+ return HttpStatus("500", "Reading Article: "s + ex.what(), SetResponseHeader);
+ }
+ }
+
+ std::string generateStaticFile(fs::path& path, std::function<plugin_interface_setter_type>& SetResponseHeader)
+ {
+ try {
+ SetResponseHeader("content_type", mime_type(path.string()));
+ return getFile(path);
+ } catch (const std::exception& ex) {
+ return HttpStatus("500", "Reading Article file: "s + ex.what(), SetResponseHeader);
+ }
+ }
+
// Decodes a URL-encoded (application/x-www-form-urlencoded) string:
// '+' becomes a space and "%XY" becomes the byte with hexadecimal value XY.
//
// A '%' that is not followed by at least two more characters is copied
// literally. If the two characters after a '%' are not both hexadecimal
// digits, decoding stops and the text decoded so far is returned.
// The digits are checked strictly, so sequences like "%+1", "% 1", "%0x" or
// "%4z" are rejected as malformed and not accepted as a number.
std::string urlDecode(std::string s)
{
  // Value of hexadecimal digit `c`, or -1 if `c` is not a hexadecimal digit.
  const auto hexValue = [](char c) -> int {
    if (c >= '0' && c <= '9')
      return c - '0';
    if (c >= 'a' && c <= 'f')
      return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
      return c - 'A' + 10;
    return -1;
  };

  std::string result;
  result.reserve(s.size());

  for (size_t pos{0}; pos < s.size(); ++pos) {
    const char c{s[pos]};
    if (c == '+') {
      result += ' ';
    } else if (c == '%' && pos + 2 < s.size()) {
      const int high{hexValue(s[pos + 1])};
      const int low{hexValue(s[pos + 2])};
      if (high < 0 || low < 0)
        return result; // malformed escape sequence

      result += static_cast<char>(high * 16 + low);
      pos += 2; // skip the two digits; the loop increment skips the '%'
    } else {
      result += c;
    }
  }

  return result;
}
+
+ std::unordered_map<std::string, std::string> SplitQueryString(std::string& s)
+ {
+ std::unordered_map<std::string, std::string> result;
+
+ size_t qpos = s.find('?');
+ if (qpos != s.npos) {
+ auto list {split(s.substr(qpos + 1), "&")};
+ for (auto i: list) {
+ size_t apos = i.find('=');
+ if (apos != i.npos) {
+ result[urlDecode(i.substr(0, apos))] = urlDecode(i.substr(apos + 1));
+ }
+ }
+ }
+
+ s = s.substr(0, qpos);
+
+ return result;
+ }
+
+} // anonymous namespace
+
+std::string weblog_plugin::name()
+{
+ return "weblog";
+}
+
+weblog_plugin::weblog_plugin()
+{
+ //std::cout << "Plugin constructor" << std::endl;
+}
+
+weblog_plugin::~weblog_plugin()
+{
+ //std::cout << "Plugin destructor" << std::endl;
+}
+
+std::string weblog_plugin::generate_page(
+ std::function<std::string(const std::string& key)>& GetServerParam,
+ std::function<std::string(const std::string& key)>& GetRequestParam, // request including body (POST...)
+ std::function<void(const std::string& key, const std::string& value)>& SetResponseHeader // to be added to result string
+)
+{
+ try {
+ // Make sure we can handle the method
+ std::string method {GetRequestParam("method")};
+ if (method != "GET" && method != "HEAD")
+ return HttpStatus("400", "Unknown HTTP method", SetResponseHeader);
+
+ // Request path must not contain "..".
+ std::string rel_target{GetRequestParam("rel_target")};
+ std::string target{GetRequestParam("target")};
+ if (rel_target.find("..") != std::string::npos) {
+ return HttpStatus("400", "Illegal request: "s + target, SetResponseHeader);
+ }
+
+ std::unordered_map<std::string, std::string> query { SplitQueryString(rel_target) };
+
+ // Build the path to the requested file
+ std::string doc_root{GetRequestParam("doc_root")};
+ if (rel_target.size() >= 4 && std::all_of(rel_target.begin(), rel_target.begin() + 4, isdigit)) {
+ rel_target = rel_target.substr(0, 4) + "/" + rel_target;
+ }
+ fs::path path {fs::path{doc_root} / rel_target};
+ if (target.size() && target.back() != '/' && fs::is_directory(path)) {
+ std::string location{GetRequestParam("location") + "/"s};
+ SetResponseHeader("location", location);
+ return HttpStatus("301", "Correcting directory path", SetResponseHeader);
+ }
+
+ SetResponseHeader("content_type", "text/html");
+
+ size_t page {0};
+ auto it {query.find("page")};
+ if (it != query.end()) {
+ try {
+ page = stoul(it->second);
+ } catch(...) {
+ // ignore: keep default 0
+ }
+ }
+
+ if (is_index_page(rel_target))
+ return generateIndexPage(path, GetRequestParam, SetResponseHeader, page);
+
+ if (is_article_page(rel_target, path))
+ return generateArticlePage(path, GetRequestParam, SetResponseHeader);
+
+ if (is_index_file(rel_target, path) || is_article_file(rel_target, path))
+ return generateStaticFile(path, SetResponseHeader);
+
+ return HttpStatus("404", "Bad path specification: "s + rel_target, SetResponseHeader);
+
+ } catch (const std::exception& ex) {
+ return HttpStatus("500", "Unknown Error: "s + ex.what(), SetResponseHeader);
+ }
+}
+
+bool weblog_plugin::has_own_authentication()
+{
+ return false;
+}