From 4c4a001cacd9c3e1c2552dbcaf50165091a82ac1 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sat, 11 Feb 2023 11:24:22 +0100 Subject: Separated out weblog from webserver --- weblog.cpp | 456 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 456 insertions(+) create mode 100644 weblog.cpp (limited to 'weblog.cpp') diff --git a/weblog.cpp b/weblog.cpp new file mode 100644 index 0000000..cc2be34 --- /dev/null +++ b/weblog.cpp @@ -0,0 +1,456 @@ +#include "weblog.h" + +#include "libreichwein/mime.h" +#include "libreichwein/stringhelper.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace std::string_literals; +namespace fs = std::filesystem; +namespace pt = boost::property_tree; +using namespace Reichwein::Mime; +using namespace Reichwein::Stringhelper; + +namespace { + + const size_t number_of_articles_on_front_page {10}; + const std::string article_filename{"article.data"}; + + // Used to return errors by generating response page and HTTP status code + /* Sets the response headers "status" (to status) and "content_type" (to "text/html") and returns status, a space and message as the response body. */ std::string HttpStatus(std::string status, std::string message, std::function& SetResponseHeader) + { + SetResponseHeader("status", status); + SetResponseHeader("content_type", "text/html"); + return status + " " + message; + } + + /* Reads the whole file in binary mode and returns its bytes as a string. The stream is opened positioned at the end (std::ios::ate) so that tellg() yields the size, then rewound before reading. Throws std::runtime_error if the file cannot be opened. */ std::string getFile(const fs::path& filename) + { + std::ifstream file(filename.string(), std::ios::in | std::ios::binary | std::ios::ate); + + if (file.is_open()) { + std::ifstream::pos_type fileSize = file.tellg(); + file.seekg(0, std::ios::beg); + + std::string bytes(fileSize, ' '); + file.read(reinterpret_cast(bytes.data()), fileSize); + + return bytes; + + } else { + throw std::runtime_error("Opening "s + filename.string() + " for reading"); + } + } + + /* True when the relative target is empty or exactly "/". */ bool is_index_page(std::string& rel_target) + { + return (rel_target.size() == 0 || rel_target == "/"); + } + + /* True when rel_target contains no "/" and mime_type() does not return "application/text" for the path. */ bool is_index_file(std::string& rel_target, fs::path& path) + { + // must be top-level file, recognized as mime_type() + 
return rel_target.find("/") == rel_target.npos && mime_type(path.string()) != "application/text"; + } + + /* True when rel_target has at least two characters, ends in '/', and path is a directory. */ bool is_article_page(std::string& rel_target, fs::path& path) + { + return (rel_target.size() >= 2 && rel_target.back() == '/' && fs::is_directory(path)); + } + + /* True when path is a regular file whose name differs from article_filename; rel_target is not used. */ bool is_article_file(std::string& rel_target, fs::path& path) + { + return (fs::is_regular_file(path) && path.filename().string() != article_filename); + } + + /* Location, subject and date of one article, as collected by getArticleList(). */ struct ArticleInfo + { + fs::path path; + std::string subject; + std::string date; + }; + + // get article metadata from header lines + /* Strips one trailing '/' from path, derives "Date" as YYYY-MM-DD from the first 8 characters of the directory name, then reads "Key: Value" header lines from article_filename inside that directory until the first empty line; a "Date" header line overwrites the derived date. Lines without ": " are reported on std::cerr and skipped. Throws std::runtime_error if the file cannot be opened. NOTE(review): the substr() calls throw std::out_of_range when the directory name is shorter than the offsets used - confirm all callers pass date-named directories. */ std::unordered_map getMetaData(fs::path path) + { + if (path.string().size() > 0 && path.string().back() == '/') { + std::string s {path.string()}; + path = s.substr(0, s.size() - 1); + } + std::unordered_map result; + + std::string pathname{path.filename().string()}; + // ISO date + std::string date{pathname.substr(0, 4) + "-"s + pathname.substr(4, 2) + "-"s + pathname.substr(6, 2)}; + + result["Date"] = date; + + fs::path filepath {path / article_filename}; + + std::ifstream file(filepath.string(), std::ios::in); + + if (file.is_open()) { + std::string line; + while (!file.eof()) { + std::getline(file, line); + if (line.empty()) // found header end + break; + size_t pos {line.find(": ")}; + if (pos == line.npos) { + std::cerr << "Warning: Found bad header line in " << filepath << ": " << line << std::endl; + continue; + } + result[line.substr(0, pos)] = line.substr(pos + 2); + } + return result; + + } else { + throw std::runtime_error("Opening "s + filepath.string() + " for reading"); + } + } + + /* Collects an ArticleInfo for every entry found inside each subdirectory of path, orders them descending by path string (partial sort up to the end of the requested page) and returns the slice belonging to the zero-based page, number_of_articles_on_front_page entries per page. Exceptions from getMetaData() and from at("Subject") are not caught here, so one bad entry fails the whole listing. NOTE(review): year_entry_filename is not used in the code shown. */ std::vector getArticleList(fs::path& path, size_t page) + { + std::vector result; + + for (auto& year_entry: fs::directory_iterator(path)) { + std::string year_entry_filename{year_entry.path().filename().string()}; + if (fs::is_directory(year_entry)) { + for (auto& entry: fs::directory_iterator(year_entry.path())) { + auto metaData{getMetaData(entry.path())}; + result.emplace_back(ArticleInfo{entry.path(), 
metaData.at("Subject"), metaData.at("Date")}); + } + } + } + + size_t index0{std::min(number_of_articles_on_front_page * (page), result.size())}; + size_t index1{std::min(number_of_articles_on_front_page * (page + 1), result.size())}; + // sort backwards + std::partial_sort(result.begin(), result.begin() + index1, result.end(), [](const ArticleInfo& a0, const ArticleInfo& a1){ return a0.path.string() > a1.path.string();}); + + return {result.begin() + index0, result.begin() + index1}; + } + + /* Walks the tree recursively: the data of children whose key equals the compared literal is appended, all other children are descended into. NOTE(review): the compared key shows as an empty string here; text between angle brackets seems to be missing throughout this listing, so confirm the real key in the original file. */ std::string plainTextFromPTree(const pt::ptree& tree) + { + std::string result; + + for (auto child: tree) { + if (child.first == "") + result += child.second.data(); + else + result += plainTextFromPTree(child.second); + } + + return result; + } + + // returns plain text of string (html xml elements removed) + /* Parses text with pt::read_xml using the no_comments and no_concat_text flags and returns plainTextFromPTree() of the result. Parse errors from read_xml are not caught here. */ std::string plainTextFromHTML(const std::string& text) + { + pt::ptree tree; + + std::istringstream ss{text}; + pt::read_xml(ss, tree, pt::xml_parser::no_comments | pt::xml_parser::no_concat_text); + + return plainTextFromPTree(tree); + } + + /* Prepares plain text for embedding in the page: runs two replace_all passes over the text and wraps the result in a pair of string literals. NOTE(review): search and replacement strings are identical as shown, which makes both passes no-ops; presumably the replacements were HTML entities that got lost in this listing - confirm against the original file. */ std::string verbatimText(std::string text) + { + boost::algorithm::replace_all(text, "<", "<"); + boost::algorithm::replace_all(text, ">", ">"); + + return "

"s + text + "

"; + } + + /* Pattern (POSIX basic syntax): a '.' followed by a whitespace character or a double quote. shortVersion() uses it to cut the teaser after the first match. */ std::regex re{"\\.[[:space:]\"]", std::regex::basic}; + + // returns teaser of article in plain text + /* Takes the text after the first blank line of the article file, wraps it in a pair of string literals (shown empty in this listing), converts it with plainTextFromHTML() unless a Content-Type header other than text/html is present, and returns it cut after the first match of re or after 1000 characters, whichever comes first. Returns an empty string when the file has no blank line. */ std::string shortVersion(const fs::path& path) + { + std::string article {getFile(path / article_filename)}; + size_t pos0 {article.find("\n\n")}; + if (pos0 == article.npos) + return ""; + + article = "" + article.substr(pos0 + 2) + ""; + + auto metaData{getMetaData(path)}; + auto it {metaData.find("Content-Type")}; + + // convert html to plaintext, if tagged as such + // default: text/html + if (it == metaData.end() || it->second == "text/html") + article = plainTextFromHTML(article); + + size_t pos{1000}; + + std::smatch match; + if (std::regex_search(article, match, re)) { + pos = std::min(pos, static_cast(match.position() + match.length())); + } + + return article.substr(0, pos); + } + + /* Accumulates page content between a header and a footer that are built once in the constructor (the header includes the WEBLOG_NAME request parameter). operator+= appends to the contents; conversion to std::string yields header + contents + footer. A reference to GetRequestParam is stored but not used after construction in the code shown. */ class HtmlPage + { + std::function& mGetRequestParam; + std::string mContents; + std::string mHeader; + const std::string mFooter; + + public: + HtmlPage(std::function& GetRequestParam, + std::string s = ""s) + : mGetRequestParam(GetRequestParam) + , mContents(s) + , mHeader("" + "" + "" + "" + GetRequestParam("WEBLOG_NAME") + "" + "" + "" + "" + "

") + , mFooter("

Impressum, Datenschutzerklärung

") + { + } + + HtmlPage& operator+=(const std::string& s) + { + mContents += s; + return *this; + } + + operator std::string() const + { + return mHeader + mContents + mFooter; + } + }; + + /* Builds the index page for the zero-based page number: WEBLOG_NAME, then subject, date and (when non-empty) the teaser from shortVersion() for each article returned by getArticleList(), followed by paging text ("<<newer " when page > 0, "page N" with N = page + 1, " older>>" when the list is full). Sets the cache_control header to no-store. Any std::exception is turned into a 500 response via HttpStatus(). NOTE(review): link is assigned but unused and linkstart/linkend show as empty strings here; markup appears to be missing from the literals in this listing. */ std::string generateIndexPage(fs::path& path, + std::function& GetRequestParam, + std::function& SetResponseHeader, + size_t page) + { + try { + if (page > std::numeric_limits::max()) + throw std::runtime_error("Bad page index: "s + std::to_string(page)); + + HtmlPage htmlPage{GetRequestParam, "

"s + GetRequestParam("WEBLOG_NAME") + "

"s}; + + fs::path link{ GetRequestParam("plugin_path")}; + + auto list{getArticleList(path, page)}; + if (list.empty()) + htmlPage += "(no articles found.)"; + else { + for (const auto& article: list) { + std::string linkstart{""}; + std::string linkend{""}; + htmlPage += "

"s + linkstart + article.subject + linkend + "

"s; + htmlPage += "

" + article.date + "

"s; + + auto sv{shortVersion(article.path)}; + if (sv.size()) { + htmlPage += sv + " "s + linkstart + "more..." + linkend; + } + } + htmlPage += "

"; + if (page > 0) + htmlPage += "<<newer "s; + htmlPage += "page "s + std::to_string(page + 1); + if (list.size() == number_of_articles_on_front_page) + htmlPage += " older>>"s; + htmlPage += "
"; + } + SetResponseHeader("cache_control", "no-store"); + return htmlPage; + } catch (const std::exception& ex) { + return HttpStatus("500", "Reading Index page: "s + ex.what(), SetResponseHeader); + } + } + + /* Renders one article: reads the metadata and the text after the first blank line of the article file, passes that text through verbatimText() when the Content-Type header is text/plain, and wraps Subject, Date and text in an HtmlPage. A missing blank line, a missing Subject header or any other std::exception is turned into a 500 response via HttpStatus(). */ std::string generateArticlePage(fs::path& path, + std::function& GetRequestParam, + std::function& SetResponseHeader) + { + try { + auto metaData{getMetaData(path)}; + + std::string data { getFile(path / article_filename)}; + + size_t pos {data.find("\n\n")}; + if (pos == data.npos) + throw std::runtime_error("Error parsing article"); + + data = data.substr(pos + 2); + + auto it {metaData.find("Content-Type")}; + if (it != metaData.end() && it->second == "text/plain") + data = verbatimText(data); + + HtmlPage htmlPage{GetRequestParam, "

"s + metaData.at("Subject") + "

" + "

" + metaData.at("Date") + "

" + "

"s + data + "
□"}; + + return htmlPage; + } catch (const std::exception& ex) { + return HttpStatus("500", "Reading Article: "s + ex.what(), SetResponseHeader); + } + } + + /* Returns the bytes of the file at path after setting content_type from mime_type(); a std::exception while reading is turned into a 500 response via HttpStatus(), which resets content_type to text/html. */ std::string generateStaticFile(fs::path& path, std::function& SetResponseHeader) + { + try { + SetResponseHeader("content_type", mime_type(path.string())); + return getFile(path); + } catch (const std::exception& ex) { + return HttpStatus("500", "Reading Article file: "s + ex.what(), SetResponseHeader); + } + } + + /* Decodes a URL-encoded string: '+' becomes a space and '%' followed by two characters is parsed as a hexadecimal byte; a '%' with fewer than two characters after it is copied unchanged. If the two characters cannot be parsed, decoding stops and the text decoded so far is returned. */ std::string urlDecode(std::string s) + { + std::string result; + + size_t pos = 0; + while (pos < s.size()) { + char c {s[pos]}; + if (c == '+') { + result += ' '; + } else if (c == '%' && pos + 2 < s.size()) { + try { + int i = stoi(s.substr(pos + 1, 2), 0, 16); + if (i < 0 || i > 255) + return result; + + result += static_cast(i); + } catch (...) { + return result; + } + + pos += 2; + } else { + result += c; + } + pos++; + } + + return result; + } + + /* Splits the part of s after the first '?' at "&" into items and returns the URL-decoded key/value pairs, key and value being separated by the first '=' of an item; items without '=' are ignored. As a side effect s is truncated to the part before the '?' (s stays unchanged when there is no '?'). */ std::unordered_map SplitQueryString(std::string& s) + { + std::unordered_map result; + + size_t qpos = s.find('?'); + if (qpos != s.npos) { + auto list {split(s.substr(qpos + 1), "&")}; + for (auto i: list) { + size_t apos = i.find('='); + if (apos != i.npos) { + result[urlDecode(i.substr(0, apos))] = urlDecode(i.substr(apos + 1)); + } + } + } + + s = s.substr(0, qpos); + + return result; + } + +} // anonymous namespace + +/* Returns the plugin name "weblog". */ std::string weblog_plugin::name() +{ + return "weblog"; +} + +weblog_plugin::weblog_plugin() +{ + //std::cout << "Plugin constructor" << std::endl; +} + +weblog_plugin::~weblog_plugin() +{ + //std::cout << "Plugin destructor" << std::endl; +} + +/* Plugin entry point. Accepts only GET and HEAD (400 otherwise), rejects a rel_target containing ".." (400), splits the query string off rel_target, prefixes a rel_target that starts with four digits with a directory named after those digits, and answers 301 with a location header when the target lacks a trailing '/' but resolves to a directory. Otherwise dispatches to the index page (optional "page" query parameter, default 0), the article page or a static file, and returns 404 if nothing matches. Any std::exception reaching this level is turned into a 500 response. GetServerParam is not used in the code shown. */ std::string weblog_plugin::generate_page( + std::function& GetServerParam, + std::function& GetRequestParam, // request including body (POST...) 
+ std::function& SetResponseHeader // to be added to result string +) +{ + try { + // Make sure we can handle the method + std::string method {GetRequestParam("method")}; + if (method != "GET" && method != "HEAD") + return HttpStatus("400", "Unknown HTTP method", SetResponseHeader); + + // Request path must not contain "..". + std::string rel_target{GetRequestParam("rel_target")}; + std::string target{GetRequestParam("target")}; + if (rel_target.find("..") != std::string::npos) { + return HttpStatus("400", "Illegal request: "s + target, SetResponseHeader); + } + + std::unordered_map query { SplitQueryString(rel_target) }; + + // Build the path to the requested file + std::string doc_root{GetRequestParam("doc_root")}; + if (rel_target.size() >= 4 && std::all_of(rel_target.begin(), rel_target.begin() + 4, isdigit)) { + rel_target = rel_target.substr(0, 4) + "/" + rel_target; + } + fs::path path {fs::path{doc_root} / rel_target}; + if (target.size() && target.back() != '/' && fs::is_directory(path)) { + std::string location{GetRequestParam("location") + "/"s}; + SetResponseHeader("location", location); + return HttpStatus("301", "Correcting directory path", SetResponseHeader); + } + + SetResponseHeader("content_type", "text/html"); + + size_t page {0}; + auto it {query.find("page")}; + if (it != query.end()) { + try { + page = stoul(it->second); + } catch(...) 
{ + // ignore: keep default 0 + } + } + + if (is_index_page(rel_target)) + return generateIndexPage(path, GetRequestParam, SetResponseHeader, page); + + if (is_article_page(rel_target, path)) + return generateArticlePage(path, GetRequestParam, SetResponseHeader); + + if (is_index_file(rel_target, path) || is_article_file(rel_target, path)) + return generateStaticFile(path, SetResponseHeader); + + return HttpStatus("404", "Bad path specification: "s + rel_target, SetResponseHeader); + + } catch (const std::exception& ex) { + return HttpStatus("500", "Unknown Error: "s + ex.what(), SetResponseHeader); + } +} + +/* Always returns false. */ bool weblog_plugin::has_own_authentication() +{ + return false; +} -- cgit v1.2.3