#include "weblog.h"

#include "libcommon/mime.h"
#include "libcommon/stringutil.h"

#include <boost/algorithm/string/replace.hpp>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>

#include <algorithm>
#include <cctype>
#include <filesystem>
#include <fstream>
#include <functional>
#include <iostream>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// NOTE(review): the copy of this file that was reviewed had every "<...>"
// span removed (include targets, template arguments, cast targets and the HTML
// markup inside string literals). Those parts are reconstructed here from
// context and must be confirmed against the project before merging:
//  - the callback signatures (GetParamFn / SetHeaderFn) must match weblog.h
//  - the exact HTML tags/attributes emitted by HtmlPage and the generate*()
//    functions
//  - the list of standard/boost includes

using namespace std::string_literals;
namespace fs = std::filesystem;
namespace pt = boost::property_tree;

namespace {

// Callback types handed in by the hosting web server.
// NOTE(review): reconstructed, see note at top of file.
using GetParamFn = std::function<std::string(const std::string& key)>;
using SetHeaderFn = std::function<void(const std::string& key, const std::string& value)>;

const size_t number_of_articles_on_front_page{10};
const std::string article_filename{"article.data"};

// Escapes the characters that are significant in HTML text and attribute
// values. '&' is replaced first so the entities produced afterwards are not
// escaped a second time.
std::string escapeHtml(std::string text)
{
    boost::algorithm::replace_all(text, "&", "&amp;");
    boost::algorithm::replace_all(text, "<", "&lt;");
    boost::algorithm::replace_all(text, ">", "&gt;");
    boost::algorithm::replace_all(text, "\"", "&quot;");
    return text;
}

// Used to return errors by generating response page and HTTP status code.
// The message is HTML-escaped because callers embed request data (target,
// rel_target) in it and the response is served as text/html.
std::string HttpStatus(std::string status, std::string message, SetHeaderFn& SetResponseHeader)
{
    SetResponseHeader("status", status);
    SetResponseHeader("content_type", "text/html");
    return status + " " + escapeHtml(std::move(message));
}

// Reads a whole file into a string (binary mode).
// Throws std::runtime_error if the file cannot be opened or read completely.
std::string getFile(const fs::path& filename)
{
    // opened at the end (ios::ate) so tellg() yields the file size
    std::ifstream file(filename.string(), std::ios::in | std::ios::binary | std::ios::ate);
    if (!file.is_open())
        throw std::runtime_error("Opening "s + filename.string() + " for reading");

    const std::ifstream::pos_type endPos{file.tellg()};
    if (endPos == std::ifstream::pos_type(-1))
        throw std::runtime_error("Determining size of "s + filename.string());

    const auto fileSize{static_cast<std::size_t>(static_cast<std::streamoff>(endPos))};
    file.seekg(0, std::ios::beg);

    std::string bytes(fileSize, '\0');
    if (!file.read(bytes.data(), static_cast<std::streamsize>(fileSize)))
        throw std::runtime_error("Reading "s + filename.string());

    return bytes;
}

// True if the request addresses the weblog root (article overview).
bool is_index_page(const std::string& rel_target)
{
    return rel_target.empty() || rel_target == "/";
}

// True for an existing top-level file whose type mime_type() recognizes
// (i.e. it does not report "application/text").
// The existence check makes a missing file end up as 404 instead of a read
// error (500) that would echo the filesystem path.
bool is_index_file(const std::string& rel_target, const fs::path& path)
{
    return rel_target.find('/') == rel_target.npos
        && mime_type(path.string()) != "application/text"
        && fs::is_regular_file(path);
}

// True if the request addresses an article directory ("<dir>/").
bool is_article_page(const std::string& rel_target, const fs::path& path)
{
    return rel_target.size() >= 2 && rel_target.back() == '/' && fs::is_directory(path);
}

// True for any existing regular file except the article source file itself,
// which must not be served verbatim.
bool is_article_file(const std::string& /*rel_target*/, const fs::path& path)
{
    return fs::is_regular_file(path) && path.filename().string() != article_filename;
}

struct ArticleInfo
{
    fs::path path;
    std::string subject;
    std::string date;
};

// Get article metadata from the header lines of <path>/article.data.
//
// The header consists of "Key: value" lines and ends at the first empty line.
// In addition, "Date" is derived from the first 8 characters of the directory
// name (YYYYMMDD -> YYYY-MM-DD); a "Date" header line overrides it.
//
// Throws std::runtime_error if the directory name is too short to hold a date
// or if the article file cannot be opened.
std::unordered_map<std::string, std::string> getMetaData(fs::path path)
{
    // strip a trailing slash, otherwise filename() would be empty
    if (!path.string().empty() && path.string().back() == '/') {
        const std::string s{path.string()};
        path = s.substr(0, s.size() - 1);
    }

    std::unordered_map<std::string, std::string> result;

    const std::string pathname{path.filename().string()};
    if (pathname.size() < 6) // substr() below would throw std::out_of_range
        throw std::runtime_error("Bad article directory name: "s + pathname);

    // ISO date
    result["Date"] = pathname.substr(0, 4) + "-"s + pathname.substr(4, 2) + "-"s + pathname.substr(6, 2);

    const fs::path filepath{path / article_filename};
    std::ifstream file(filepath.string(), std::ios::in);
    if (!file.is_open())
        throw std::runtime_error("Opening "s + filepath.string() + " for reading");

    std::string line;
    while (std::getline(file, line)) {
        if (line.empty()) // found header end
            break;

        const size_t pos{line.find(": ")};
        if (pos == line.npos) {
            std::cerr << "Warning: Found bad header line in " << filepath << ": " << line << std::endl;
            continue;
        }

        result[line.substr(0, pos)] = line.substr(pos + 2);
    }

    return result;
}

// Returns the articles of the given zero-based page, newest first.
//
// Expected layout: <path>/<year>/<article directory>/article.data
// Entries that are not directories are ignored on both levels.
// A page beyond the last article yields an empty list.
// Throws (std::exception based) if an article lacks readable metadata or a
// "Subject" header.
std::vector<ArticleInfo> getArticleList(fs::path& path, size_t page)
{
    std::vector<ArticleInfo> result;

    for (const auto& year_entry : fs::directory_iterator(path)) {
        if (!year_entry.is_directory())
            continue;

        for (const auto& entry : fs::directory_iterator(year_entry.path())) {
            if (!entry.is_directory()) // stray file must not break the whole index
                continue;

            auto metaData{getMetaData(entry.path())};
            result.emplace_back(ArticleInfo{entry.path(), metaData.at("Subject"), metaData.at("Date")});
        }
    }

    // Clamp the window to the list. "page" comes from the query string, so
    // page * N must not be computed before it is known not to overflow.
    const size_t total{result.size()};
    const size_t first{page > total / number_of_articles_on_front_page
                           ? total
                           : std::min(page * number_of_articles_on_front_page, total)};
    const size_t last{std::min(first + number_of_articles_on_front_page, total)};

    // sort backwards; only the part up to the requested page has to be ordered
    std::partial_sort(result.begin(), result.begin() + last, result.end(),
                      [](const ArticleInfo& a0, const ArticleInfo& a1) {
                          return a0.path.string() > a1.path.string();
                      });

    return {result.begin() + first, result.begin() + last};
}

// Concatenates all text nodes of the tree (children with an empty key) in
// document order.
std::string plainTextFromPTree(const pt::ptree& tree)
{
    std::string result;

    for (const auto& child : tree) {
        if (child.first == "")
            result += child.second.data();
        else
            result += plainTextFromPTree(child.second);
    }

    return result;
}

// Returns plain text of string (html xml elements removed).
// The input must be well-formed XML with a single root element; read_xml()
// throws otherwise.
std::string plainTextFromHTML(const std::string& text)
{
    pt::ptree tree;

    std::istringstream ss{text};
    pt::read_xml(ss, tree, pt::xml_parser::no_comments | pt::xml_parser::no_concat_text);

    return plainTextFromPTree(tree);
}

// Wraps plain text for verbatim display inside an HTML page.
// NOTE(review): the wrapping tag was reconstructed as <pre>.
std::string verbatimText(std::string text)
{
    return "<pre>"s + escapeHtml(std::move(text)) + "</pre>";
}

// End of the first sentence: a '.' followed by whitespace or a quote (POSIX BRE)
const std::regex sentence_end_re{"\\.[[:space:]\"]", std::regex::basic};

// Returns teaser of article in plain text (not HTML-escaped): the text up to
// the end of the first sentence, limited to 1000 bytes. Empty if the article
// file has no body.
std::string shortVersion(const fs::path& path)
{
    const std::string file{getFile(path / article_filename)};

    const size_t header_end{file.find("\n\n")};
    if (header_end == file.npos)
        return "";

    std::string article{file.substr(header_end + 2)};

    auto metaData{getMetaData(path)};
    auto it{metaData.find("Content-Type")};

    // convert html to plaintext, if tagged as such
    // default: text/html
    // The body is wrapped into a root element only for XML parsing, so the
    // wrapper never ends up in the teaser of a text/plain article.
    // NOTE(review): wrapper element name reconstructed; it does not influence
    // the extracted text.
    if (it == metaData.end() || it->second == "text/html")
        article = plainTextFromHTML("<file>" + article + "</file>");

    size_t pos{1000};

    std::smatch match;
    if (std::regex_search(article, match, sentence_end_re))
        pos = std::min(pos, static_cast<size_t>(match.position() + match.length()));

    // Do not cut a multi-byte character in half: step back over continuation
    // bytes (10xxxxxx). Assumes article text is UTF-8.
    if (pos < article.size()) {
        while (pos > 0 && (static_cast<unsigned char>(article[pos]) & 0xC0) == 0x80)
            --pos;
    }

    return article.substr(0, pos);
}

// Collects page contents and converts to a complete HTML document
// (header + contents + footer) via operator std::string().
class HtmlPage
{
    GetParamFn& mGetRequestParam;
    std::string mContents;
    std::string mHeader;
    const std::string mFooter; // left empty, as in the original

public:
    // NOTE(review): header markup reconstructed (see note at top of file);
    // only the use of WEBLOG_NAME as page title is certain.
    HtmlPage(GetParamFn& GetRequestParam, std::string s = ""s)
        : mGetRequestParam(GetRequestParam)
        , mContents(std::move(s))
        , mHeader("<!DOCTYPE html>"
                  "<html>"
                  "<head>"
                  "<title>" + GetRequestParam("WEBLOG_NAME") + "</title>"
                  "<meta charset=\"utf-8\"/>"
                  "</head>"
                  "<body>")
    {
    }

    HtmlPage& operator+=(const std::string& s)
    {
        mContents += s;
        return *this;
    }

    operator std::string() const
    {
        return mHeader + mContents + mFooter;
    }
};

// Generates the article overview for the given zero-based page.
// Any failure is reported as a 500 response.
// NOTE(review): HTML markup reconstructed (see note at top of file).
std::string generateIndexPage(fs::path& path, GetParamFn& GetRequestParam, SetHeaderFn& SetResponseHeader, size_t page)
{
    try {
        HtmlPage htmlPage{GetRequestParam, "<h1>"s + GetRequestParam("WEBLOG_NAME") + "</h1>"s};

        const fs::path link{GetRequestParam("plugin_path")};

        const auto list{getArticleList(path, page)};
        if (list.empty()) {
            htmlPage += "(no articles found.)";
        } else {
            for (const auto& article : list) {
                const std::string linkstart{"<a href=\"" + (link / article.path.filename()).string() + "/\">"};
                const std::string linkend{"</a>"};

                htmlPage += "<h2>"s + linkstart + article.subject + linkend + "</h2>"s;
                htmlPage += "<div>"s + article.date + "</div>"s;

                const auto sv{shortVersion(article.path)};
                if (sv.size()) {
                    // the teaser is plain text, so it has to be escaped for HTML
                    htmlPage += escapeHtml(sv) + " "s + linkstart + "more..." + linkend;
                }
            }

            htmlPage += "<br/><br/><br/>";
            if (page > 0)
                htmlPage += "<a href=\"?page="s + std::to_string(page - 1) + "\">&lt;&lt;newer</a> "s;
            htmlPage += "page "s + std::to_string(page + 1); // shown one-based
            // a full page suggests there may be older articles
            if (list.size() == number_of_articles_on_front_page)
                htmlPage += " <a href=\"?page="s + std::to_string(page + 1) + "\">older&gt;&gt;</a>"s;
            htmlPage += "<br/>";
        }

        SetResponseHeader("cache_control", "no-store");
        return htmlPage;
    } catch (const std::exception& ex) {
        return HttpStatus("500", "Reading Index page: "s + ex.what(), SetResponseHeader);
    }
}

// Generates the page of a single article from <path>/article.data.
// Articles tagged "Content-Type: text/plain" are shown verbatim, everything
// else is inserted as HTML. Any failure is reported as a 500 response.
// NOTE(review): HTML markup reconstructed (see note at top of file).
std::string generateArticlePage(fs::path& path, GetParamFn& GetRequestParam, SetHeaderFn& SetResponseHeader)
{
    try {
        auto metaData{getMetaData(path)};

        std::string data{getFile(path / article_filename)};

        // body starts after the empty line that terminates the header
        const size_t pos{data.find("\n\n")};
        if (pos == data.npos)
            throw std::runtime_error("Error parsing article");

        data = data.substr(pos + 2);

        auto it{metaData.find("Content-Type")};
        if (it != metaData.end() && it->second == "text/plain")
            data = verbatimText(data);

        HtmlPage htmlPage{GetRequestParam,
                          "<h1>"s + metaData.at("Subject") + "</h1>"s
                          + "<div>"s + metaData.at("Date") + "</div>"s
                          + "<br/><br/>"s + data};

        return htmlPage;
    } catch (const std::exception& ex) {
        return HttpStatus("500", "Reading Article: "s + ex.what(), SetResponseHeader);
    }
}

// Delivers a file as is, with the content type derived by mime_type().
std::string generateStaticFile(fs::path& path, SetHeaderFn& SetResponseHeader)
{
    try {
        SetResponseHeader("content_type", mime_type(path.string()));
        return getFile(path);
    } catch (const std::exception& ex) {
        return HttpStatus("500", "Reading Article file: "s + ex.what(), SetResponseHeader);
    }
}

// Value of a hexadecimal digit, -1 if c is none.
int hexValue(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

// Decodes an URL-encoded query component: '+' becomes ' ', "%XX" becomes the
// byte XX. Decoding stops at the first malformed escape and returns what has
// been decoded so far; a '%' with fewer than two characters following it is
// copied literally.
std::string urlDecode(const std::string& s)
{
    std::string result;
    result.reserve(s.size());

    for (size_t pos = 0; pos < s.size(); ++pos) {
        const char c{s[pos]};
        if (c == '+') {
            result += ' ';
        } else if (c == '%' && pos + 2 < s.size()) {
            // strict check: exactly two hex digits (no sign, no whitespace)
            const int hi{hexValue(s[pos + 1])};
            const int lo{hexValue(s[pos + 2])};
            if (hi < 0 || lo < 0)
                return result;

            result += static_cast<char>(hi * 16 + lo);
            pos += 2;
        } else {
            result += c;
        }
    }

    return result;
}

// Splits "path?key=value&..." : returns the decoded key/value pairs and
// removes the query part (including '?') from s. Items without '=' are
// ignored.
std::unordered_map<std::string, std::string> SplitQueryString(std::string& s)
{
    std::unordered_map<std::string, std::string> result;

    const size_t qpos{s.find('?')};
    if (qpos != s.npos) {
        const auto list{split(s.substr(qpos + 1), "&")};
        for (const auto& item : list) {
            const size_t apos{item.find('=')};
            if (apos != item.npos)
                result[urlDecode(item.substr(0, apos))] = urlDecode(item.substr(apos + 1));
        }

        s.erase(qpos);
    }

    return result;
}

} // anonymous namespace

std::string weblog_plugin::name()
{
    return "weblog";
}

weblog_plugin::weblog_plugin()
{
    //std::cout << "Plugin constructor" << std::endl;
}

weblog_plugin::~weblog_plugin()
{
    //std::cout << "Plugin destructor" << std::endl;
}

// Entry point called by the server for each request.
// Dispatches to index page, article page or static file and returns the
// response body; status and headers are reported via SetResponseHeader.
// NOTE(review): parameter types reconstructed, must match weblog.h.
std::string weblog_plugin::generate_page(
    std::function<std::string(const std::string& key)>& GetServerParam,
    std::function<std::string(const std::string& key)>& GetRequestParam, // request including body (POST...)
    std::function<void(const std::string& key, const std::string& value)>& SetResponseHeader // to be added to result string
)
{
    static_cast<void>(GetServerParam); // not needed by this plugin

    try {
        // std::isdigit() requires a value representable as unsigned char
        const auto is_digit = [](unsigned char c) { return std::isdigit(c) != 0; };

        // Make sure we can handle the method
        const std::string method{GetRequestParam("method")};
        if (method != "GET" && method != "HEAD")
            return HttpStatus("400", "Unknown HTTP method", SetResponseHeader);

        // Request path must not contain "..".
        std::string rel_target{GetRequestParam("rel_target")};
        const std::string target{GetRequestParam("target")};
        if (rel_target.find("..") != std::string::npos)
            return HttpStatus("400", "Illegal request: "s + target, SetResponseHeader);

        // strips the query part from rel_target
        const std::unordered_map<std::string, std::string> query{SplitQueryString(rel_target)};

        // Build the path to the requested file
        const std::string doc_root{GetRequestParam("doc_root")};
        // articles are stored below a directory named after their year
        if (rel_target.size() >= 4 && std::all_of(rel_target.begin(), rel_target.begin() + 4, is_digit))
            rel_target = rel_target.substr(0, 4) + "/" + rel_target;

        fs::path path{fs::path{doc_root} / rel_target};
        if (target.size() && target.back() != '/' && fs::is_directory(path)) {
            const std::string location{GetRequestParam("location") + "/"s};
            SetResponseHeader("location", location);
            return HttpStatus("301", "Correcting directory path", SetResponseHeader);
        }

        SetResponseHeader("content_type", "text/html");

        size_t page{0};
        const auto it{query.find("page")};
        if (it != query.end()) {
            // Accept plain decimal numbers only: std::stoul() alone would take
            // "-1" (wrapping to a huge value) and trailing garbage.
            const std::string& value{it->second};
            if (value.empty() || !std::all_of(value.begin(), value.end(), is_digit))
                return HttpStatus("400", "Bad page number", SetResponseHeader);

            try {
                page = std::stoul(value);
            } catch (const std::out_of_range&) {
                return HttpStatus("400", "Bad page number", SetResponseHeader);
            }
        }

        if (is_index_page(rel_target))
            return generateIndexPage(path, GetRequestParam, SetResponseHeader, page);

        if (is_article_page(rel_target, path))
            return generateArticlePage(path, GetRequestParam, SetResponseHeader);

        if (is_index_file(rel_target, path) || is_article_file(rel_target, path))
            return generateStaticFile(path, SetResponseHeader);

        return HttpStatus("404", "Bad path specification: "s + rel_target, SetResponseHeader);
    } catch (const std::exception& ex) {
        return HttpStatus("500", "Unknown Error: "s + ex.what(), SetResponseHeader);
    }
}

bool weblog_plugin::has_own_authentication()
{
    return false;
}