// Weblog FastCGI application: serves an article index, single article pages
// and static files from a data directory.
//
// NOTE(review): this file was restored from a copy in which line structure and
// everything between '<' and '>' had been stripped (include targets, template
// arguments, HTML markup inside string literals, "&lt;"/"&gt;" entities).
// Template arguments and include targets were recovered from usage. HTML
// markup could not be recovered and was re-created minimally; every such spot
// carries a NOTE(review) and must be checked against the authoritative source.

#include "weblog.h"

#include "libreichwein/file.h"
#include "libreichwein/mime.h"
#include "libreichwein/stringhelper.h"
#include "libreichwein/url.h"

#include <fcgiapp.h>

#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <limits>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

using namespace std::string_literals;
namespace fs = std::filesystem;
namespace pt = boost::property_tree;
using namespace Reichwein;
using namespace Reichwein::Mime;
using namespace Reichwein::Stringhelper;

namespace {

const size_t number_of_articles_on_front_page {10};
const std::string article_filename{"article.data"};

// Used to return errors by generating response page and HTTP status code.
// Writes a "Status:" header, a text/plain content type and "<status> <message>"
// as body to request.out.
void HttpStatus(const std::string& status, const std::string& message, FCGX_Request& request)
{
  FCGX_FPrintF(request.out, "Status: %s Error\r\n", status.c_str());
  FCGX_PutS("Content-Type: text/plain\r\n\r\n", request.out);
  FCGX_FPrintF(request.out, "%s %s", status.c_str(), message.c_str());
}

// Returns the value of a FastCGI parameter, or an empty string if it is not
// set. FCGX_GetParam() may return a null pointer, which must never be passed
// to the std::string constructor.
std::string getParam(const char* name, FCGX_Request& request)
{
  const char* value{FCGX_GetParam(name, request.envp)};
  return value ? std::string{value} : std::string{};
}

// true if the relative target addresses the index (front) page
bool is_index_page(const std::string& rel_target)
{
  return rel_target.empty() || rel_target == "/";
}

// true for files served next to the index page:
// must be top-level file, recognized as mime_type()
bool is_index_file(const std::string& rel_target, const fs::path& path)
{
  return rel_target.find('/') == rel_target.npos &&
         mime_type(path.string()) != "application/text";
}

// true if the target names an article: ends in '/' and maps to a directory
bool is_article_page(const std::string& rel_target, const fs::path& path)
{
  return rel_target.size() >= 2 && rel_target.back() == '/' && fs::is_directory(path);
}

// true for any regular file except the article source file itself
// (rel_target is unused but kept for a signature parallel to the other checks)
bool is_article_file(const std::string& /*rel_target*/, const fs::path& path)
{
  return fs::is_regular_file(path) && path.filename().string() != article_filename;
}

struct ArticleInfo
{
  fs::path path;
  std::string subject;
  std::string date;
};

// Get article metadata from header lines.
//
// path: article directory (a trailing '/' is tolerated). The first 8
//       characters of its name are formatted as "XXXX-XX-XX" into "Date".
// Returns the "Key: value" header lines of the article file (up to the first
// empty line) plus the derived "Date" entry; a "Date" header line in the file
// overrides the derived one.
// Throws std::runtime_error if the directory name is too short or the article
// file cannot be opened.
std::unordered_map<std::string, std::string> getMetaData(fs::path path)
{
  // strip a trailing slash, otherwise filename() would be empty
  if (!path.string().empty() && path.string().back() == '/') {
    const std::string s{path.string()};
    path = s.substr(0, s.size() - 1);
  }

  const std::string pathname{path.filename().string()};
  if (pathname.size() < 8)
    throw std::runtime_error("Bad article directory name: "s + pathname);

  std::unordered_map<std::string, std::string> result;

  // ISO date
  result["Date"] = pathname.substr(0, 4) + "-"s + pathname.substr(4, 2) + "-"s + pathname.substr(6, 2);

  const fs::path filepath{path / article_filename};
  std::ifstream file(filepath.string(), std::ios::in);
  if (!file.is_open())
    throw std::runtime_error("Opening "s + filepath.string() + " for reading");

  // Loop on the getline() result instead of eof(): a stream that fails
  // without reaching EOF would otherwise never terminate the loop.
  std::string line;
  while (std::getline(file, line)) {
    if (line.empty()) // found header end
      break;

    const size_t pos{line.find(": ")};
    if (pos == line.npos) {
      std::cerr << "Warning: Found bad header line in " << filepath << ": " << line << std::endl;
      continue;
    }

    result[line.substr(0, pos)] = line.substr(pos + 2);
  }

  return result;
}

// Collects the articles below path (layout: <path>/<year dir>/<article dir>)
// and returns the slice belonging to the given zero-based page, newest first
// (ordered by descending path string).
// Throws if an article directory lacks readable metadata or a "Subject".
std::vector<ArticleInfo> getArticleList(fs::path& path, size_t page)
{
  std::vector<ArticleInfo> result;

  for (const auto& year_entry: fs::directory_iterator(path)) {
    if (!fs::is_directory(year_entry.path()))
      continue;

    for (const auto& entry: fs::directory_iterator(year_entry.path())) {
      // Articles are directories; a stray regular file must not take the
      // whole index page down.
      if (!fs::is_directory(entry.path()))
        continue;

      const auto metaData{getMetaData(entry.path())};
      result.emplace_back(ArticleInfo{entry.path(), metaData.at("Subject"), metaData.at("Date")});
    }
  }

  const size_t index0{std::min(number_of_articles_on_front_page * page, result.size())};
  const size_t index1{std::min(number_of_articles_on_front_page * (page + 1), result.size())};

  // sort backwards; only the first index1 elements need to be in order
  std::partial_sort(result.begin(), result.begin() + index1, result.end(),
                    [](const ArticleInfo& a0, const ArticleInfo& a1) {
                      return a0.path.string() > a1.path.string();
                    });

  return {result.begin() + index0, result.begin() + index1};
}

// Concatenates all text nodes of the tree, depth first.
std::string plainTextFromPTree(const pt::ptree& tree)
{
  std::string result;

  for (const auto& child: tree) {
    // With xml_parser::no_concat_text, Boost stores text nodes as children
    // with the key "<xmltext>".
    if (child.first == "<xmltext>")
      result += child.second.data();
    else
      result += plainTextFromPTree(child.second);
  }

  return result;
}

// returns plain text of string (html xml elements removed)
// text must be well-formed XML with a single root element; read_xml throws otherwise.
std::string plainTextFromHTML(const std::string& text)
{
  pt::ptree tree;

  std::istringstream ss{text};
  pt::read_xml(ss, tree, pt::xml_parser::no_comments | pt::xml_parser::no_concat_text);

  return plainTextFromPTree(tree);
}

// Escapes plain text for embedding into HTML and wraps it into a
// preformatted block.
// NOTE(review): entity strings and the <pre> wrapper were stripped from the
// source copy and are reconstructed here - confirm. Escaping of '&' is added
// so that text like "a &lt; b" is shown verbatim.
std::string verbatimText(std::string text)
{
  boost::algorithm::replace_all(text, "&", "&amp;"); // must come first
  boost::algorithm::replace_all(text, "<", "&lt;");
  boost::algorithm::replace_all(text, ">", "&gt;");

  return "<pre>"s + text + "</pre>";
}

// matches a '.' followed by whitespace or a double quote
std::regex re{"\\.[[:space:]\"]", std::regex::basic};

// returns teaser of article in plain text:
// the body up to and including the first match of re, at most 1000 bytes.
// Returns "" if the article file has no empty line separating header and body.
std::string shortVersion(const fs::path& path)
{
  std::string article{File::getFile(path / article_filename)};
  const size_t pos0{article.find("\n\n")};
  if (pos0 == article.npos)
    return "";

  article = article.substr(pos0 + 2);

  const auto metaData{getMetaData(path)};
  const auto it{metaData.find("Content-Type")};

  // convert html to plaintext, if tagged as such
  // default: text/html
  if (it == metaData.end() || it->second == "text/html") {
    // read_xml needs a single root element; its name is irrelevant because
    // only text nodes are extracted.
    // NOTE(review): wrapper element name was stripped from the source copy.
    article = plainTextFromHTML("<file>" + article + "</file>");
  }

  size_t pos{1000};

  std::smatch match;
  if (std::regex_search(article, match, re)) {
    pos = std::min(pos, static_cast<size_t>(match.position() + match.length()));
  }

  // don't cut in the middle of a UTF-8 multi-byte sequence
  if (pos < article.size()) {
    while (pos > 0 && (static_cast<unsigned char>(article[pos]) & 0xC0) == 0x80)
      --pos;
  }

  return article.substr(0, pos);
}

// PATH_INFO without a query string part
std::string get_rel_target(FCGX_Request& request)
{
  std::string result{getParam("PATH_INFO", request)};
  const auto pos{result.find('?')};
  if (pos != result.npos)
    result.erase(pos);

  return result;
}

// Returns DOCUMENT_URI with the trailing PATH_INFO part removed (both
// compared without trailing slashes), or "/" if DOCUMENT_URI does not end
// with PATH_INFO.
std::string plugin_path(FCGX_Request& request)
{
  std::string target{getParam("DOCUMENT_URI", request)};
  while (target.size() > 1 && target.back() == '/')
    target.pop_back();

  std::string rel_target{get_rel_target(request)};
  while (rel_target.size() > 1 && rel_target.back() == '/')
    rel_target.pop_back();

  if (target.ends_with(rel_target))
    return target.substr(0, target.size() - rel_target.size());

  return "/";
}

// HTML document consisting of a fixed header, appendable contents and a
// fixed footer.
class HtmlPage
{
  Config& m_config;
  std::string mContents;
  std::string mHeader;
  const std::string mFooter;

public:
  // NOTE(review): header/footer markup was stripped from the source copy;
  // only the use of m_config.getName() survived. The markup below is a minimal
  // reconstruction (the original presumably also used `request`, e.g. for
  // stylesheet links) - restore from the authoritative source.
  HtmlPage([[maybe_unused]] FCGX_Request& request, Config& config, std::string s = ""s)
    : m_config{config}
    , mContents{s}
    , mHeader("<!DOCTYPE html>"
              "<html>"
              "<head>"
              "<meta charset=\"utf-8\"/>"
              "<title>" + m_config.getName() + "</title>"
              "</head>"
              "<body>")
    , mFooter("</body></html>")
  {
  }

  // appends s to the page contents
  HtmlPage& operator+=(const std::string& s)
  {
    mContents += s;
    return *this;
  }

  // complete document: header + contents + footer
  operator std::string() const
  {
    return mHeader + mContents + mFooter;
  }
};

// Writes the index page (article list of the given zero-based page) to
// request.out; on any std::exception a 500 response is written instead.
// NOTE(review): all markup in this function (headings, anchors, paging links)
// was stripped from the source copy and is reconstructed - confirm.
void generateIndexPage(fs::path& path, FCGX_Request& request, Config& config, size_t page)
{
  try {
    if (page > static_cast<size_t>(std::numeric_limits<int>::max()))
      throw std::runtime_error("Bad page index: "s + std::to_string(page));

    HtmlPage htmlPage{request, config, "<h1>"s + config.getName() + "</h1>"s};

    const fs::path link{plugin_path(request)};

    const auto list{getArticleList(path, page)};
    if (list.empty()) {
      htmlPage += "(no articles found.)";
    } else {
      for (const auto& article: list) {
        // Articles are addressed by directory name; generate_page() inserts
        // the year directory when resolving such a target.
        const std::string linkstart{"<a href=\"" + (link / article.path.filename()).string() + "/\">"};
        const std::string linkend{"</a>"};
        htmlPage += "<h2>"s + linkstart + article.subject + linkend + "</h2>"s;
        htmlPage += "<div class=\"date\">" + article.date + "</div>"s;

        const auto sv{shortVersion(article.path)};
        if (!sv.empty()) {
          htmlPage += sv + " "s + linkstart + "more..." + linkend;
        }
      }

      htmlPage += "<br/><br/><br/>";
      if (page > 0)
        htmlPage += "<a href=\"?page=" + std::to_string(page - 1) + "\">&lt;&lt;newer</a> "s;
      htmlPage += "page "s + std::to_string(page + 1);
      // a full page suggests that older articles exist
      if (list.size() == number_of_articles_on_front_page)
        htmlPage += " <a href=\"?page=" + std::to_string(page + 1) + "\">older&gt;&gt;</a>"s;
      htmlPage += "<br/>";
    }

    // headers are only written once the page was built successfully
    FCGX_PutS("Content-Type: text/html\r\n", request.out);
    FCGX_FPrintF(request.out, "Cache-Control: no-store\r\n\r\n");
    const std::string data{htmlPage};
    FCGX_PutStr(data.c_str(), data.size(), request.out);
  } catch (const std::exception& ex) {
    HttpStatus("500", "Reading Index page: "s + ex.what(), request);
  }
}

// Writes the page of the article stored in directory path to request.out;
// on any std::exception a 500 response is written instead.
// NOTE(review): markup was stripped from the source copy and is
// reconstructed - confirm.
void generateArticlePage(fs::path& path, FCGX_Request& request, Config& config)
{
  try {
    const auto metaData{getMetaData(path)};

    std::string data{File::getFile(path / article_filename)};

    // body starts after the first empty line
    const size_t pos{data.find("\n\n")};
    if (pos == data.npos)
      throw std::runtime_error("Error parsing article");

    data = data.substr(pos + 2);

    const auto it{metaData.find("Content-Type")};
    if (it != metaData.end() && it->second == "text/plain")
      data = verbatimText(data);

    HtmlPage htmlPage{request, config,
                      "<h1>"s + metaData.at("Subject") + "</h1>"
                      "<div class=\"date\">" + metaData.at("Date") + "</div>"
                      "<br/><br/>"s + data + "<br/>□"};

    const std::string result{htmlPage};

    FCGX_PutS("Content-Type: text/html\r\n\r\n", request.out);
    FCGX_PutStr(result.c_str(), result.size(), request.out);
  } catch (const std::exception& ex) {
    return HttpStatus("500", "Reading Article: "s + ex.what(), request);
  }
}

// Writes the file at path with its mime type to request.out;
// on any std::exception a 500 response is written instead.
void generateStaticFile(fs::path& path, FCGX_Request& request)
{
  try {
    // Read everything that can fail before the first byte is written,
    // otherwise the error status would end up inside the response body.
    const std::string type{mime_type(path.string())};
    const std::string data{File::getFile(path)};

    FCGX_FPrintF(request.out, "Content-Type: %s\r\n\r\n", type.c_str());
    FCGX_PutStr(data.c_str(), data.size(), request.out);
  } catch (const std::exception& ex) {
    HttpStatus("500", "Reading Article file: "s + ex.what(), request);
  }
}

// Splits off the query string of s: returns the URL-decoded key/value pairs
// (items without '=' are ignored) and truncates s at the '?'.
std::unordered_map<std::string, std::string> SplitQueryString(std::string& s)
{
  std::unordered_map<std::string, std::string> result;

  const size_t qpos{s.find('?')};
  if (qpos == s.npos)
    return result;

  const auto list{split(s.substr(qpos + 1), "&")};
  for (const auto& i: list) {
    const size_t apos{i.find('=')};
    if (apos != i.npos) {
      result[Reichwein::URL::urlDecode(i.substr(0, apos))] = Reichwein::URL::urlDecode(i.substr(apos + 1));
    }
  }

  s.erase(qpos);

  return result;
}

void usage()
{
  // NOTE(review): the placeholder after -c was stripped from the source copy
  std::cout << "usage: weblog [-c <path>]" << std::endl;
}

// Evaluates the command line: "-h" prints usage and exits, "-c <path>"
// returns the path. Anything else yields an empty path.
fs::path getConfigPath(int argc, char* argv[])
{
  if (argc == 2 && argv[1] == "-h"s) {
    usage();
    std::exit(0);
  }

  if (argc == 3 && argv[1] == "-c"s)
    return argv[2];

  return {};
}

} // anonymous namespace

Weblog::Weblog(int argc, char* argv[])
  : m_config{getConfigPath(argc, argv).empty() ? Config{} : Config{getConfigPath(argc, argv)}}
{
}

Weblog::~Weblog()
{
}

namespace {

// Handles one request: validates method and path, then dispatches to index
// page, article page or static file. Errors are reported as HTTP status pages.
void generate_page(FCGX_Request& request, Config& config)
{
  try {
    // Make sure we can handle the method
    const std::string method{getParam("REQUEST_METHOD", request)};
    if (method != "GET" && method != "HEAD")
      return HttpStatus("400", "Unknown HTTP method", request);

    // Request path must not contain "..".
    std::string rel_target{getParam("PATH_INFO", request)};
    const std::string target{getParam("SCRIPT_NAME", request)};

    if (rel_target.find("..") != std::string::npos) {
      return HttpStatus("400", "Illegal request: "s + target, request);
    }

    const std::unordered_map<std::string, std::string> query{SplitQueryString(rel_target)};

    // Build the path to the requested file
    const std::string path_translated{config.getDataPath()};
    // Targets starting with 4 digits get those digits prepended as directory
    // (articles are stored below a directory named like their first 4 digits).
    // isdigit() must not be called with a negative char value.
    if (rel_target.size() >= 4 &&
        std::all_of(rel_target.begin(), rel_target.begin() + 4,
                    [](unsigned char c) { return std::isdigit(c) != 0; })) {
      rel_target = rel_target.substr(0, 4) + "/" + rel_target;
    }
    fs::path path{fs::path{path_translated} / rel_target};
    // directories are addressed with a trailing slash: redirect otherwise
    if (target.size() && target.back() != '/' && fs::is_directory(path) &&
        !boost::algorithm::contains(target, "?")) {
      const std::string location{target + "/"s};
      FCGX_FPrintF(request.out, "Location: %s\r\n", location.c_str());
      return HttpStatus("301", "Correcting directory path", request);
    }

    size_t page{0};
    const auto it{query.find("page")};
    if (it != query.end()) {
      try {
        page = std::stoul(it->second);
      } catch (...) {
        // ignore: keep default 0
      }
    }

    if (is_index_page(rel_target))
      return generateIndexPage(path, request, config, page);

    if (is_article_page(rel_target, path))
      return generateArticlePage(path, request, config);

    if (is_index_file(rel_target, path) || is_article_file(rel_target, path))
      return generateStaticFile(path, request);

    return HttpStatus("404", "Bad path specification: "s + rel_target, request);
  } catch (const std::exception& ex) {
    return HttpStatus("500", "Unknown Error: "s + ex.what(), request);
  }
}

} // namespace

// Initializes FastCGI and serves requests until accepting fails.
// Returns 1 if initialization fails, 0 otherwise.
int Weblog::run()
{
  int result = FCGX_Init();
  if (result != 0) { // error on init
    std::cerr << "Error: FCGX_Init()" << std::endl;
    return 1;
  }

  result = FCGX_IsCGI();
  if (result) {
    std::cerr << "Error: No FCGI environment available" << std::endl;
    return 1;
  }

  FCGX_Request request;
  result = FCGX_InitRequest(&request, 0, 0);
  if (result != 0) {
    std::cerr << "Error: FCGX_InitRequest()" << std::endl;
    return 1;
  }

  while (FCGX_Accept_r(&request) >= 0) {
    try {
      generate_page(request, m_config);
    } catch (const std::exception& ex) {
      FCGX_PutS("Status: 500 Internal Server Error\r\n", request.out);
      FCGX_PutS("Content-Type: text/plain\r\n\r\n", request.out);
      FCGX_FPrintF(request.out, "Error: %s\r\n", ex.what());
    }
  }

  return 0;
}