From 4c4a001cacd9c3e1c2552dbcaf50165091a82ac1 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sat, 11 Feb 2023 11:24:22 +0100 Subject: Separated out weblog from webserver --- Makefile | 52 ++++++ html/blog.css | 76 +++++++++ html/favicon.ico | Bin 0 -> 2238 bytes procmail/procmail.sh | 119 ++++++++++++++ procmail/procmailrc | 12 ++ weblog.cpp | 456 +++++++++++++++++++++++++++++++++++++++++++++++++++ weblog.h | 23 +++ 7 files changed, 738 insertions(+) create mode 100644 Makefile create mode 100644 html/blog.css create mode 100644 html/favicon.ico create mode 100755 procmail/procmail.sh create mode 100644 procmail/procmailrc create mode 100644 weblog.cpp create mode 100644 weblog.h diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2e19495 --- /dev/null +++ b/Makefile @@ -0,0 +1,52 @@ +include ../../common.mk + +PROJECTNAME=weblog + +CXXFLAGS+= -fvisibility=hidden -fPIC + +CXXFLAGS+= -I../.. + +LDLIBS=\ +-lreichwein \ +-lboost_context \ +-lboost_coroutine \ +-lboost_program_options \ +-lboost_system \ +-lboost_thread \ +-lboost_filesystem \ +-lboost_regex \ +-lpthread \ +-lssl -lcrypto \ +-ldl + +PROGSRC=\ + weblog.cpp + +SRC=$(PROGSRC) + +all: $(PROJECTNAME).so + +$(PROJECTNAME).so: $(SRC:.cpp=.o) + $(CXX) $(LDFLAGS) $^ -shared $(LDLIBS) $(LIBS) -o $@ + +%.d: %.cpp + $(CXX) $(CXXFLAGS) -MM -MP -MF $@ -c $< + +%.o: %.cpp %.d + $(CXX) $(CXXFLAGS) -c $< -o $@ + +# dependencies + +ADD_DEP=Makefile + +install: + mkdir -p $(DESTDIR)/usr/lib/webserver/plugins + cp $(PROJECTNAME).so $(DESTDIR)/usr/lib/webserver/plugins + +# misc --------------------------------------------------- +clean: + -rm -f *.o *.so *.d + +.PHONY: clean install all + +-include $(wildcard $(SRC:.cpp=.d)) diff --git a/html/blog.css b/html/blog.css new file mode 100644 index 0000000..5277980 --- /dev/null +++ b/html/blog.css @@ -0,0 +1,76 @@ +body { + font-family: "sans-serif"; +} + +figcaption { + text-align: center; + font-size: 8px; + color: #808080; +} + +figure { + display: inline-block; +} + +h2 { + margin: 25px 0px 3px 0px; +} + +div.date { + font-size: 8px; + color: #808080; + margin: 0px 0px 10px 0px; +} + +div.impressum { + margin: 500px 0px 0px 0px; +} + +.citation { + font-style:italic; + margin-left: 50px; + margin-right: 50px; +} + +.reference { + text-align: right; + margin-bottom: 30px; +} + + +.mobile { + width: 300px; + border-width: 80px 15px 80px 15px; + border-style: solid; + border-radius: 30px; + border-color: #000000; +} + +.logo { + display: block; + margin: 0 auto; +} + +.screenshot { + width: 400px; + border: 2px solid; + border-color: #8888AA; +} + +img.banner { + vertical-align: -5px; +} + +@media only screen and (min-width: 1px) and (max-width: 630px) { +} + +@media only screen and (min-width: 631px) and (max-width: 950px) { +} + +@media only screen and (min-width: 951px) { + div.page { + max-width: 950px; + width: 100%; + margin: 0 auto; + } +} diff --git a/html/favicon.ico b/html/favicon.ico new file mode 100644 index 0000000..e8cbddb Binary files /dev/null and b/html/favicon.ico differ diff --git a/procmail/procmail.sh b/procmail/procmail.sh new file mode 100755 index 0000000..134d957 --- /dev/null +++ b/procmail/procmail.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# +# Mail Blog input script, to be called by procmail +# +set -e + +function atexit { + if [ "$ERRORMSG" != "" -o -s errormsg.txt ] ; then + (echo "$ERRORMSG" ; cat errormsg.txt) | mutt -s "bloginput: Error" -- "$ADDR" >> ~/log 2>&1 + fi + cd + rm -rf "$DIR" +} +trap atexit EXIT + +function mimedecode() { + perl -ne 'require MIME::Head; $head = MIME::Head->read(\*STDIN); $head->decode(); print $head->get("Subject")' +} + +function next_index() { + DIR=$1 + + LASTENTRY=`ls -1dr $DIR/$(date +%Y)/$(date +%Y%m%d)* 2>/dev/null | xargs -n 1 basename | head -n1` + + if [ "$LASTENTRY" == "" ] ; then + echo 001 + return + fi + + INDEX=${LASTENTRY:9:3} + + INDEX=$(($INDEX + 1)) + + while [ "$((${#INDEX} < 3))" == "1" ] ; do INDEX=0$INDEX ; done + + echo $INDEX +} + +echo "Log at `date`:" >> ~/log 2>&1 + +BLOGDIR=/var/www/rolandreichwein.de-blog +#BLOGDIR=/var/www/blog + +# Default: Error +ERRORMSG="General error." +USERLOG="" + +DIR=`mktemp -d` + +cd "$DIR" + +touch body.txt +touch errormsg.txt + +cat > inmail + +ADDR=`grep "^From: " inmail | sed -e 's/^From: //'` +if echo "$ADDR" | grep -q -v "^[a-zA-Z0-9@._-<>\" ]\+$" ; then + # can't send error message to unknown requester + ERRORMSG="" + exit 1 +fi +if echo "$ADDR" | grep -q "weblog-bloginput" ; then + # don't reply to mails from weblog-bloginput, i.e. prevent mail loops + ERRORMSG="" + exit 1 +fi + +ALLLINES=`wc -l inmail | cut -f1 -d" "` + +HEADERLINES=`cat inmail | (n=0; while read i ; do + if [ "$i" == "" ] ; then + echo $n + break + fi + n=$(($n + 1)) +done)` + +HEADER=`head -n$HEADERLINES inmail` +BODY=`tail -n$(($ALLLINES - $HEADERLINES - 1)) inmail` + +echo "$HEADER" > /home/weblog-bloginput/header.txt +echo "$BODY" > /home/weblog-bloginput/body.txt + +SUBJECT=`echo "$HEADER" | mimedecode` +SUBJECT_PATH=`echo "$SUBJECT" | sed -e 's/[^a-zA-Z0-9]/_/g'` +ARTICLEINDEX=`next_index $BLOGDIR` + +ARTICLEDIR="$BLOGDIR/`date +%Y/%Y%m%d`_${ARTICLEINDEX}_$SUBJECT_PATH" + +mkdir -p $ARTICLEDIR +echo "Subject: $SUBJECT" >> $ARTICLEDIR/article.data +echo "" >> $ARTICLEDIR/article.data + +# get attachments +munpack -C $ARTICLEDIR `pwd`/inmail >/dev/null || true + +DATANAME="`ls $ARTICLEDIR/*.desc || true`" 2>/dev/null +if [ "$DATANAME" = "" ] ; then + echo "$BODY" >> $ARTICLEDIR/article.data +else + cat "$DATANAME" >> $ARTICLEDIR/article.data + rm "$DATANAME" +fi + +chmod -R a+r $ARTICLEDIR +chmod a+x $ARTICLEDIR + +( +echo "Processed successfully." +echo "ARTICLEDIR=$ARTICLEDIR" +) | mutt -s "Bloginput: $SUBJECT_PATH" -- "$ADDR" >> ~/log 2>&1 + +echo "User log:" >> ~/log +cat body.txt >> ~/log + +ERRORMSG="" +echo "Done successfully at `date`." >> ~/log 2>&1 + diff --git a/procmail/procmailrc b/procmail/procmailrc new file mode 100644 index 0000000..fa1fe6f --- /dev/null +++ b/procmail/procmailrc @@ -0,0 +1,12 @@ +:0: +* ^X-Virus-Infected +Virus +# TODO: can be /dev/null later + +:0: +* ^X-Spam-Flag: YES +Spam +# TODO: can be /dev/null later + +:0 fhbw +| /home/weblog-bloginput/procmail.sh diff --git a/weblog.cpp b/weblog.cpp new file mode 100644 index 0000000..cc2be34 --- /dev/null +++ b/weblog.cpp @@ -0,0 +1,456 @@ +#include "weblog.h" + +#include "libreichwein/mime.h" +#include "libreichwein/stringhelper.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace std::string_literals; +namespace fs = std::filesystem; +namespace pt = boost::property_tree; +using namespace Reichwein::Mime; +using namespace Reichwein::Stringhelper; + +namespace { + + const size_t number_of_articles_on_front_page {10}; + const std::string article_filename{"article.data"}; + + // Used to return errors by generating response page and HTTP status code + std::string HttpStatus(std::string status, std::string message, std::function& SetResponseHeader) + { + SetResponseHeader("status", status); + SetResponseHeader("content_type", "text/html"); + return status + " " + message; + } + + std::string getFile(const fs::path& filename) + { + std::ifstream file(filename.string(), std::ios::in | std::ios::binary | std::ios::ate); + + if (file.is_open()) { + std::ifstream::pos_type fileSize = file.tellg(); + file.seekg(0, std::ios::beg); + + std::string bytes(fileSize, ' '); + file.read(reinterpret_cast(bytes.data()), fileSize); + + return bytes; + + } else { + throw std::runtime_error("Opening "s + filename.string() + " for reading"); + } + } + + bool is_index_page(std::string& rel_target) + { + return (rel_target.size() == 0 || rel_target == "/"); + } + + bool is_index_file(std::string& rel_target, fs::path& path) + { + // must be top-level file, recognized as mime_type() + return rel_target.find("/") == rel_target.npos && mime_type(path.string()) != "application/text"; + } + + bool is_article_page(std::string& rel_target, fs::path& path) + { + return (rel_target.size() >= 2 && rel_target.back() == '/' && fs::is_directory(path)); + } + + bool is_article_file(std::string& rel_target, fs::path& path) + { + return (fs::is_regular_file(path) && path.filename().string() != article_filename); + } + + struct ArticleInfo + { + fs::path path; + std::string subject; + std::string date; + }; + + // get article metadata from header lines + std::unordered_map getMetaData(fs::path path) + { + if (path.string().size() > 0 && path.string().back() == '/') { + std::string s {path.string()}; + path = s.substr(0, s.size() - 1); + } + std::unordered_map result; + + std::string pathname{path.filename().string()}; + // ISO date + std::string date{pathname.substr(0, 4) + "-"s + pathname.substr(4, 2) + "-"s + pathname.substr(6, 2)}; + + result["Date"] = date; + + fs::path filepath {path / article_filename}; + + std::ifstream file(filepath.string(), std::ios::in); + + if (file.is_open()) { + std::string line; + while (!file.eof()) { + std::getline(file, line); + if (line.empty()) // found header end + break; + size_t pos {line.find(": ")}; + if (pos == line.npos) { + std::cerr << "Warning: Found bad header line in " << filepath << ": " << line << std::endl; + continue; + } + result[line.substr(0, pos)] = line.substr(pos + 2); + } + return result; + + } else { + throw std::runtime_error("Opening "s + filepath.string() + " for reading"); + } + } + + std::vector getArticleList(fs::path& path, size_t page) + { + std::vector result; + + for (auto& year_entry: fs::directory_iterator(path)) { + std::string year_entry_filename{year_entry.path().filename().string()}; + if (fs::is_directory(year_entry)) { + for (auto& entry: fs::directory_iterator(year_entry.path())) { + auto metaData{getMetaData(entry.path())}; + result.emplace_back(ArticleInfo{entry.path(), metaData.at("Subject"), metaData.at("Date")}); + } + } + } + + size_t index0{std::min(number_of_articles_on_front_page * (page), result.size())}; + size_t index1{std::min(number_of_articles_on_front_page * (page + 1), result.size())}; + // sort backwards + std::partial_sort(result.begin(), result.begin() + index1, result.end(), [](const ArticleInfo& a0, const ArticleInfo& a1){ return a0.path.string() > a1.path.string();}); + + return {result.begin() + index0, result.begin() + index1}; + } + + std::string plainTextFromPTree(const pt::ptree& tree) + { + std::string result; + + for (auto child: tree) { + if (child.first == "") + result += child.second.data(); + else + result += plainTextFromPTree(child.second); + } + + return result; + } + + // returns plain text of string (html xml elements removed) + std::string plainTextFromHTML(const std::string& text) + { + pt::ptree tree; + + std::istringstream ss{text}; + pt::read_xml(ss, tree, pt::xml_parser::no_comments | pt::xml_parser::no_concat_text); + + return plainTextFromPTree(tree); + } + + std::string verbatimText(std::string text) + { + boost::algorithm::replace_all(text, "<", "<"); + boost::algorithm::replace_all(text, ">", ">"); + + return "
"s + text + "
"; + } + + std::regex re{"\\.[[:space:]\"]", std::regex::basic}; + + // returns teaser of article in plain text + std::string shortVersion(const fs::path& path) + { + std::string article {getFile(path / article_filename)}; + size_t pos0 {article.find("\n\n")}; + if (pos0 == article.npos) + return ""; + + article = "" + article.substr(pos0 + 2) + ""; + + auto metaData{getMetaData(path)}; + auto it {metaData.find("Content-Type")}; + + // convert html to plaintext, if tagged as such + // default: text/html + if (it == metaData.end() || it->second == "text/html") + article = plainTextFromHTML(article); + + size_t pos{1000}; + + std::smatch match; + if (std::regex_search(article, match, re)) { + pos = std::min(pos, static_cast(match.position() + match.length())); + } + + return article.substr(0, pos); + } + + class HtmlPage + { + std::function& mGetRequestParam; + std::string mContents; + std::string mHeader; + const std::string mFooter; + + public: + HtmlPage(std::function& GetRequestParam, + std::string s = ""s) + : mGetRequestParam(GetRequestParam) + , mContents(s) + , mHeader("" + "" + "" + "" + GetRequestParam("WEBLOG_NAME") + "" + "" + "" + "" + "") + { + } + + HtmlPage& operator+=(const std::string& s) + { + mContents += s; + return *this; + } + + operator std::string() const + { + return mHeader + mContents + mFooter; + } + }; + + std::string generateIndexPage(fs::path& path, + std::function& GetRequestParam, + std::function& SetResponseHeader, + size_t page) + { + try { + if (page > std::numeric_limits::max()) + throw std::runtime_error("Bad page index: "s + std::to_string(page)); + + HtmlPage htmlPage{GetRequestParam, "

"s + GetRequestParam("WEBLOG_NAME") + "

"s}; + + fs::path link{ GetRequestParam("plugin_path")}; + + auto list{getArticleList(path, page)}; + if (list.empty()) + htmlPage += "(no articles found.)"; + else { + for (const auto& article: list) { + std::string linkstart{""}; + std::string linkend{""}; + htmlPage += "

"s + linkstart + article.subject + linkend + "

"s; + htmlPage += "
" + article.date + "
"s; + + auto sv{shortVersion(article.path)}; + if (sv.size()) { + htmlPage += sv + " "s + linkstart + "more..." + linkend; + } + } + htmlPage += "


"; + if (page > 0) + htmlPage += "<<newer "s; + htmlPage += "page "s + std::to_string(page + 1); + if (list.size() == number_of_articles_on_front_page) + htmlPage += " older>>"s; + htmlPage += "
"; + } + SetResponseHeader("cache_control", "no-store"); + return htmlPage; + } catch (const std::exception& ex) { + return HttpStatus("500", "Reading Index page: "s + ex.what(), SetResponseHeader); + } + } + + std::string generateArticlePage(fs::path& path, + std::function& GetRequestParam, + std::function& SetResponseHeader) + { + try { + auto metaData{getMetaData(path)}; + + std::string data { getFile(path / article_filename)}; + + size_t pos {data.find("\n\n")}; + if (pos == data.npos) + throw std::runtime_error("Error parsing article"); + + data = data.substr(pos + 2); + + auto it {metaData.find("Content-Type")}; + if (it != metaData.end() && it->second == "text/plain") + data = verbatimText(data); + + HtmlPage htmlPage{GetRequestParam, "

"s + metaData.at("Subject") + "

" + "
" + metaData.at("Date") + "
" + "

"s + data + "
□"}; + + return htmlPage; + } catch (const std::exception& ex) { + return HttpStatus("500", "Reading Article: "s + ex.what(), SetResponseHeader); + } + } + + std::string generateStaticFile(fs::path& path, std::function& SetResponseHeader) + { + try { + SetResponseHeader("content_type", mime_type(path.string())); + return getFile(path); + } catch (const std::exception& ex) { + return HttpStatus("500", "Reading Article file: "s + ex.what(), SetResponseHeader); + } + } + + std::string urlDecode(std::string s) + { + std::string result; + + size_t pos = 0; + while (pos < s.size()) { + char c {s[pos]}; + if (c == '+') { + result += ' '; + } else if (c == '%' && pos + 2 < s.size()) { + try { + int i = stoi(s.substr(pos + 1, 2), 0, 16); + if (i < 0 || i > 255) + return result; + + result += static_cast(i); + } catch (...) { + return result; + } + + pos += 2; + } else { + result += c; + } + pos++; + } + + return result; + } + + std::unordered_map SplitQueryString(std::string& s) + { + std::unordered_map result; + + size_t qpos = s.find('?'); + if (qpos != s.npos) { + auto list {split(s.substr(qpos + 1), "&")}; + for (auto i: list) { + size_t apos = i.find('='); + if (apos != i.npos) { + result[urlDecode(i.substr(0, apos))] = urlDecode(i.substr(apos + 1)); + } + } + } + + s = s.substr(0, qpos); + + return result; + } + +} // anonymous namespace + +std::string weblog_plugin::name() +{ + return "weblog"; +} + +weblog_plugin::weblog_plugin() +{ + //std::cout << "Plugin constructor" << std::endl; +} + +weblog_plugin::~weblog_plugin() +{ + //std::cout << "Plugin destructor" << std::endl; +} + +std::string weblog_plugin::generate_page( + std::function& GetServerParam, + std::function& GetRequestParam, // request including body (POST...) + std::function& SetResponseHeader // to be added to result string +) +{ + try { + // Make sure we can handle the method + std::string method {GetRequestParam("method")}; + if (method != "GET" && method != "HEAD") + return HttpStatus("400", "Unknown HTTP method", SetResponseHeader); + + // Request path must not contain "..". + std::string rel_target{GetRequestParam("rel_target")}; + std::string target{GetRequestParam("target")}; + if (rel_target.find("..") != std::string::npos) { + return HttpStatus("400", "Illegal request: "s + target, SetResponseHeader); + } + + std::unordered_map query { SplitQueryString(rel_target) }; + + // Build the path to the requested file + std::string doc_root{GetRequestParam("doc_root")}; + if (rel_target.size() >= 4 && std::all_of(rel_target.begin(), rel_target.begin() + 4, isdigit)) { + rel_target = rel_target.substr(0, 4) + "/" + rel_target; + } + fs::path path {fs::path{doc_root} / rel_target}; + if (target.size() && target.back() != '/' && fs::is_directory(path)) { + std::string location{GetRequestParam("location") + "/"s}; + SetResponseHeader("location", location); + return HttpStatus("301", "Correcting directory path", SetResponseHeader); + } + + SetResponseHeader("content_type", "text/html"); + + size_t page {0}; + auto it {query.find("page")}; + if (it != query.end()) { + try { + page = stoul(it->second); + } catch(...) { + // ignore: keep default 0 + } + } + + if (is_index_page(rel_target)) + return generateIndexPage(path, GetRequestParam, SetResponseHeader, page); + + if (is_article_page(rel_target, path)) + return generateArticlePage(path, GetRequestParam, SetResponseHeader); + + if (is_index_file(rel_target, path) || is_article_file(rel_target, path)) + return generateStaticFile(path, SetResponseHeader); + + return HttpStatus("404", "Bad path specification: "s + rel_target, SetResponseHeader); + + } catch (const std::exception& ex) { + return HttpStatus("500", "Unknown Error: "s + ex.what(), SetResponseHeader); + } +} + +bool weblog_plugin::has_own_authentication() +{ + return false; +} diff --git a/weblog.h b/weblog.h new file mode 100644 index 0000000..0994b91 --- /dev/null +++ b/weblog.h @@ -0,0 +1,23 @@ +#pragma once + +#include "../../plugin_interface.h" + +class weblog_plugin: public webserver_plugin_interface +{ +public: + weblog_plugin(); + ~weblog_plugin(); + + std::string name() override; + + std::string generate_page( + std::function& GetServerParam, + std::function& GetRequestParam, // request including body (POST...) + std::function& SetResponseHeader // to be added to result string + ) override; + + bool has_own_authentication() override; +}; + +extern "C" BOOST_SYMBOL_EXPORT weblog_plugin webserver_plugin; +weblog_plugin webserver_plugin; -- cgit v1.2.3