summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhysick <96335032+DegustatorPonos@users.noreply.github.com>2026-05-10 18:06:16 +0500
committerPhysick <96335032+DegustatorPonos@users.noreply.github.com>2026-05-10 18:06:16 +0500
commit838eaa77fa389203a4c41751b36993575bbbfe04 (patch)
tree706fb459923e590978bbacdde5fe2fb3bacf4b74
Initial evening
-rw-r--r--Makefile5
-rwxr-xr-xappbin0 -> 677224 bytes
-rw-r--r--src/RSS.cpp81
-rw-r--r--src/RSS.hpp222
-rw-r--r--src/main.cpp21
5 files changed, 329 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6f05802
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,5 @@
# Translation units compiled into the `app` binary.
SOURCEFILES := src/main.cpp src/RSS.cpp
# Compiler flags: all common warnings, C++23.
GPPFLAGS := -Wall -Wextra -std=c++23

# `all` does not produce a file called "all"; declaring it phony keeps the
# build running even if a file with that name appears in the directory.
.PHONY: all
all:
	g++ $(GPPFLAGS) $(SOURCEFILES) -lcurl -o app
diff --git a/app b/app
new file mode 100755
index 0000000..50da33a
--- /dev/null
+++ b/app
Binary files differ
diff --git a/src/RSS.cpp b/src/RSS.cpp
new file mode 100644
index 0000000..a1c0dde
--- /dev/null
+++ b/src/RSS.cpp
@@ -0,0 +1,81 @@
+#include "RSS.hpp"
+#include <iomanip>
+#include <iostream>
+#include <ctime>
+#include <memory>
+#include <stdexcept>
+#include <curl/curl.h>
+
+RSS::RSS(std::string url) {
+ URL = url;
+ channelInfo = ChannelInfo(url);
+ parse(request());
+}
+
// Write callback registered with CURLOPT_WRITEFUNCTION: appends the received
// chunk (`size * nmemb` bytes starting at `contents`) to the string behind
// `userp` and reports that many bytes as handled.
size_t WriteCallback(void* contents, size_t size, size_t nmemb, std::string* userp) {
    const size_t byteCount = size * nmemb;
    const char* chunk = static_cast<const char*>(contents);
    userp->append(chunk, byteCount);
    return byteCount;
}
+
+std::string RSS::request() {
+ std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl (
+ curl_easy_init(), &curl_easy_cleanup);
+
+ if (!curl) {
+ throw std::runtime_error("Failed to initialize CURL");
+ }
+
+ std::string data;
+
+ curl_easy_setopt(curl.get(), CURLOPT_URL, URL.c_str());
+ curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallback);
+ curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &data);
+ curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
+
+ auto res = curl_easy_perform(curl.get());
+ if (res != CURLE_OK) {
+ throw std::runtime_error(std::string("CURL request failed: ") + curl_easy_strerror(res));
+ }
+
+ curl_easy_cleanup(curl.get());
+ return data;
+}
+
+void RSS::parse(std::string contents) {
+ // std::cout << "starting tokenization..." << std::endl;
+ try {
+ auto leaf = XML_leaf(contents).GetChild("channel");
+ if (leaf.Raw == "<>")
+ throw new std::runtime_error("The feed does not contain <channel> element");
+
+ this->channelInfo.Title = leaf.GetChild("title").Value;
+
+ auto items = leaf.GetChildren("item");
+ for (auto item : items)
+ Entries.push_back(RSS_Entry(item));
+ } catch (const char *err) {
+ std::cout << "Failed to parse feed '" << URL << "' :" << err << std::endl;
+ return;
+ }
+}
+
+RSS_Entry::RSS_Entry(XML_leaf node) {
+ Title = node.GetChild("title").GetValue();
+ URL = node.GetChild("link").GetValue();
+ Contents = node.GetChild("description").GetValue();
+
+ // auto pubDateField = node.GetChild("pubDate");
+ // if (pubDateField.Value.length() == 0) return;
+ // std::cout << "pub date" << pubDateField.Value << std::endl;
+ // strptime(pubDateField.Value.c_str(), "%Y-%m-%dT%H:%M:%SZ", &pubDate);
+}
+
+void RSS_Entry::print() {
+ std::cout
+ << "==============================\n"
+ << Title << '\n'
+ // << std::put_time(&pubDate, "%Y-%m-%d %H:%M:%S") << '\n'
+ << Contents << '\n'
+ << URL<< '\n';
+}
diff --git a/src/RSS.hpp b/src/RSS.hpp
new file mode 100644
index 0000000..709125d
--- /dev/null
+++ b/src/RSS.hpp
@@ -0,0 +1,222 @@
+#ifndef RSS_H
+#define RSS_H
+
+#include <cstddef>
+#include <ctime>
+#include <memory>
+#include <regex>
+#include <stack>
+#include <stdexcept>
+#include <string>
+#include <iostream>
+#include <utility>
+#include <variant>
+#include <vector>
+
// A node of a small, forgiving XML element tree.
//
//   Raw   - the element's opening tag exactly as tokenized, e.g. `<item>`.
//           "<>" marks the placeholder used when nothing was found or the
//           document never closed its root element.
//   Name  - tag name taken from Raw (attributes and a trailing '/' dropped).
//   Value - the last non-blank text or CDATA token found directly inside the
//           element (CDATA is stored raw; GetValue() unwraps it).
//   leafs - child elements in document order.
//
// Limitations: attributes are not parsed, entities are not decoded, and runs
// of spaces are collapsed everywhere (text included) before tokenizing.
class XML_leaf {
    public:
        std::string Raw;
        std::string Name;
        std::string Value = "";
        std::vector<XML_leaf> leafs{};

        // Creates a single node from an opening tag and its text value;
        // nothing is parsed.
        XML_leaf(std::string raw, std::string value)
            : Raw(std::move(raw)), Name(getTag(Raw)), Value(std::move(value)) {}

        // Parses a whole document and becomes its root element.
        //
        // Processing instructions (`<?...?>`), comments and DOCTYPE are
        // skipped wherever they appear, so a document parses the same with or
        // without an XML declaration. If the root element is never closed the
        // node stays the "<>" placeholder.
        //
        // Throws std::runtime_error when `raw` is shorter than 5 characters,
        // does not start with '<', or contains no element tag at all.
        XML_leaf(std::string raw) {
            // The smallest valid (?) XML element is <></>
            if (raw.length() < 5 || raw[0] != '<')
                throw std::runtime_error("The XML " + raw + " is invalid");

            const auto tokens = TokenizeXML(trimSpaces(raw));

            XML_leaf root("<>", "");
            std::stack<XML_leaf> open;  // elements opened but not yet closed
            bool sawElement = false;

            for (const auto& token : tokens) {
                if (isIgnorable(token))
                    continue;

                if (!isTag(token)) {
                    // Text or CDATA. Blank text (indentation, newlines) must
                    // not overwrite a value that was already captured.
                    if (!open.empty() && !isBlank(token))
                        open.top().setValue(token);
                    continue;
                }

                if (token[1] == '/') {
                    // A closing tag that does not match the innermost open
                    // element is ignored rather than pushed as a new element.
                    if (open.empty() || !isClosingTagOf(token, open.top().Raw))
                        continue;

                    XML_leaf complete = std::move(open.top());
                    open.pop();
                    if (open.empty()) {  // The node is closing the document
                        root = std::move(complete);
                        break;
                    }
                    open.top().leafs.push_back(std::move(complete));
                    continue;
                }

                sawElement = true;
                if (IsSelfClosingTag(token)) {
                    if (open.empty()) {  // self-closing root, e.g. <rss/>
                        root = XML_leaf(token, "");
                        break;
                    }
                    open.top().leafs.emplace_back(token, "");
                    continue;
                }
                open.push(XML_leaf(token, ""));
            }

            if (!sawElement)
                throw std::runtime_error("Invalid XML");

            Raw = std::move(root.Raw);
            Name = std::move(root.Name);
            Value = std::move(root.Value);
            leafs = std::move(root.leafs);
        }

        // Returns a copy of the first direct child named `query`, or a
        // placeholder node (Raw "<>", Value "empty") when there is none.
        XML_leaf GetChild(const std::string& query) const {
            for (const auto& child : leafs) {
                if (child.Name == query)
                    return child;
            }
            return XML_leaf("<>", "empty");
        }

        // Returns copies of all direct children named `query`, in document order.
        std::vector<XML_leaf> GetChildren(const std::string& query) const {
            std::vector<XML_leaf> outp;
            for (const auto& child : leafs) {
                if (child.Name == query)
                    outp.push_back(child);
            }
            return outp;
        }

        // Returns Value, with the CDATA wrapper removed when Value is a
        // CDATA section.
        std::string GetValue() const {
            if (isCDATA(Value))
                return getCDATA(Value);
            return Value;
        }

    private:
        void setValue(std::string newValue) {
            Value = std::move(newValue);
        }

        // True when `s` contains `prefix` starting at offset `pos` (pos <= s.size()).
        static bool hasPrefixAt(const std::string& s, std::size_t pos, const std::string& prefix) {
            return s.compare(pos, prefix.size(), prefix) == 0;
        }

        // True when the token holds nothing but spaces, tabs and line breaks.
        static bool isBlank(const std::string& token) {
            return token.find_first_not_of(" \t\r\n") == std::string::npos;
        }

        // Collapses every run of spaces to one space, then removes a single
        // space sitting between two tags.
        static std::string trimSpaces(const std::string& raw) {
            // Compiled once; building a std::regex is expensive.
            static const std::regex spaceRun(" +");
            static const std::regex gapBetweenTags("> <");
            const auto collapsed = std::regex_replace(raw, spaceRun, " ");
            return std::regex_replace(collapsed, gapBetweenTags, "><");
        }

        // Splits the document into tokens: each `<...>` tag, each CDATA
        // section, each comment, and each run of text between them. An
        // unterminated construct takes the rest of the input as its token,
        // so the scan always advances and always terminates.
        static std::vector<std::string> TokenizeXML(const std::string& raw) {
            std::vector<std::string> outp;

            std::size_t idx = 0;
            while (idx < raw.length()) {
                std::size_t end = raw.length();  // one past the token's last char
                if (raw[idx] == '<') {
                    std::size_t close = std::string::npos;
                    std::size_t closeLen = 0;
                    if (hasPrefixAt(raw, idx, "<![CDATA[")) {
                        close = raw.find("]]>", idx);
                        closeLen = 3;
                    } else if (hasPrefixAt(raw, idx, "<!--")) {
                        // A comment may contain '>' and ends only at "-->".
                        close = raw.find("-->", idx);
                        closeLen = 3;
                    } else {
                        close = raw.find('>', idx);
                        closeLen = 1;
                    }
                    if (close != std::string::npos)
                        end = close + closeLen;
                } else {
                    const auto next = raw.find('<', idx);
                    if (next != std::string::npos)
                        end = next;
                }
                outp.push_back(raw.substr(idx, end - idx));
                idx = end;
            }

            return outp;
        }

        // True for any `<...>` token that is not a `<!...>` construct
        // (closing tags and `<?...?>` included).
        static bool isTag(const std::string& token) {
            return token.length() > 2
                && token[0] == '<'
                && token[token.length() - 1] == '>'
                && token[1] != '!';
        }

        // True for markup that carries no element or text content:
        // processing instructions / XML declaration, comments, DOCTYPE.
        static bool isIgnorable(const std::string& token) {
            if (token.length() < 2 || token[0] != '<')
                return false;
            if (token[1] == '?')
                return true;
            return token[1] == '!' && !isCDATA(token);
        }

        static bool isCDATA(const std::string& token) {
            return hasPrefixAt(token, 0, "<![CDATA[");
        }

        // Assumes the string is CDATA; returns the text between the markers.
        static std::string getCDATA(const std::string& token) {
            const std::size_t openLen = 9;   // "<![CDATA[".length
            const std::size_t closeLen = 3;  // "]]>".length
            if (token.length() < openLen + closeLen) return token;
            // Only strip the terminator when it is actually there.
            if (token.compare(token.length() - closeLen, closeLen, "]]>") != 0)
                return token.substr(openLen);
            return token.substr(openLen, token.length() - (openLen + closeLen));
        }

        // True when `token` is `</name>` and `opening_tag` is a tag with the
        // same name.
        static bool isClosingTagOf(const std::string& token, const std::string& opening_tag) {
            if (!(token.length() > 3
                        && token[0] == '<'
                        && token[1] == '/'
                        && token[token.length() - 1] == '>'))
                return false;
            if (!isTag(opening_tag)) return false;

            // getTag keeps the leading '/' of a closing tag; drop it to compare.
            return getTag(token).substr(1) == getTag(opening_tag);
        }

        static bool IsSelfClosingTag(const std::string& token) {
            return token.length() > 3
                && token[0] == '<'
                && token[token.length() - 1] == '>'
                && token[token.length() - 2] == '/';
        }

        // Extracts the tag name: everything after '<' up to the first
        // whitespace (or the closing '>'), without the '/' of a self-closing
        // tag such as `<br/>`. A closing tag keeps its leading '/'.
        // Non-tag input is returned unchanged.
        static std::string getTag(const std::string& full) {
            if (!isTag(full)) return full;
            auto end = full.find_first_of(" \t\r\n", 1);
            if (end == std::string::npos)
                end = full.length() - 1;
            auto name = full.substr(1, end - 1);
            if (name.size() > 1 && name.back() == '/')
                name.pop_back();
            return name;
        }
};
+
// Descriptive data about a feed: its title and the address it came from.
// Both fields start out empty.
struct ChannelInfo {
    std::string Title = "";
    std::string URL = "";

    // Leaves both fields empty.
    ChannelInfo() {}

    // Records the feed address; the title stays empty.
    ChannelInfo(std::string url) : URL(std::move(url)) {}

    // Writes "Title(URL)" followed by a newline to std::cout.
    void print() {
        std::cout << Title;
        std::cout << '(' << URL;
        std::cout << ")\n";
    }
};
+
+struct RSS_Entry {
+ std::string URL = "";
+ std::string Title = "";
+ std::string Contents = "";
+ std::tm pubDate = {};
+
+ RSS_Entry(XML_leaf node);
+ void print();
+};
+
+class RSS {
+ public:
+ std::string URL;
+ ChannelInfo channelInfo;
+ std::vector<RSS_Entry> Entries = {};
+
+ RSS(std::string url);
+
+ private:
+ std::string request();
+ void parse(std::string contents);
+};
+
+#endif // RSS_H
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..f936e85
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,21 @@
+#include "RSS.hpp"
+#include <curl/curl.h>
+#include <memory>
+
+int main() {
+ // auto tmp = RSS_leaf::TokenizeXML("<test> abc </test>");
+ auto rss = std::make_unique<RSS>("https://www.independent.co.uk/news/uk/rss");
+ rss->channelInfo.print();
+ int i = 0;
+ for (auto entry : rss->Entries) {
+ if (i > 3) break;
+ entry.print();
+ ++i;
+ }
+ // auto rss_1 = std::make_unique<RSS>("https://feeds.washingtonpost.com/rss/world");
+ // auto rss_2 = std::make_unique<RSS>("https://news.yahoo.com/rss/mostviewed");
+ // for (auto entry : rss_2->Entries)
+ // entry.print();
+
+ return 0;
+}