diff options
| author | Physick <96335032+DegustatorPonos@users.noreply.github.com> | 2026-05-10 18:06:16 +0500 |
|---|---|---|
| committer | Physick <96335032+DegustatorPonos@users.noreply.github.com> | 2026-05-10 18:06:16 +0500 |
| commit | 838eaa77fa389203a4c41751b36993575bbbfe04 (patch) | |
| tree | 706fb459923e590978bbacdde5fe2fb3bacf4b74 | |
Initial evening
| -rw-r--r-- | Makefile | 5 | ||||
| -rwxr-xr-x | app | bin | 0 -> 677224 bytes | |||
| -rw-r--r-- | src/RSS.cpp | 91 | ||||
| -rw-r--r-- | src/RSS.hpp | 259 | ||||
| -rw-r--r-- | src/main.cpp | 25 |
5 files changed, 380 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6f05802
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,5 @@
+SOURCEFILES := src/main.cpp src/RSS.cpp
+GPPFLAGS := -Wall -Wextra -std=c++23
+
+all:
+	g++ $(GPPFLAGS) $(SOURCEFILES) -lcurl -o app
Binary files differ
diff --git a/src/RSS.cpp b/src/RSS.cpp
new file mode 100644
index 0000000..a1c0dde
--- /dev/null
+++ b/src/RSS.cpp
@@ -0,0 +1,91 @@
+#include "RSS.hpp"
+#include <iomanip>
+#include <iostream>
+#include <ctime>
+#include <memory>
+#include <stdexcept>
+#include <curl/curl.h>
+
+// Stores the feed URL, downloads the document and parses it into Entries.
+// Throws std::runtime_error when the download fails; parse errors are logged.
+RSS::RSS(std::string url) {
+    URL = url;
+    channelInfo = ChannelInfo(url);
+    parse(request());
+}
+
+// libcurl write callback: appends the received chunk to the std::string that
+// was registered through CURLOPT_WRITEDATA and reports every byte as consumed.
+size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* userp) {
+    const size_t totalSize = size * nmemb;
+    static_cast<std::string*>(userp)->append(static_cast<const char*>(contents), totalSize);
+    return totalSize;
+}
+
+// Performs a GET request for URL (following redirects) and returns the body.
+// Throws std::runtime_error if the handle cannot be created or the transfer fails.
+std::string RSS::request() {
+    // The deleter is the only place the handle is released; calling
+    // curl_easy_cleanup() by hand as well would free it twice.
+    std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl (
+            curl_easy_init(), &curl_easy_cleanup);
+
+    if (!curl) {
+        throw std::runtime_error("Failed to initialize CURL");
+    }
+
+    std::string data;
+
+    curl_easy_setopt(curl.get(), CURLOPT_URL, URL.c_str());
+    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallback);
+    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &data);
+    curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
+
+    const CURLcode res = curl_easy_perform(curl.get());
+    if (res != CURLE_OK) {
+        throw std::runtime_error(std::string("CURL request failed: ") + curl_easy_strerror(res));
+    }
+
+    return data;
+}
+
+// Fills channelInfo.Title and Entries from the downloaded document. A malformed
+// feed is reported on stdout and leaves the object with whatever was parsed so far.
+void RSS::parse(std::string contents) {
+    try {
+        const auto leaf = XML_leaf(contents).GetChild("channel");
+        // GetChild() hands back a "<>" placeholder when nothing matches.
+        if (leaf.Raw == "<>")
+            throw std::runtime_error("The feed does not contain <channel> element");
+
+        this->channelInfo.Title = leaf.GetChild("title").GetValue();
+
+        for (const auto& item : leaf.GetChildren("item"))
+            Entries.push_back(RSS_Entry(item));
+    } catch (const std::exception& err) {
+        std::cout << "Failed to parse feed '" << URL << "' :" << err.what() << std::endl;
+        return;
+    }
+}
+
+// Copies title, link and description out of an <item> element.
+RSS_Entry::RSS_Entry(const XML_leaf& node) {
+    Title = node.GetChild("title").GetValue();
+    URL = node.GetChild("link").GetValue();
+    Contents = node.GetChild("description").GetValue();
+
+    // auto pubDateField = node.GetChild("pubDate");
+    // if (pubDateField.Value.length() == 0) return;
+    // std::cout << "pub date" << pubDateField.Value << std::endl;
+    // strptime(pubDateField.Value.c_str(), "%Y-%m-%dT%H:%M:%SZ", &pubDate);
+}
+
+// Writes the entry to stdout: separator, title, description, link.
+void RSS_Entry::print() const {
+    std::cout
+        << "==============================\n"
+        << Title << '\n'
+        // << std::put_time(&pubDate, "%Y-%m-%d %H:%M:%S") << '\n'
+        << Contents << '\n'
+        << URL<< '\n';
+}
diff --git a/src/RSS.hpp b/src/RSS.hpp
new file mode 100644
index 0000000..709125d
--- /dev/null
+++ b/src/RSS.hpp
@@ -0,0 +1,259 @@
+#ifndef RSS_H
+#define RSS_H
+
+#include <cstddef>
+#include <ctime>
+#include <memory>
+#include <regex>
+#include <stack>
+#include <stdexcept>
+#include <string>
+#include <iostream>
+#include <utility>
+#include <variant>
+#include <vector>
+
+// One element of a parsed XML document: the opening tag as written (Raw), its
+// name (Name), the last text/CDATA token found directly inside it (Value) and
+// its child elements (leafs). A leaf whose Raw is "<>" is a placeholder that
+// stands for "no such element".
+class XML_leaf {
+public:
+    std::string Raw;
+    std::string Name;
+    std::string Value = "";
+    std::vector<XML_leaf> leafs{};
+
+    // Builds a childless leaf from an opening tag and a text value.
+    XML_leaf(std::string raw, std::string value) {
+        Raw = raw;
+        Name = getTag(raw);
+        Value = value;
+    };
+
+    // Parses a whole document and becomes its root element.
+    // Throws std::runtime_error when the input is too short, does not start
+    // with '<' or does not begin with a tag. If the root element is never
+    // closed the result is the "<>" placeholder.
+    XML_leaf(std::string raw) {
+        // The smallest valid (?) XML element is <></>
+        if (raw.length() < 5 || raw[0] != '<')
+            throw std::runtime_error("The XML " + raw + " is invalid");
+
+        const auto tokens = TokenizeXML(trimSpaces(raw));
+
+        if (tokens.size() < 3 || !isTag(tokens[0]))
+            throw std::runtime_error("Invalid XML");
+
+        XML_leaf root("<>", "");
+        std::stack<XML_leaf> open{}; // elements that are opened but not closed yet
+
+        // Building a tree
+        for (const auto& token : tokens) {
+            if (!isTag(token)) {
+                // Text, CDATA and <!...> markup become the value of the innermost open element.
+                if (!open.empty())
+                    open.top().setValue(token);
+                continue;
+            }
+
+            // <?xml ...?> and other processing instructions never get a
+            // closing tag, so opening them would swallow the rest of the document.
+            if (isProcessingInstruction(token))
+                continue;
+
+            if (open.empty() || !isClosingTagOf(token, open.top().Raw)) {
+                if (IsSelfClosingTag(token)) {
+                    // Without an open parent there is nothing to attach it to.
+                    if (!open.empty())
+                        open.top().leafs.push_back(XML_leaf(token, ""));
+                    continue;
+                }
+                open.push(XML_leaf(token, ""));
+                continue;
+            }
+
+            XML_leaf complete = std::move(open.top());
+            open.pop();
+            if (open.empty()) { // The node is closing a doc
+                root = std::move(complete);
+                break;
+            }
+            open.top().leafs.push_back(std::move(complete));
+        }
+
+        Raw = std::move(root.Raw);
+        Name = std::move(root.Name);
+        Value = std::move(root.Value);
+        leafs = std::move(root.leafs);
+    }
+
+    // Returns a copy of the first direct child named `query`, or a placeholder
+    // leaf (Raw "<>", Value "empty") when there is none.
+    XML_leaf GetChild(const std::string& query) const {
+        for (const auto& child : leafs) {
+            if (child.Name == query)
+                return child;
+        }
+        return XML_leaf("<>", "empty");
+    }
+
+    // Returns copies of all direct children named `query`, in document order.
+    std::vector<XML_leaf> GetChildren(const std::string& query) const {
+        std::vector<XML_leaf> outp = {};
+        for (const auto& child : leafs) {
+            if (child.Name == query)
+                outp.push_back(child);
+        }
+        return outp;
+    }
+
+    // Returns Value with a surrounding <![CDATA[ ... ]]> wrapper removed.
+    std::string GetValue() const {
+        if (isCDATA(Value))
+            return getCDATA(Value);
+        return Value;
+    }
+
+private:
+    void setValue(std::string newValue) {
+        Value = newValue;
+    }
+
+    // Collapses every run of spaces into one and drops a single space between
+    // two tags. It works on the whole document, text and CDATA included.
+    static std::string trimSpaces(const std::string& raw) {
+        static const std::regex spaceRun(" +");
+        static const std::regex spaceBetweenTags("> <");
+        const auto outp = std::regex_replace(raw, spaceRun, " ");
+        return std::regex_replace(outp, spaceBetweenTags, "><");
+    }
+
+    // Splits a document into tags, CDATA sections and the text between them.
+    // An unterminated tag or CDATA section becomes one token that runs to the
+    // end of the input, so every iteration advances.
+    static std::vector<std::string> TokenizeXML(const std::string& raw) {
+        std::vector<std::string> outp{};
+
+        std::size_t idx = 0;
+        while (idx < raw.length()) {
+            std::size_t end = std::string::npos; // one past the last char of the token
+            if (raw[idx] == '<') {
+                if (raw.compare(idx, 9, "<![CDATA[") == 0) {
+                    const auto close = raw.find("]]>", idx);
+                    if (close != std::string::npos) end = close + 3;
+                } else {
+                    const auto close = raw.find('>', idx);
+                    if (close != std::string::npos) end = close + 1;
+                }
+            } else {
+                end = raw.find('<', idx);
+            }
+            if (end == std::string::npos)
+                end = raw.length();
+            outp.push_back(raw.substr(idx, end - idx));
+            idx = end;
+        }
+
+        return outp;
+    };
+
+    static bool isTag(const std::string& token){
+        return token.length() > 2
+            && token[0] == '<'
+            && token[token.length() - 1] == '>'
+            && token[1] != '!';
+    }
+
+    static bool isCDATA(const std::string& token) {
+        return token.find("<![CDATA[") == 0;
+    }
+
+    static bool isProcessingInstruction(const std::string& token) {
+        return token.length() > 1 && token[0] == '<' && token[1] == '?';
+    }
+
+    // Assumes the string is CDATA
+    static std::string getCDATA(const std::string& token) {
+        // 9 = "<![CDATA[".length
+        // 3 = "]]>".length
+        if (token.length() < 9 + 3) return token;
+        return token.substr(9, token.length() - (9+3));
+    }
+
+    static bool isClosingTagOf(const std::string& token, const std::string& opening_tag){
+        if (!(token.length() > 3
+                && token[0] == '<'
+                && token[1] == '/'
+                && token[token.length() - 1] == '>'))
+            return false;
+        if (!isTag(opening_tag)) return false;
+
+        // getTag() yields "/name" for a closing tag; drop the slash to compare.
+        return getTag(token).substr(1) == getTag(opening_tag);
+    }
+
+    static bool IsSelfClosingTag(const std::string& token) {
+        return token.length() > 3
+            && token[0] == '<'
+            && token[token.length() - 1] == '>'
+            && token[token.length() - 2] == '/';
+    }
+
+    // Extracts the element name from a tag: everything after '<' up to the
+    // first whitespace or the closing '>', without the '/' of "<name/>".
+    // Anything that is not a tag is returned unchanged.
+    static std::string getTag(const std::string& full) {
+        if (!isTag(full)) return full;
+        const auto name_end = full.find_first_of(" \t\r\n");
+        std::string name = name_end == std::string::npos
+            ? full.substr(1, full.length() - 2)
+            : full.substr(1, name_end - 1);
+        if (name.length() > 1 && name.back() == '/')
+            name.pop_back();
+        return name;
+    }
+};
+
+// Title and source URL of a feed.
+struct ChannelInfo {
+    std::string Title = "";
+    std::string URL = "";
+
+    ChannelInfo(std::string url) {
+        URL = url;
+    };
+
+    ChannelInfo() {
+    };
+    void print() const {
+        std::cout << Title <<'(' << URL << ")\n";
+    }
+};
+
+// A single <item> of a feed.
+struct RSS_Entry {
+    std::string URL = "";
+    std::string Title = "";
+    std::string Contents = "";
+    std::tm pubDate = {};
+
+    RSS_Entry(const XML_leaf& node);
+    void print() const;
+};
+
+// A feed downloaded from URL together with its parsed entries.
+class RSS {
+public:
+    std::string URL;
+    ChannelInfo channelInfo;
+    std::vector<RSS_Entry> Entries = {};
+
+    RSS(std::string url);
+
+private:
+    std::string request();
+    void parse(std::string contents);
+};
+
+#endif // RSS_H
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..f936e85
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,25 @@
+#include "RSS.hpp"
+#include <curl/curl.h>
+#include <exception>
+#include <iostream>
+#include <memory>
+
+// Downloads one feed and prints its channel line followed by at most four entries.
+int main() {
+    try {
+        auto rss = std::make_unique<RSS>("https://www.independent.co.uk/news/uk/rss");
+        rss->channelInfo.print();
+        int i = 0;
+        for (const auto& entry : rss->Entries) {
+            if (i > 3) break;
+            entry.print();
+            ++i;
+        }
+    } catch (const std::exception& err) {
+        // RSS's constructor throws std::runtime_error when the download fails.
+        std::cerr << "Failed to load feed: " << err.what() << '\n';
+        return 1;
+    }
+
+    return 0;
+}
