diff options
| author | Physick <96335032+DegustatorPonos@users.noreply.github.com> | 2026-05-10 18:06:16 +0500 |
|---|---|---|
| committer | Physick <96335032+DegustatorPonos@users.noreply.github.com> | 2026-05-10 18:06:16 +0500 |
| commit | 838eaa77fa389203a4c41751b36993575bbbfe04 (patch) | |
| tree | 706fb459923e590978bbacdde5fe2fb3bacf4b74 /src/RSS.hpp | |
Initial evening
Diffstat (limited to 'src/RSS.hpp')
| -rw-r--r-- | src/RSS.hpp | 222 |
1 files changed, 222 insertions, 0 deletions
#ifndef RSS_H
#define RSS_H

#include <cstddef>
#include <ctime>
#include <memory>
#include <regex>
#include <stack>
#include <stdexcept>
#include <string>
#include <iostream>
#include <utility>
#include <variant>
#include <vector>

/// A node of a minimal, best-effort XML tree.
///
/// Each node keeps the raw opening tag it was created from (`Raw`), the tag
/// name extracted from it (`Name`), the last text/CDATA token seen directly
/// inside the element (`Value`) and its child elements (`leafs`).
class XML_leaf {
    public:
        std::string Raw;
        std::string Name;
        std::string Value = "";
        std::vector<XML_leaf> leafs{};

        /// Builds a single node from an opening tag and its value.
        ///
        /// @param raw   The raw tag text, e.g. `<title>`; `Name` is derived from it.
        /// @param value The node's textual value.
        XML_leaf(std::string raw, std::string value)
            : Raw(std::move(raw)), Name(getTag(Raw)), Value(std::move(value)) {}

        /// Parses a document and initialises this node as its root element.
        ///
        /// The first token is only validated as a tag and is otherwise skipped
        /// (presumably the `<?xml ...?>` declaration -- confirm with callers);
        /// tree building starts at the second token. The first element that is
        /// closed while no other element is open becomes the root. If no element
        /// is ever closed that way, the node is left as the `<>` placeholder.
        ///
        /// @param raw The document text; must start with `<` and be at least
        ///            five characters long.
        /// @throws std::runtime_error if the input is too short, does not start
        ///         with `<`, yields fewer than three tokens, or its first token
        ///         is not a tag.
        XML_leaf(std::string raw) {
            // The smallest valid (?) XML element is <></>
            if (raw.length() < 5 || raw[0] != '<')
                throw std::runtime_error("The XML " + raw + " is invalid");

            const auto tokens = TokenizeXML(trimSpaces(raw));

            // Thrown by value so that `catch (const std::exception&)` sees it.
            if (tokens.size() < 3 || !isTag(tokens[0]))
                throw std::runtime_error("Invalid XML");

            XML_leaf result("<>", "");
            std::stack<XML_leaf> open;  // Elements that are opened but not closed yet

            // Building a tree
            for (std::size_t i = 1; i < tokens.size(); i++) {
                const std::string& token = tokens[i];

                // Text and CDATA become the value of the innermost open element.
                if (!isTag(token)) {
                    if (!open.empty())
                        open.top().setValue(token);
                    continue;
                }

                if (open.empty() || !isClosingTagOf(token, open.top().Raw)) {
                    if (!IsSelfClosingTag(token)) {
                        open.emplace(token, "");
                        continue;
                    }
                    // A self-closing element opens and closes at once. With
                    // nothing open it is the whole document; calling top() on
                    // the empty stack would be undefined behaviour.
                    if (open.empty()) {
                        result = XML_leaf(token, "");
                        break;
                    }
                    open.top().leafs.emplace_back(token, "");
                    continue;
                }

                XML_leaf complete = std::move(open.top());
                open.pop();
                if (open.empty()) {  // The node is closing a doc
                    result = std::move(complete);
                    break;
                }
                open.top().leafs.push_back(std::move(complete));
            }

            Raw = std::move(result.Raw);
            Name = std::move(result.Name);
            Value = std::move(result.Value);
            leafs = std::move(result.leafs);
        }

        /// Returns a copy of the first direct child whose name equals `query`.
        ///
        /// @return The matching child, or a placeholder node (raw tag `<>`,
        ///         value `empty`) when there is no such child.
        XML_leaf GetChild(const std::string& query) const {
            for (const auto& child : leafs) {
                if (child.Name == query)
                    return child;
            }
            return XML_leaf("<>", "empty");
        }

        /// Returns copies of all direct children whose name equals `query`,
        /// in document order.
        std::vector<XML_leaf> GetChildren(const std::string& query) const {
            std::vector<XML_leaf> outp;
            for (const auto& child : leafs) {
                if (child.Name == query)
                    outp.push_back(child);
            }
            return outp;
        }

        /// Returns the node's value, unwrapping a `<![CDATA[...]]>` section
        /// when the value starts with one.
        std::string GetValue() const {
            if (isCDATA(Value))
                return getCDATA(Value);
            return Value;
        }

    private:
        void setValue(std::string newValue) {
            Value = std::move(newValue);
        }

        /// Collapses runs of spaces into one space and removes a single space
        /// between adjacent tags (`> <` becomes `><`).
        static std::string trimSpaces(const std::string& raw) {
            auto outp = std::regex_replace(raw, std::regex(" +"), " ");
            outp = std::regex_replace(outp, std::regex("> <"), "><");
            return outp;
        }

        /// Splits the input into tag tokens (`<...>`), CDATA tokens
        /// (`<![CDATA[...]]>`) and the text runs between them.
        ///
        /// An unterminated tag or CDATA section consumes the rest of the input
        /// as one final token, so the loop always makes progress.
        static std::vector<std::string> TokenizeXML(const std::string& raw) {
            static const std::string cdataOpen = "<![CDATA[";
            std::vector<std::string> outp;

            std::size_t idx = 0;
            while (idx < raw.length()) {
                std::size_t end = raw.length();  // One past the token's last char
                if (raw[idx] == '<') {
                    // Compared in place; copying the remaining input for every
                    // tag would make tokenizing quadratic.
                    const bool cdata =
                        raw.compare(idx, cdataOpen.length(), cdataOpen) == 0;
                    const auto close =
                        cdata ? raw.find("]]>", idx) : raw.find('>', idx);
                    if (close != std::string::npos)
                        end = close + (cdata ? 3 : 1);
                } else {
                    const auto next_tag = raw.find('<', idx);
                    if (next_tag != std::string::npos)
                        end = next_tag;
                }
                outp.push_back(raw.substr(idx, end - idx));
                idx = end;
            }

            return outp;
        }

        /// True for `<...>` tokens longer than two characters whose second
        /// character is not `!` (which excludes CDATA and comments).
        static bool isTag(const std::string& token) {
            return token.length() > 2
                && token[0] == '<'
                && token[token.length() - 1] == '>'
                && token[1] != '!';
        }

        static bool isCDATA(const std::string& token) {
            return token.find("<![CDATA[") == 0;
        }

        /// Strips the CDATA wrapper. Assumes the string is CDATA.
        static std::string getCDATA(const std::string& token) {
            // 9 = "<![CDATA[".length
            // 3 = "]]>".length
            if (token.length() < 9 + 3) return token;
            return token.substr(9, token.length() - (9 + 3));
        }

        /// True when `token` is a closing tag (`</name>`) whose name equals the
        /// name of `opening_tag`.
        static bool isClosingTagOf(const std::string& token,
                                   const std::string& opening_tag) {
            if (!(token.length() > 3
                        && token[0] == '<'
                        && token[1] == '/'
                        && token[token.length() - 1] == '>'))
                return false;
            if (!isTag(opening_tag)) return false;

            // getTag() yields "/name" for a closing tag; drop the slash.
            return getTag(token).substr(1) == getTag(opening_tag);
        }

        static bool IsSelfClosingTag(const std::string& token) {
            return token.length() > 3
                && token[0] == '<'
                && token[token.length() - 1] == '>'
                && token[token.length() - 2] == '/';
        }

        /// Extracts the tag name: the text between `<` and the first space, or
        /// the closing `>` when there is no space. Non-tag input is returned
        /// unchanged. A closing tag keeps its leading slash (`</a>` -> `/a`).
        static std::string getTag(const std::string& full) {
            if (!isTag(full)) return full;
            const auto space_idx = full.find(' ');
            std::string name = space_idx == std::string::npos
                ? full.substr(1, full.length() - 2)
                : full.substr(1, space_idx - 1);
            // `<br/>` must be named "br", not "br/", so lookups by name work.
            if (name.length() > 1 && name.back() == '/')
                name.pop_back();
            return name;
        }
};

/// Title and URL of a channel.
struct ChannelInfo {
    std::string Title = "";
    std::string URL = "";

    ChannelInfo(std::string url) : URL(std::move(url)) {}

    ChannelInfo() = default;

    /// Writes `Title(URL)` followed by a newline to standard output.
    void print() const {
        std::cout << Title << '(' << URL << ")\n";
    }
};

/// One feed entry; the constructor and print() are defined outside this header.
struct RSS_Entry {
    std::string URL = "";
    std::string Title = "";
    std::string Contents = "";
    std::tm pubDate = {};

    RSS_Entry(XML_leaf node);
    void print();
};

/// A feed with its channel information and entries; the member functions are
/// defined outside this header.
class RSS {
    public:
        std::string URL;
        ChannelInfo channelInfo;
        std::vector<RSS_Entry> Entries = {};

        RSS(std::string url);

    private:
        std::string request();
        void parse(std::string contents);
};

#endif // RSS_H
