summaryrefslogtreecommitdiff
path: root/src/RSS.hpp
diff options
context:
space:
mode:
authorPhysick <96335032+DegustatorPonos@users.noreply.github.com>2026-05-10 18:06:16 +0500
committerPhysick <96335032+DegustatorPonos@users.noreply.github.com>2026-05-10 18:06:16 +0500
commit838eaa77fa389203a4c41751b36993575bbbfe04 (patch)
tree706fb459923e590978bbacdde5fe2fb3bacf4b74 /src/RSS.hpp
Initial evening
Diffstat (limited to 'src/RSS.hpp')
-rw-r--r--src/RSS.hpp222
1 files changed, 222 insertions, 0 deletions
diff --git a/src/RSS.hpp b/src/RSS.hpp
new file mode 100644
index 0000000..709125d
--- /dev/null
+++ b/src/RSS.hpp
@@ -0,0 +1,222 @@
+#ifndef RSS_H
+#define RSS_H
+
+#include <cstddef>
+#include <ctime>
+#include <memory>
+#include <regex>
+#include <stack>
+#include <stdexcept>
+#include <string>
+#include <iostream>
+#include <utility>
+#include <variant>
+#include <vector>
+
/**
 * A node of a small, dependency-free XML tree.
 *
 * A node stores its opening tag verbatim (`Raw`), the tag name (`Name`), the
 * text found directly inside the element (`Value`) and its child elements
 * (`leafs`). Attributes are kept only as part of `Raw`; entities are not
 * decoded. The parser is deliberately forgiving: it never validates the
 * document, it only pairs opening and closing tags by name.
 */
class XML_leaf {
  public:
    std::string Raw;                // Opening tag exactly as tokenized, e.g. `<item id="1">`.
    std::string Name;               // Tag name without brackets, attributes or a trailing '/'.
    std::string Value = "";         // Last non-blank text/CDATA token directly inside the element.
    std::vector<XML_leaf> leafs{};  // Child elements in document order.

    /**
     * Builds a single node without parsing anything.
     *
     * @param raw   the opening tag (or any placeholder such as "<>").
     * @param value the text value of the node.
     */
    XML_leaf(std::string raw, std::string value)
        : Raw(std::move(raw)), Name(getTag(Raw)), Value(std::move(value)) {}

    /**
     * Parses an XML document and becomes its root element.
     *
     * Processing instructions (`<?...?>`) and comments are ignored wherever
     * they appear, so the document may or may not start with an
     * `<?xml ... ?>` declaration. If the root element is never closed, the
     * node is left as the `<>` placeholder with no children.
     *
     * @param raw the document text; must start with '<'.
     * @throws std::runtime_error if the text is too short, does not start
     *         with '<', or does not contain enough tokens to form an element.
     */
    XML_leaf(std::string raw) {
        // The smallest valid (?) XML element is <></>
        if (raw.length() < 5 || raw[0] != '<')
            throw std::runtime_error("The XML " + raw + " is invalid");

        const auto tokens = TokenizeXML(trimSpaces(raw));

        // A document needs at least an opening and a closing token, plus the
        // declaration itself when it starts with one.
        if (tokens.size() < 2 || !isTag(tokens[0]) ||
            (isProcessingInstruction(tokens[0]) && tokens.size() < 3))
            throw std::runtime_error("Invalid XML");

        XML_leaf root("<>", "");
        std::vector<XML_leaf> open;  // Elements whose closing tag has not been seen yet.

        // Building a tree
        for (const auto& token : tokens) {
            if (isProcessingInstruction(token) || isComment(token))
                continue;

            if (!isTag(token)) {
                // Text or CDATA: it belongs to the innermost open element.
                // Blank runs (indentation between tags) must not overwrite a
                // value that was already collected.
                if (!open.empty() && !isBlank(token))
                    open.back().setValue(token);
                continue;
            }

            if (open.empty() || !isClosingTagOf(token, open.back().Raw)) {
                if (IsSelfClosingTag(token)) {
                    XML_leaf leaf(token, "");
                    if (open.empty()) {  // A self-closing root is the whole document.
                        root = std::move(leaf);
                        break;
                    }
                    open.back().leafs.push_back(std::move(leaf));
                    continue;
                }
                open.emplace_back(token, "");
                continue;
            }

            XML_leaf complete = std::move(open.back());
            open.pop_back();
            if (open.empty()) {  // The node is closing a doc
                root = std::move(complete);
                break;
            }
            open.back().leafs.push_back(std::move(complete));
        }

        Raw = std::move(root.Raw);
        Name = std::move(root.Name);
        Value = std::move(root.Value);
        leafs = std::move(root.leafs);
    }

    /**
     * Returns a copy of the first direct child named `query`.
     *
     * @return the child, or a placeholder node with Raw "<>" and Value
     *         "empty" when there is no such child.
     */
    XML_leaf GetChild(const std::string& query) const {
        for (const auto& child : leafs) {
            if (child.Name == query)
                return child;
        }
        return XML_leaf("<>", "empty");
    }

    /** Returns copies of all direct children named `query`, in document order. */
    std::vector<XML_leaf> GetChildren(const std::string& query) const {
        std::vector<XML_leaf> outp;
        for (const auto& child : leafs) {
            if (child.Name == query)
                outp.push_back(child);
        }
        return outp;
    }

    /** Returns the node's text, with the CDATA wrapper removed if there is one. */
    std::string GetValue() const {
        if (isCDATA(Value))
            return getCDATA(Value);
        return Value;
    }

  private:
    void setValue(std::string newValue) {
        Value = std::move(newValue);
    }

    // Collapses runs of spaces and removes a single space between adjacent
    // tags. Applies to the whole document, text and CDATA included.
    static std::string trimSpaces(const std::string& raw) {
        static const std::regex space_run(" +");
        static const std::regex tag_gap("> <");
        const auto collapsed = std::regex_replace(raw, space_run, " ");
        return std::regex_replace(collapsed, tag_gap, "><");
    }

    // True when `text` contains `prefix` starting at `pos` (pos <= text.length()).
    static bool startsWithAt(const std::string& text, std::size_t pos, const std::string& prefix) {
        return text.compare(pos, prefix.length(), prefix) == 0;
    }

    // Index of the last character of the first `terminator` found at or after
    // `from`, or npos when the terminator is missing.
    static std::size_t endOfTerminator(const std::string& text, std::size_t from,
                                       const std::string& terminator) {
        const auto pos = text.find(terminator, from);
        return pos == std::string::npos ? pos : pos + terminator.length() - 1;
    }

    // Splits the document into tags (`<...>`), CDATA sections, comments and
    // the text runs between them. Truncated input (a tag, comment or CDATA
    // section that is never terminated) yields the remainder as one final token.
    static std::vector<std::string> TokenizeXML(const std::string& raw) {
        std::vector<std::string> outp;

        std::size_t idx = 0;
        while (idx < raw.length()) {
            std::size_t last = std::string::npos;  // Index of the token's last character.
            if (raw[idx] == '<') {
                if (startsWithAt(raw, idx, "<![CDATA["))
                    last = endOfTerminator(raw, idx + 9, "]]>");
                else if (startsWithAt(raw, idx, "<!--"))
                    last = endOfTerminator(raw, idx + 4, "-->");
                else
                    last = raw.find('>', idx);
            } else {
                const auto next_tag = raw.find('<', idx);
                if (next_tag != std::string::npos)
                    last = next_tag - 1;
            }

            if (last == std::string::npos) {
                // Nothing terminates this token: take the rest and stop
                // instead of letting the index wrap around.
                outp.push_back(raw.substr(idx));
                break;
            }
            outp.push_back(raw.substr(idx, last - idx + 1));
            idx = last + 1;
        }

        return outp;
    }

    // `<...>` that is not a `<!...>` construct (CDATA, comment, DOCTYPE).
    static bool isTag(const std::string& token) {
        return token.length() > 2
            && token[0] == '<'
            && token[token.length() - 1] == '>'
            && token[1] != '!';
    }

    static bool isProcessingInstruction(const std::string& token) {
        return token.length() >= 2 && token[0] == '<' && token[1] == '?';
    }

    static bool isComment(const std::string& token) {
        return startsWithAt(token, 0, "<!--");
    }

    // True when the token consists only of spaces, tabs and line breaks.
    static bool isBlank(const std::string& token) {
        return token.find_first_not_of(" \t\r\n") == std::string::npos;
    }

    static bool isCDATA(const std::string& token) {
        return startsWithAt(token, 0, "<![CDATA[");
    }

    // Assumes the string is CDATA. Returns it unchanged when the wrapper is
    // incomplete.
    static std::string getCDATA(const std::string& token) {
        const std::size_t open_len = 9;   // "<![CDATA[".length
        const std::size_t close_len = 3;  // "]]>".length
        if (token.length() < open_len + close_len) return token;
        if (token.compare(token.length() - close_len, close_len, "]]>") != 0) return token;
        return token.substr(open_len, token.length() - (open_len + close_len));
    }

    static bool isClosingTagOf(const std::string& token, const std::string& opening_tag) {
        if (!(token.length() > 3
              && token[0] == '<'
              && token[1] == '/'
              && token[token.length() - 1] == '>'))
            return false;
        if (!isTag(opening_tag)) return false;

        // getTag() keeps the leading '/' of a closing tag; drop it to compare names.
        return getTag(token).substr(1) == getTag(opening_tag);
    }

    static bool IsSelfClosingTag(const std::string& token) {
        return token.length() > 3
            && token[0] == '<'
            && token[token.length() - 1] == '>'
            && token[token.length() - 2] == '/';
    }

    // Extracts the tag name: everything between '<' and the first whitespace
    // (or the closing '>'). Non-tags are returned unchanged.
    static std::string getTag(const std::string& full) {
        if (!isTag(full)) return full;
        const auto name_end = full.find_first_of(" \t\r\n");
        std::string name = (name_end == std::string::npos)
            ? full.substr(1, full.length() - 2)
            : full.substr(1, name_end - 1);
        // `<br/>` has no whitespace before the slash; it is not part of the name.
        if (IsSelfClosingTag(full) && !name.empty() && name.back() == '/')
            name.pop_back();
        return name;
    }
};
+
/// Title and address of a feed channel.
struct ChannelInfo {
    std::string Title = "";
    std::string URL = "";

    /// Creates an empty record (both fields are empty strings).
    ChannelInfo() = default;

    /// Creates a record for the given address; the title stays empty.
    ChannelInfo(std::string url) : URL(std::move(url)) {}

    /// Writes `Title(URL)` followed by a newline to standard output.
    /// Const so that it can be called on const objects and references.
    void print() const {
        std::cout << Title << '(' << URL << ")\n";
    }
};
+
+struct RSS_Entry {
+ std::string URL = "";
+ std::string Title = "";
+ std::string Contents = "";
+ std::tm pubDate = {};
+
+ RSS_Entry(XML_leaf node);
+ void print();
+};
+
+class RSS {
+ public:
+ std::string URL;
+ ChannelInfo channelInfo;
+ std::vector<RSS_Entry> Entries = {};
+
+ RSS(std::string url);
+
+ private:
+ std::string request();
+ void parse(std::string contents);
+};
+
+#endif // RSS_H