#ifndef RSS_H
#define RSS_H

// NOTE(review): the header names of the original include list were lost when
// this file was mangled. The list below is reconstructed from the standard
// names this header uses; re-add any project/third-party headers that the
// original list contained.
#include <cstddef>
#include <ctime>
#include <iostream>
#include <memory>
#include <regex>
#include <stack>
#include <stdexcept>
#include <string>
#include <utility>
#include <variant>
#include <vector>

/// A node of a very small, forgiving XML tree.
///
/// `Raw` is the opening tag exactly as it appeared (e.g. `<a href="x">`),
/// `Name` is the tag name extracted from it, `Value` is the last text/CDATA
/// token seen directly inside the element, and `leafs` are the child elements
/// in document order.
class XML_leaf {
public:
    std::string Raw;
    std::string Name;
    std::string Value = "";
    std::vector<XML_leaf> leafs{};

    /// Builds a single node from an opening tag and its text value.
    /// @param raw   the tag text; `Name` is derived from it via getTag().
    /// @param value the node's text content.
    XML_leaf(std::string raw, std::string value)
        : Raw(std::move(raw)), Name(getTag(Raw)), Value(std::move(value)) {}

    /// Parses a whole document and becomes its first completed top-level
    /// element.
    ///
    /// The first token is never turned into a node.
    /// NOTE(review): presumably it is expected to be the `<?xml ...?>`
    /// declaration; a document without one loses its outermost element.
    ///
    /// If no element is ever closed at top level the node is left as the
    /// placeholder `<>` with an empty value.
    ///
    /// @throws std::runtime_error if `raw` is shorter than 5 characters, does
    ///         not start with '<', yields fewer than 3 tokens, or its first
    ///         token is not a tag.
    XML_leaf(std::string raw) {
        if (raw.length() < 5 || raw[0] != '<')
            throw std::runtime_error("The XML " + raw + " is invalid");

        const auto tokens = TokenizeXML(trimSpaces(raw));
        if (tokens.size() < 3 || !isTag(tokens[0]))
            throw std::runtime_error("Invalid XML");  // by value, so catch (const std::exception&) works

        XML_leaf result("<>", "");
        std::stack<XML_leaf> open;  // elements whose closing tag has not been seen yet

        for (std::size_t i = 1; i < tokens.size(); ++i) {
            const std::string& token = tokens[i];

            // Text / CDATA / comment tokens become the value of the innermost
            // open element; outside of any element they are dropped.
            if (!isTag(token)) {
                if (!open.empty())
                    open.top().setValue(token);
                continue;
            }

            if (open.empty() || !isClosingTagOf(token, open.top().Raw)) {
                if (IsSelfClosingTag(token)) {
                    // A self-closing tag with no open parent has nowhere to be
                    // attached; skip it instead of touching an empty stack.
                    if (!open.empty())
                        open.top().leafs.emplace_back(token, "");
                    continue;
                }
                open.push(XML_leaf(token, ""));
                continue;
            }

            // `token` closes the innermost open element.
            XML_leaf complete = std::move(open.top());
            open.pop();
            if (open.empty()) {
                // The outermost element has been closed: parsing is done.
                result = std::move(complete);
                break;
            }
            open.top().leafs.push_back(std::move(complete));
        }

        *this = std::move(result);
    }

    /// Returns a copy of the first direct child named `query`, or a
    /// placeholder node (`Raw == "<>"`, `Value == "empty"`) if there is none.
    XML_leaf GetChild(const std::string& query) const {
        for (const auto& child : leafs) {
            if (child.Name == query)
                return child;
        }
        return XML_leaf("<>", "empty");
    }

    /// Returns copies of all direct children named `query`, in document order.
    std::vector<XML_leaf> GetChildren(const std::string& query) const {
        std::vector<XML_leaf> outp;
        for (const auto& child : leafs) {
            if (child.Name == query)
                outp.push_back(child);
        }
        return outp;
    }

    /// Returns the node's value, with a surrounding CDATA wrapper removed.
    std::string GetValue() const {
        if (isCDATA(Value))
            return getCDATA(Value);
        return Value;
    }

private:
    static constexpr std::size_t kCdataOpenLen = 9;   // length of "<![CDATA["
    static constexpr std::size_t kCdataCloseLen = 3;  // length of "]]>"

    void setValue(std::string newValue) { Value = std::move(newValue); }

    /// Collapses every run of spaces into one space and then removes the
    /// single space left between adjacent tags ("> <" -> "><").
    /// Only the space character is handled; tabs and newlines are kept.
    static std::string trimSpaces(const std::string& raw) {
        static const std::regex space_run(" +");
        static const std::regex space_between_tags("> <");
        auto outp = std::regex_replace(raw, space_run, " ");
        outp = std::regex_replace(outp, space_between_tags, "><");
        return outp;
    }

    /// Splits `raw` into tag tokens (`<...>`), CDATA tokens
    /// (`<![CDATA[...]]>`) and the text runs between them.
    /// An unterminated tag or CDATA section is emitted as one final token
    /// holding the rest of the input.
    static std::vector<std::string> TokenizeXML(const std::string& raw) {
        constexpr auto npos = std::string::npos;
        std::vector<std::string> outp;

        std::size_t idx = 0;
        while (idx < raw.length()) {
            std::size_t last = npos;  // index of the token's last character
            if (raw[idx] == '<') {
                if (raw.compare(idx, kCdataOpenLen, "<![CDATA[") == 0) {
                    const auto close = raw.find("]]>", idx);
                    if (close != npos)
                        last = close + (kCdataCloseLen - 1);
                } else {
                    last = raw.find('>', idx);
                }
            } else {
                const auto next_tag = raw.find('<', idx);
                if (next_tag != npos)
                    last = next_tag - 1;
            }

            if (last == npos) {
                // No terminator: take the remainder and stop, rather than
                // letting the index wrap around.
                outp.push_back(raw.substr(idx));
                break;
            }
            outp.push_back(raw.substr(idx, last - idx + 1));
            idx = last + 1;
        }
        return outp;
    }

    /// True for `<...>` tokens longer than 2 characters that do not start
    /// with `<!` (which excludes CDATA, comments and DOCTYPE).
    static bool isTag(const std::string& token) {
        return token.length() > 2 && token.front() == '<' && token.back() == '>' && token[1] != '!';
    }

    /// True if `token` starts with the CDATA opening marker.
    /// NOTE(review): the original body was damaged in the mangled source; a
    /// prefix test is the reading consistent with how TokenizeXML() and
    /// GetValue() use it.
    static bool isCDATA(const std::string& token) {
        return token.compare(0, kCdataOpenLen, "<![CDATA[") == 0;
    }

    /// Strips the leading `<![CDATA[` and trailing `]]>` from `token`.
    /// Tokens too short to hold both markers are returned unchanged.
    static std::string getCDATA(const std::string& token) {
        if (token.length() < kCdataOpenLen + kCdataCloseLen)
            return token;
        return token.substr(kCdataOpenLen, token.length() - (kCdataOpenLen + kCdataCloseLen));
    }

    /// True if `token` is a `</name>` tag whose name equals that of `opening_tag`.
    static bool isClosingTagOf(const std::string& token, const std::string& opening_tag) {
        const bool looks_like_closing =
            token.length() > 3 && token[0] == '<' && token[1] == '/' && token.back() == '>';
        if (!looks_like_closing)
            return false;
        if (!isTag(opening_tag))
            return false;
        // getTag() yields "/name" for a closing tag; drop the leading '/'.
        return getTag(token).substr(1) == getTag(opening_tag);
    }

    /// True for tags of the form `<.../>`.
    static bool IsSelfClosingTag(const std::string& token) {
        return token.length() > 3 && token.front() == '<' && token.back() == '>' &&
               token[token.length() - 2] == '/';
    }

    /// Extracts the tag name: the text between '<' and the first space, or
    /// between '<' and '>' when the tag has no space. Non-tags are returned
    /// unchanged.
    static std::string getTag(const std::string& full) {
        if (!isTag(full))
            return full;
        const auto space_idx = full.find(' ');
        if (space_idx == std::string::npos)
            return full.substr(1, full.length() - 2);
        return full.substr(1, space_idx - 1);
    }
};

/// Title and URL of a feed channel.
struct ChannelInfo {
    std::string Title = "";
    std::string URL = "";

    ChannelInfo(std::string url) : URL(std::move(url)) {}
    ChannelInfo() = default;

    /// Writes "Title(URL)" followed by a newline to std::cout.
    void print() const { std::cout << Title << '(' << URL << ")\n"; }
};

/// One feed entry; the constructor and print() are defined outside this header.
struct RSS_Entry {
    std::string URL = "";
    std::string Title = "";
    std::string Contents = "";
    std::tm pubDate = {};

    RSS_Entry(XML_leaf node);
    void print();
};

/// A feed identified by its URL; member functions are defined outside this header.
class RSS {
public:
    std::string URL;
    ChannelInfo channelInfo;
    std::vector<RSS_Entry> Entries = {};

    RSS(std::string url);

private:
    std::string request();
    void parse(std::string contents);
};

#endif // RSS_H