From 3a0640801be422a00336a9bb5db695b6addba6f5 Mon Sep 17 00:00:00 2001 From: Relintai Date: Sun, 21 Nov 2021 10:44:08 +0100 Subject: [PATCH] Extract next link using the rel="next" attribute. --- main.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/main.cpp b/main.cpp index 0744df1..fcfaf65 100644 --- a/main.cpp +++ b/main.cpp @@ -167,12 +167,10 @@ void download_posts(Database *db, const String &site) { if (last_url == "") { should_skip = false; - + last_url = first_url; } - - RLOG_MSG("Post downloading started for " + site + " | last url: " + last_url); bool done = false; @@ -197,22 +195,17 @@ void download_posts(Database *db, const String &site) { RLOG_WARN("Couldn't extract data!\n"); } - HTMLParserTag *next_div = p.root->get_first("div", "class", "nav-next"); + HTMLParserTag *n_link_tag = p.root->get_first("a", "rel", "next"); - if (next_div) { - if (next_div->tags.size() == 1) { - HTMLParserTag *link = next_div->tags[0]; + if (n_link_tag) { + next_link = n_link_tag->get_attribute_value("href"); - next_link = link->get_attribute_value("href"); - - if (next_link == "") { - RLOG_WARN("Couldn't extract link!\n"); - } - } else { - RLOG_WARN("Couldn't extract next_div! (tags.size() != 1)!\n"); + if (next_link == "") { + RLOG_WARN("Couldn't extract link!\n"); } } else { - RLOG_WARN("Couldn't extract next_div!\n"); + next_link = ""; + RLOG_WARN("Couldn't extract link tag!\n"); } if (should_skip) {