Implemented to_string and print for HTMLParser. Also fixed indexing, and changed attributes and tags into pointers.

This commit is contained in:
Relintai 2021-11-18 12:03:43 +01:00
parent 65ddfe2353
commit c23afa0332
2 changed files with 104 additions and 19 deletions

View File

@ -1,6 +1,23 @@
#include "html_parser.h" #include "html_parser.h"
// Serializes this attribute back into HTML attribute syntax.
//
// Returns only the attribute name when `single` is set (an attribute without
// a value). Otherwise returns attribute="data", or attribute='data' when the
// data itself contains a double quote, so the value is not terminated early.
String HTMLParserAttribute::to_string() {
	if (single) {
		return attribute;
	}

	// The comparison must be applied to the result of find(). It used to sit
	// inside the call (find('"' == -1)), which passed the boolean result of the
	// comparison to find() instead of searching for the quote character.
	if (data.find('"') == -1) {
		return attribute + "=\"" + data + "\"";
	} else {
		return attribute + "=\'" + data + "\'";
	}
}
void HTMLParserAttribute::print() {
to_string().print();
}
HTMLParserAttribute::HTMLParserAttribute() { HTMLParserAttribute::HTMLParserAttribute() {
single = false;
} }
HTMLParserAttribute::~HTMLParserAttribute() { HTMLParserAttribute::~HTMLParserAttribute() {
@ -81,7 +98,7 @@ void HTMLParserTag::process() {
} }
//grab the tag itself //grab the tag itself
tag = tag_text.substr(0, fspc_index + 1); tag = tag_text.substr(0, fspc_index);
String args = tag_text.substr(fspc_index + 1, tag_text.size() - fspc_index - 1); String args = tag_text.substr(fspc_index + 1, tag_text.size() - fspc_index - 1);
parse_args(args); parse_args(args);
@ -109,16 +126,17 @@ void HTMLParserTag::parse_args(const String &args) {
int equals_index = args.find('=', i); int equals_index = args.find('=', i);
HTMLParserAttribute a; HTMLParserAttribute *a = new HTMLParserAttribute();
if (equals_index == -1) { if (equals_index == -1) {
a.attribute = args.substr(i, args.size() - 1); a->attribute = args.substr(i, args.size() - i);
a->single = true;
attributes.push_back(a); attributes.push_back(a);
return; return;
} }
a.attribute = args.substr(i, equals_index - i); a->attribute = args.substr(i, equals_index - i);
//todo //todo
//a.trim(); //a.trim();
@ -150,25 +168,63 @@ void HTMLParserTag::parse_args(const String &args) {
find_char = c; find_char = c;
} }
int end_index = args.find(find_char, next_char_index); int end_index = args.find(find_char, next_char_index);
if (end_index == -1) { if (end_index == -1) {
//missing closing ' or " if c is ' or " //missing closing ' or " if c is ' or "
//else missing parameter //else missing parameter
a.data = args.substr(next_char_index, args.size() - next_char_index - 1); a->data = args.substr(next_char_index, args.size() - next_char_index - 1);
attributes.push_back(a); attributes.push_back(a);
return; return;
} }
a.data = args.substr(next_char_index, end_index - next_char_index); a->data = args.substr(next_char_index, end_index - next_char_index);
attributes.push_back(a); attributes.push_back(a);
i = end_index + 1; i = end_index + 1;
} }
} }
// Serializes this tag, followed by all of its child tags, into a string.
//
// The text produced for the tag itself depends on `type`:
//  - content:       the raw data
//  - opening tag:   <tag attr...>
//  - closing tag:   </tag>
//  - self closing:  <tag attr.../>
//  - comment:       <!-- data -->
//  - doctype:       <!doctype data>
// Any other type contributes nothing for the tag itself. The serialized
// children from `tags` are appended afterwards in every case.
String HTMLParserTag::to_string() {
	String result;

	switch (type) {
		case HTML_PARSER_TAG_TYPE_CONTENT:
			result = data;
			break;

		case HTML_PARSER_TAG_TYPE_OPENING_TAG:
		case HTML_PARSER_TAG_TYPE_SELF_CLOSING_TAG:
			// Both variants share the "<tag attr..." prefix and only differ in
			// how the tag is terminated.
			result = "<" + tag;

			for (int a = 0; a < attributes.size(); ++a) {
				result += " " + attributes[a]->to_string();
			}

			if (type == HTML_PARSER_TAG_TYPE_SELF_CLOSING_TAG) {
				result += "/>";
			} else {
				result += ">";
			}
			break;

		case HTML_PARSER_TAG_TYPE_CLOSING_TAG:
			result = "</" + tag + ">";
			break;

		case HTML_PARSER_TAG_TYPE_COMMENT:
			result = "<!-- " + data + " -->";
			break;

		case HTML_PARSER_TAG_TYPE_DOCTYPE:
			result = "<!doctype " + data + ">";
			break;

		default:
			break;
	}

	// Children are appended in order, directly after the tag's own text.
	for (int c = 0; c < tags.size(); ++c) {
		result += tags[c]->to_string();
	}

	return result;
}
void HTMLParserTag::print() {
to_string().print();
}
HTMLParserTag::HTMLParserTag() { HTMLParserTag::HTMLParserTag() {
type = HTMLParserTag::HTML_PARSER_TAG_TYPE_NONE; type = HTMLParserTag::HTML_PARSER_TAG_TYPE_NONE;
} }
@ -177,16 +233,18 @@ HTMLParserTag::~HTMLParserTag() {
} }
void HTMLParser::parse(const String &data) { void HTMLParser::parse(const String &data) {
Vector<HTMLParserTag> tags; Vector<HTMLParserTag *> tags;
//split into tags
for (int i = 0; i < data.size(); ++i) { for (int i = 0; i < data.size(); ++i) {
if (data[i] == '<') { if (data[i] == '<') {
for (int j = i + 1; j < data.size(); ++j) { for (int j = i + 1; j < data.size(); ++j) {
if (data[j] == '>') { if (data[j] == '>') {
HTMLParserTag t; HTMLParserTag *t = new HTMLParserTag();
t.data = data.substr(i, j - i + 1); t->data = data.substr(i, j - i + 1);
t.process(); t->process();
t->print();
tags.push_back(t); tags.push_back(t);
@ -197,10 +255,10 @@ void HTMLParser::parse(const String &data) {
} else { } else {
for (int j = i + 1; j < data.size(); ++j) { for (int j = i + 1; j < data.size(); ++j) {
if (data[j] == '<') { if (data[j] == '<') {
HTMLParserTag t; HTMLParserTag *t = new HTMLParserTag();
t.data = data.substr(i, j - i); t->data = data.substr(i, j - i);
t.type = HTMLParserTag::HTML_PARSER_TAG_TYPE_CONTENT; t->type = HTMLParserTag::HTML_PARSER_TAG_TYPE_CONTENT;
tags.push_back(t); tags.push_back(t);
@ -210,9 +268,26 @@ void HTMLParser::parse(const String &data) {
} }
} }
} }
//process tags into hierarchical order
//Vector<HTMLParserTag> tag_stack;
//for (int i = 0; i < tags.size(); ++i) {
//}
for (int i = 0; i < tags.size(); ++i) {
delete tags[i];
}
}
// Serializes the parsed document by delegating to the root tag.
//
// Returns an empty String when there is no root tag. `html` is initialized
// to nullptr by the constructor, so it cannot be dereferenced unconditionally.
String HTMLParser::to_string() {
	if (html == nullptr) {
		return String();
	}

	return html->to_string();
}
// Prints the parsed document by delegating to the root tag.
//
// Does nothing when there is no root tag. `html` is initialized to nullptr
// by the constructor, so it cannot be dereferenced unconditionally.
void HTMLParser::print() {
	if (html == nullptr) {
		return;
	}

	html->print();
}
HTMLParser::HTMLParser() { HTMLParser::HTMLParser() {
html = nullptr;
} }
HTMLParser::~HTMLParser() { HTMLParser::~HTMLParser() {

View File

@ -1,13 +1,17 @@
#ifndef HTML_BUILDER_H #ifndef HTML_BUILDER_H
#define HTML_BUILDER_H #define HTML_BUILDER_H
#include "core/string.h"
#include "core/containers/vector.h" #include "core/containers/vector.h"
#include "core/string.h"
class HTMLParserAttribute { class HTMLParserAttribute {
public: public:
String attribute; String attribute;
String data; String data;
bool single;
String to_string();
void print();
HTMLParserAttribute(); HTMLParserAttribute();
virtual ~HTMLParserAttribute(); virtual ~HTMLParserAttribute();
@ -30,11 +34,14 @@ public:
String tag; String tag;
String data; String data;
Vector<HTMLParserTag> tags; Vector<HTMLParserTag*> tags;
Vector<HTMLParserAttribute> attributes; Vector<HTMLParserAttribute*> attributes;
void process(); void process();
void parse_args(const String& args); void parse_args(const String &args);
String to_string();
void print();
HTMLParserTag(); HTMLParserTag();
virtual ~HTMLParserTag(); virtual ~HTMLParserTag();
@ -42,11 +49,14 @@ public:
class HTMLParser { class HTMLParser {
public: public:
HTMLParserTag html; HTMLParserTag *html;
void parse(const String &data); void parse(const String &data);
//void parse_tag(const String &data, const int index); //void parse_tag(const String &data, const int index);
String to_string();
void print();
HTMLParser(); HTMLParser();
virtual ~HTMLParser(); virtual ~HTMLParser();
}; };