diff --git a/SConstruct b/SConstruct
index ee89cfe..fe448fc 100644
--- a/SConstruct
+++ b/SConstruct
@@ -138,7 +138,11 @@ env_base.Append(CXX=["-o3"])
 #env_base.Append(CXX=["-g2"])
 
 env = env_base.Clone()
+
 Export("env")
+
+SConscript("libs/bbcpp/SCsub")
+
 SConscript("core/SCsub")
 
 for d in database_list:
diff --git a/libs/HEADS b/libs/HEADS
index e0f29fe..4277590 100644
--- a/libs/HEADS
+++ b/libs/HEADS
@@ -1,3 +1,4 @@
 RapidJSON 0ccdbf364c577803e2a751f5aededce935314313
 brynet b0d13e7419628d0f7051a2bb310daaf8a506e08b
-rapidxml 1.13
\ No newline at end of file
+rapidxml 1.13
+bbcpp a035c4942ed9e5277833fe80e444406f959c3d88
\ No newline at end of file
diff --git a/libs/bbcpp/BBDocument.cpp b/libs/bbcpp/BBDocument.cpp
new file mode 100755
index 0000000..142bc23
--- /dev/null
+++ b/libs/bbcpp/BBDocument.cpp
@@ -0,0 +1,114 @@
+// NOTE(review): the names of the two system headers were lost from this
+// patch; <memory> and <string> cover what this file uses directly.
+#include <memory>
+#include <string>
+#include "BBDocument.h"
+
+namespace bbcpp
+{
+
+// Stores the node's type tag and its name. For text nodes the "name" is the
+// text itself (see BBText).
+BBNode::BBNode(NodeType nodeType, const std::string& name)
+    : _name(name), _nodeType(nodeType)
+{
+    // nothing to do
+}
+
+// Adds `text` to the tree and returns the text node that now holds it.
+//
+// The text goes under the innermost open element (top of the stack), or under
+// the document itself when no element is open. If the last child of that
+// parent is already a text node the new text is appended to it, so adjacent
+// runs of text never produce sibling text nodes.
+BBText& BBDocument::newText(const std::string& text)
+{
+    BBNode& parent = _stack.empty() ? static_cast<BBNode&>(*this) : *_stack.top();
+
+    if (!parent.getChildren().empty())
+    {
+        // downCast(false) yields a null pointer when the last child is not text
+        auto previous = parent.getChildren().back()->downCast<BBTextPtr>(false);
+        if (previous)
+        {
+            previous->append(text);
+            return *previous;
+        }
+    }
+
+    // no preceding text node to merge with, so create a fresh one
+    auto textNode = std::make_shared<BBText>(text);
+    parent.appendChild(textNode);
+    return *textNode;
+}
+
+// Creates a SIMPLE element named `name`, attaches it to the innermost open
+// element (or to the document) and makes it the new innermost open element.
+BBElement& BBDocument::newElement(const std::string& name)
+{
+    auto newNode = std::make_shared<BBElement>(name);
+    if (!_stack.empty())
+    {
+        _stack.top()->appendChild(newNode);
+    }
+    else
+    {
+        // add this node to the document-node if needed
+        appendChild(newNode);
+    }
+
+    _stack.push(newNode);
+    return *newNode;
+}
+
+// Creates a CLOSING element named `name`. When an element is open, the closing
+// node becomes its last child and that element is popped from the stack; the
+// names are not compared. With nothing open the closing node is attached to
+// the document.
+BBElement& BBDocument::newClosingElement(const std::string& name)
+{
+    auto newNode = std::make_shared<BBElement>(name, BBElement::CLOSING);
+    if (!_stack.empty())
+    {
+        _stack.top()->appendChild(newNode);
+        _stack.pop();
+    }
+    else
+    {
+        appendChild(newNode);
+    }
+
+    return *newNode;
+}
+
+// Creates a PARAMETER element named `name` carrying a copy of every entry in
+// `pairs`, attaches it like newElement() does and makes it the innermost open
+// element.
+BBElement& BBDocument::newKeyValueElement(const std::string& name, const ParameterMap& pairs)
+{
+    auto newNode = std::make_shared<BBElement>(name, BBElement::PARAMETER);
+    if (!_stack.empty())
+    {
+        _stack.top()->appendChild(newNode);
+    }
+    else
+    {
+        // add this node to the document-node if needed
+        appendChild(newNode);
+    }
+
+    for (const auto& kv : pairs)
+    {
+        newNode->setOrAddParameter(kv.first, kv.second);
+    }
+
+    _stack.push(newNode);
+    return *newNode;
+}
+
+
+
+} // namespace
diff --git a/libs/bbcpp/BBDocument.h b/libs/bbcpp/BBDocument.h
new file mode 100755
index 0000000..b0ffa7f
--- /dev/null
+++ b/libs/bbcpp/BBDocument.h
@@ -0,0 +1,605 @@
+#pragma once
+// NOTE(review): the eleven header names were lost from this patch; the list
+// below is the set of standard headers this file's code actually uses.
+#include <cctype>
+#include <cstring>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <stack>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace bbcpp
+{
+
+// True for the ASCII digits '0'..'9'.
+inline bool IsDigit(char c)
+{
+    return ('0' <= c && c <= '9');
+}
+
+// True for the 52 unaccented Latin letters listed below.
+inline bool IsAlpha(char c)
+{
+    static const char alpha[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    // strchr() treats the terminating NUL as part of the string, so '\0' has
+    // to be rejected explicitly or it would be reported as a letter.
+    return c != '\0' && std::strchr(alpha, c) != nullptr;
+}
+
+// True when `c` is a letter or a digit as defined by IsAlpha()/IsDigit().
+inline bool IsAlNum(char c)
+{
+    return IsAlpha(c) || IsDigit(c);
+}
+
+// True when std::isspace() classifies `c` as whitespace. The cast keeps
+// negative `char` values from reaching std::isspace(), which is undefined.
+inline bool IsSpace(char c)
+{
+    return std::isspace(static_cast<unsigned char>(c)) != 0;
+}
+
+class BBNode;
+class BBText;
+class BBElement;
+class BBDocument;
+
+using BBNodePtr = std::shared_ptr<BBNode>;
+using BBTextPtr = std::shared_ptr<BBText>;
+using BBElementPtr = std::shared_ptr<BBElement>;
+
+using BBNodeWeakPtr = std::weak_ptr<BBNode>;
+using BBNodeList = std::vector<BBNodePtr>;
+using BBNodeStack = std::stack<BBNodePtr>;
+using BBDocumentPtr = std::shared_ptr<BBDocument>;
+
+using ParameterMap = std::map<std::string, std::string>;
+
+// Base class of every node in the parsed tree. A node has a name, a type tag,
+// a weak reference to its parent and an ordered list of owned children.
+// appendChild() and downCast() call shared_from_this(), so they need the node
+// to be owned by a std::shared_ptr.
+class BBNode : public std::enable_shared_from_this<BBNode>
+{
+    // Shared implementation of both downCast() overloads.
+    //
+    // Returns `node` converted to NewTypePtrT (a shared_ptr to a BBNode
+    // subclass). A null `node` or a failed conversion yields a null pointer
+    // when bThrowOnFail is false and std::invalid_argument otherwise.
+    template <typename NewTypePtrT>
+    static NewTypePtrT cast(const BBNodePtr& node, bool bThrowOnFail)
+    {
+        if (node == nullptr)
+        {
+            if (bThrowOnFail)
+            {
+                throw std::invalid_argument("Cannot downcast BBNode, object is null");
+            }
+            return NewTypePtrT();
+        }
+
+        NewTypePtrT newobj = std::dynamic_pointer_cast<typename NewTypePtrT::element_type>(node);
+
+        if (newobj == nullptr && bThrowOnFail)
+        {
+            throw std::invalid_argument("Cannot downcast, object is not correct type");
+        }
+
+        return newobj;
+    }
+
+public:
+    enum class NodeType
+    {
+        DOCUMENT,
+        ELEMENT,    // [b]bold[/b], [QUOTE], [QUOTE=Username;1234], [QUOTE user=Bob]
+        TEXT,       // plain text
+        ATTRIBUTE
+    };
+
+    BBNode(NodeType nodeType, const std::string& name);
+    virtual ~BBNode() = default;
+
+    const std::string& getNodeName() const { return _name; }
+    NodeType getNodeType() const { return _nodeType; }
+
+    // Returns the parent, or a null pointer when the node has no parent or
+    // the parent no longer exists. lock() is used because constructing a
+    // shared_ptr directly from an empty/expired weak_ptr throws
+    // std::bad_weak_ptr.
+    BBNodePtr getParent() const { return _parent.lock(); }
+
+    const BBNodeList& getChildren() const { return _children; }
+
+    // Appends `node` as the last child and records this node as its parent.
+    // Throws std::invalid_argument when `node` is null.
+    virtual void appendChild(BBNodePtr node)
+    {
+        if (node == nullptr)
+        {
+            throw std::invalid_argument("Cannot append a null child node");
+        }
+
+        // resolve the parent pointer first so a failure leaves the tree untouched
+        BBNodePtr self = shared_from_this();
+        _children.push_back(node);
+        node->_parent = self;
+    }
+
+    // Returns this node as NewTypePtrT (e.g. BBTextPtr, BBElementPtr). When
+    // the node is not of that type the result is a null pointer if
+    // bThrowOnFail is false, otherwise std::invalid_argument is thrown.
+    template <typename NewTypePtrT>
+    NewTypePtrT downCast(bool bThrowOnFail = true)
+    {
+        return cast<NewTypePtrT>(shared_from_this(), bThrowOnFail);
+    }
+
+    template <typename NewTypePtrT>
+    NewTypePtrT downCast(bool bThrowOnFail = true) const
+    {
+        // shared_from_this() yields shared_ptr<const BBNode> in a const member,
+        // which does not convert to BBNodePtr; without the const_pointer_cast
+        // this overload fails to compile as soon as it is instantiated.
+        return cast<NewTypePtrT>(std::const_pointer_cast<BBNode>(shared_from_this()), bThrowOnFail);
+    }
+
+protected:
+    std::string     _name;
+    NodeType        _nodeType;
+    BBNodeWeakPtr   _parent;
+    BBNodeList      _children;
+
+    friend class BBText;
+    friend class BBDocument;
+    friend class BBElement;
+};
+
+// A run of plain text. The text is kept in the inherited node name.
+class BBText : public BBNode
+{
+public:
+    BBText(const std::string& value)
+        : BBNode(BBNode::NodeType::TEXT, value)
+    {
+        // nothing to do
+    }
+
+    virtual ~BBText() = default;
+
+    virtual const std::string getText() const { return _name; }
+
+    // Appends `text` to the text already held by this node.
+    void append(const std::string& text)
+    {
+        _name.append(text);
+    }
+};
+
+// A tag such as [b], [/b] or [QUOTE user=Bob], with optional key/value
+// parameters.
+class BBElement : public BBNode
+{
+public:
+    enum ElementType
+    {
+        SIMPLE,     // [b]bold[/b], [code]print("hello")[/code]
+        VALUE,      // [QUOTE=Username;12345]This is a quote[/QUOTE] (mostly used by vBulletin)
+        PARAMETER,  // [QUOTE user=Bob userid=1234]This is a quote[/QUOTE]
+        CLOSING     // [/b], [/code]
+    };
+
+    BBElement(const std::string& name, ElementType et = BBElement::SIMPLE)
+        : BBNode(BBNode::NodeType::ELEMENT, name),
+          _elementType(et)
+    {
+        // nothing to do
+    }
+
+    virtual ~BBElement() = default;
+
+    ElementType getElementType() const { return _elementType; }
+
+    // Sets the value stored under `key`. An existing entry is overwritten; a
+    // missing entry is created only when addIfNotExists is true.
+    void setOrAddParameter(const std::string& key, const std::string& value, bool addIfNotExists = true)
+    {
+        auto found = _parameters.find(key);
+        if (found != _parameters.end())
+        {
+            found->second = value;
+        }
+        else if (addIfNotExists)
+        {
+            _parameters.insert({key, value});
+        }
+    }
+
+    // Returns the value stored under `key`. For a missing key this throws
+    // std::invalid_argument when bDoThrow is true and returns an empty string
+    // when it is false.
+    std::string getParameter(const std::string& key, bool bDoThrow = true) const
+    {
+        const auto found = _parameters.find(key);
+        if (found != _parameters.end())
+        {
+            return found->second;
+        }
+
+        if (bDoThrow)
+        {
+            throw std::invalid_argument("Undefine attribute '" + key + "'");
+        }
+
+        return std::string();
+    }
+
+    const ParameterMap& getParameters() const { return _parameters; }
+
+private:
+    ElementType     _elementType = BBElement::SIMPLE;
+    ParameterMap    _parameters;
+};
+
+// Root of the tree and home of the parser. Instances are obtained through
+// create(); load() parses BBCode and appends the resulting nodes.
+class BBDocument : public BBNode
+{
+    BBDocument()
+        : BBNode(BBNode::NodeType::DOCUMENT, "#document")
+    {
+        // nothing to do
+    }
+
+    // Punctuation accepted inside a parameter value in addition to letters
+    // and digits: '#' for colours, the rest for URLs.
+    static bool isValuePunctuation(char c)
+    {
+        switch (c)
+        {
+            case '#':
+            case ':': case '/': case '.': case '&':
+            case '?': case '$': case '-': case '+':
+            case '*': case '(': case ')': case ',':
+                return true;
+
+            default:
+                return false;
+        }
+    }
+
+    // Emits the text from `begin` up to the first '[' as a text node and
+    // returns the position of that '['. When there is no '[', or the only
+    // candidate is `begin` itself, everything up to `end` becomes text.
+    template <typename citerator>
+    citerator parseText(citerator begin, citerator end)
+    {
+        auto endingChar = begin;
+
+        for (auto it = begin; it != end; ++it)
+        {
+            if (*it == '[')
+            {
+                endingChar = it;
+                break;
+            }
+        }
+
+        if (endingChar == begin)
+        {
+            endingChar = end;
+        }
+
+        newText(std::string(begin, endingChar));
+
+        return endingChar;
+    }
+
+    // Reads an alphanumeric element name starting at `begin`. On reaching the
+    // first non-alphanumeric character the name read so far (possibly empty)
+    // is stored in `buf` and that character's position is returned. If `end`
+    // is reached first, `buf` is left untouched and `begin` is returned.
+    template <typename citerator>
+    citerator parseElementName(citerator begin, citerator end, std::string& buf)
+    {
+        std::string name;
+
+        for (auto it = begin; it != end; ++it)
+        {
+            // TODO: alphanumeric names only?
+            if (!bbcpp::IsAlNum(static_cast<char>(*it)))
+            {
+                buf.assign(name);
+                return it;
+            }
+
+            name.push_back(static_cast<char>(*it));
+        }
+
+        return begin;
+    }
+
+    // Reads a parameter value, skipping leading whitespace. The value ends at
+    // ']' or at whitespace (which separates it from the next key); it is then
+    // stored in `value` and the terminator's position is returned. On any
+    // other unexpected character that character's position is returned and
+    // `value` is left untouched, as it is when `end` is reached (`end` is
+    // returned).
+    template <typename citerator>
+    citerator parseValue(citerator begin, citerator end, std::string& value)
+    {
+        auto start = begin;
+        // test against `end` before dereferencing
+        while (start != end && bbcpp::IsSpace(*start))
+        {
+            ++start;
+        }
+
+        if (start == end)
+        {
+            // nothing but spaces
+            return end;
+        }
+
+        std::string temp;
+
+        for (auto it = start; it != end; ++it)
+        {
+            const char c = static_cast<char>(*it);
+
+            if (c == ']' || bbcpp::IsSpace(c))
+            {
+                value.assign(temp);
+                return it;
+            }
+            else if (bbcpp::IsAlNum(c) || isValuePunctuation(c))
+            {
+                temp.push_back(c);
+            }
+            else
+            {
+                // some invalid character, so return the point where
+                // we stopped parsing
+                return it;
+            }
+        }
+
+        // ran off the end without finding a terminator
+        return end;
+    }
+
+    // Reads a parameter key, skipping leading whitespace. On '=' the key is
+    // stored in `keyname` and the position of the '=' is returned. On any
+    // other non-alphanumeric character that character's position is returned
+    // and `keyname` is left untouched, as it is when `end` is reached (`end`
+    // is returned).
+    template <typename citerator>
+    citerator parseKey(citerator begin, citerator end, std::string& keyname)
+    {
+        auto start = begin;
+        // test against `end` before dereferencing
+        while (start != end && bbcpp::IsSpace(*start))
+        {
+            ++start;
+        }
+
+        if (start == end)
+        {
+            // nothing but spaces
+            return start;
+        }
+
+        std::string temp;
+
+        // TODO: need to handle spaces after the key name and before
+        // the equal sign (ie. "[style color =red]")
+        for (auto it = start; it != end; ++it)
+        {
+            const char c = static_cast<char>(*it);
+
+            if (bbcpp::IsAlNum(c))
+            {
+                temp.push_back(c);
+            }
+            else if (c == '=')
+            {
+                keyname.assign(temp);
+                return it;
+            }
+            else
+            {
+                // some invalid character, so return the point where
+                // we stopped parsing
+                return it;
+            }
+        }
+
+        // ran off the end without finding '='
+        return end;
+    }
+
+    // Reads "key=value" pairs separated by whitespace up to the closing ']'.
+    // On success `pairs` holds every pair and the position of the ']' is
+    // returned. On any malformed pair `pairs` is cleared and the position
+    // where parsing stopped is returned.
+    template <typename citerator>
+    citerator parseKeyValuePairs(citerator begin, citerator end, ParameterMap& pairs)
+    {
+        auto current = begin;
+
+        while (current != end)
+        {
+            // fresh buffers for every pair: parseKey()/parseValue() leave
+            // their output untouched on failure, so a reused buffer would
+            // make a failed parse look like a successful one
+            std::string tempKey;
+            std::string tempVal;
+
+            current = parseKey(current, end, tempKey);
+            if (tempKey.empty() || current == end || *current != '=')
+            {
+                pairs.clear();
+                return current;
+            }
+
+            current = parseValue(std::next(current), end, tempVal);
+            if (tempVal.empty() || current == end)
+            {
+                pairs.clear();
+                return current;
+            }
+
+            pairs.insert(std::make_pair(tempKey, tempVal));
+            if (*current == ']')
+            {
+                // this is the only valid condition for key/value pairs so we do
+                // not want to clear `pairs` like in the other cases
+                return current;
+            }
+        }
+
+        return end;
+    }
+
+    // Parses one tag starting at the '[' that `begin` points to and returns
+    // the position where parsing should continue. A well-formed tag becomes
+    // an element node; anything else is emitted as literal text. The returned
+    // position is always past `begin`.
+    template <typename citerator>
+    citerator parseElement(citerator begin, citerator end)
+    {
+        // the first non-[ and non-/ character
+        auto nameStart = std::next(begin);
+        if (nameStart == end)
+        {
+            // a lone '[' at the very end of the input
+            newText(std::string(begin, end));
+            return end;
+        }
+
+        // this might be a closing tag so mark it
+        bool closingTag = false;
+        if (*nameStart == '/')
+        {
+            closingTag = true;
+            nameStart = std::next(nameStart);
+        }
+
+        std::string elementName;
+        auto nameEnd = parseElementName(nameStart, end, elementName);
+
+        // no valid name was found, so bail out; keep what was consumed so far
+        // ("[" or "[/") as literal text so no input character is lost
+        if (elementName.empty())
+        {
+            newText(std::string(begin, nameStart));
+            return nameStart;
+        }
+
+        // a non-empty name means parseElementName() stopped on a terminating
+        // character, so nameEnd can be dereferenced from here on
+        if (*nameEnd == '=' || *nameEnd == ' ')
+        {
+            // "[name=value]": the element name doubles as the key, so the
+            // pairs are parsed starting at the name itself.
+            // "[name key=value ...]": the pairs start after the name.
+            ParameterMap pairs;
+            const auto pairsStart = (*nameEnd == '=') ? nameStart : nameEnd;
+
+            const auto kvEnd = parseKeyValuePairs(pairsStart, end, pairs);
+            if (pairs.empty())
+            {
+                newText(std::string(begin, kvEnd));
+                return kvEnd;
+            }
+
+            newKeyValueElement(elementName, pairs);
+            return std::next(kvEnd);
+        }
+
+        if (*nameEnd != ']')
+        {
+            // some invalid char followed the element name, so it's not actually a
+            // valid element, so create it as text and move on
+            newText(std::string(begin, nameEnd));
+            return nameEnd;
+        }
+
+        if (closingTag)
+        {
+            newClosingElement(elementName);
+        }
+        else
+        {
+            newElement(elementName);
+        }
+
+        return std::next(nameEnd);
+    }
+
+public:
+    // Creates an empty document. This is the only way to obtain one, which
+    // guarantees it is owned by a shared_ptr.
+    static BBDocumentPtr create()
+    {
+        return BBDocumentPtr(new BBDocument());
+    }
+
+    // Parses `bbcode` and appends the resulting nodes to this document.
+    void load(const std::string& bbcode)
+    {
+        load(bbcode.begin(), bbcode.end());
+    }
+
+    // Parses the characters in [begin, end) and appends the resulting nodes
+    // to this document. Existing children and open elements are not cleared
+    // first.
+    template <typename Iterator>
+    void load(Iterator begin, Iterator end)
+    {
+        auto current = begin;
+
+        while (current != end)
+        {
+            if (*current == '[')
+            {
+                const auto next = parseElement(current, end);
+                if (next != current)
+                {
+                    current = next;
+                    continue;
+                }
+                // nothing was parsed, treat as text
+            }
+
+            current = parseText(current, end);
+        }
+    }
+
+private:
+    // elements that have been opened and not yet closed, innermost on top
+    BBNodeStack     _stack;
+
+    BBText& newText(const std::string& text = std::string());
+    BBElement& newElement(const std::string& name);
+    BBElement& newClosingElement(const std::string& name);
+    BBElement& newKeyValueElement(const std::string& name, const ParameterMap& pairs);
+};
+
+// Writes `params` as "{ {key=value}, {key=value} }".
+// Declared inline rather than in an anonymous namespace: an anonymous
+// namespace in a header gives every including file its own private copy.
+inline std::ostream& operator<<(std::ostream& os, const ParameterMap& params)
+{
+    bool first = true;
+    os << "{ ";
+    for (const auto& p : params)
+    {
+        os << (first ? "" : ", ") << "{" << p.first << "=" << p.second << "}";
+        first = false;
+    }
+    return (os << " }");
+}
+
+
+
+} // namespace
diff --git a/libs/bbcpp/LICENSE b/libs/bbcpp/LICENSE
new file mode 100644
index 0000000..a8fc851
--- /dev/null
+++ b/libs/bbcpp/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/libs/bbcpp/README.md b/libs/bbcpp/README.md
new file mode 100644
index 0000000..27adadd
--- /dev/null
+++ b/libs/bbcpp/README.md
@@ -0,0 +1,105 @@
+# bbcpp
+
+[![Build Status][travis-img]][travis]
+[![Build Status][appveyor-img]][appveyor]
+
+
+## Introduction
+
+bbcpp is a C++ library for parsing BBCode, or Bulletin Board Code, a markup language used to format posts in many message boards.
+
+This library parses BBCode into a tree data structure that can be used to format output. However, this library does not include any output classes, though a basic HTML output class will likely be included.
+ +## Usage + +```cpp +auto doc = BBDocument::create(); +doc->load("This is [b]an example[/b] of some text."); +``` + +## Element Types + +#### Examples: + [B] - Bold text + [I] - Italicized text + [QUOTE] - Blockquote text (without specifiers as discussed below) + +## Value Elements + +#### Examples + [COLOR="green"] + [FONT="Arial Narrow"] + [SIZE="5"] + [EMAIL="billgates@microsoft.com"] + +## The `QUOTE` Element + +The **bbcpp** parser will accept three different formats for the `QUOTE` tag: + +1. `[QUOTE user=Username postid=1234]`: A key-value pair of values. In theory they are space delimited unless quoted. (Used with phpBB) +1. `[QUOTE="username, post: 1799684, member: 11733"]`: Another key-value pair format except the first argument is assumed to be the username. (Used with XenForo) +1. `[QUOTE=Username;1234]`: `Username` is the name of the user being quoted and `1234` is the postid. (Used with vBulletin) + +### `FONT` + +### `COLOR` + +### `LIST`/`[*]` + +### `IMG` + +### `URL` + +## BBNode Tree + +The following are examples of the node tree built during parsing. + +#### Example 1 + +> `This is [b]an example[/b] of some text` + +``` +#document +│-- @"This is" +│-- [b] +│ │-- @"an example" +│ │-- [/b] +│-- @"of some text" +``` + +#### Example 2 + +> `[QUOTE]This is [b]important[/b] news![/QUOTE]`

+> `Indeed it is!` + +``` +#document +│-- [QUOTE] +│ │-- @"This is " +│ │-- [b] +| | |-- @"important" +| | |-- [/b] +│ │-- @"news!" +│ │-- [/QUOTE] +│-- @"\n\nIndeed it is!" +``` + +#### Example 3 +> `[QUOTE user=Joe userid=1 postid=1234]This is another quote![/QUOTE]`

+> `I'm quoting you!`
+
+```
+#document
+│-- [QUOTE]
+| |-- {user=Joe}
+| |-- {userid=1}
+| |-- {postid=1234}
+│ │-- @"This is another quote!"
+│ │-- [/QUOTE]
+│-- @"\n\nI'm quoting you!"
+```
+[travis-img]: https://travis-ci.org/zethon/bbcpp.svg?branch=master
+[travis]: https://travis-ci.org/zethon/bbcpp
+
+[appveyor-img]: https://ci.appveyor.com/api/projects/status/i7p4q2d0vvoyv8aq?svg=true
+[appveyor]: https://ci.appveyor.com/project/zethon/bbcpp
\ No newline at end of file
diff --git a/libs/bbcpp/SCsub b/libs/bbcpp/SCsub
new file mode 100644
index 0000000..0ea248c
--- /dev/null
+++ b/libs/bbcpp/SCsub
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+Import("env")
+
+core_sources = []
+
+env.add_source_files(core_sources, "*.cpp")
+
+# Build it all as a library
+lib = env.add_library("lib_bbcpp", core_sources)
+env.Prepend(LIBS=[lib])
diff --git a/libs/bbcpp/bbcpputils.cpp b/libs/bbcpp/bbcpputils.cpp
new file mode 100644
index 0000000..58b158b
--- /dev/null
+++ b/libs/bbcpp/bbcpputils.cpp
@@ -0,0 +1,131 @@
+#include "BBDocument.h"
+#include "bbcpputils.h"
+
+// included directly for std::cout and std::string rather than relying on
+// BBDocument.h to pull them in
+#include <iostream>
+#include <string>
+
+namespace bbcpp
+{
+
+// Returns a human-readable name for `type`, or "Unknown" for a value outside
+// the enumeration.
+std::string nodeTypeToString(BBNode::NodeType type)
+{
+    switch (type)
+    {
+        case BBNode::NodeType::DOCUMENT:
+            return "Document";
+
+        case BBNode::NodeType::ELEMENT:
+            return "Element";
+
+        case BBNode::NodeType::TEXT:
+            return "Text";
+
+        case BBNode::NodeType::ATTRIBUTE:
+            return "Attribute";
+    }
+
+    return "Unknown";
+}
+
+// Helper Functions
+
+// Builds the line prefix used by printChildren(): one "| " per indent level
+// followed by "|-- ".
+std::string getIndentString(const unsigned int indent)
+{
+    std::string output;
+
+    for (unsigned int i = 0; i < indent; i++)
+    {
+        output += "| ";
+    }
+
+    output += "|-- ";
+    return output;
+}
+
+// Prints every descendant of `parent` to std::cout, one node per line, with
+// direct children at depth `indent` and each nesting level one deeper.
+// Elements are shown as "[name]" or "[/name]", followed by their parameters
+// on the next line for PARAMETER elements; text nodes are shown as @"text".
+void printChildren(const BBNode& parent, unsigned int indent)
+{
+    // bind by reference: copying a shared_ptr per iteration costs an atomic
+    // reference-count update for nothing
+    for (const auto& node : parent.getChildren())
+    {
+        switch (node->getNodeType())
+        {
+            default:
+            break;
+
+            case BBNode::NodeType::ELEMENT:
+            {
+                const auto element = node->downCast<BBElementPtr>();
+                std::cout
+                    << getIndentString(indent)
+                    << "["
+                    << (element->getElementType() == BBElement::CLOSING ? "/" : "")
+                    << element->getNodeName() << "]"
+                    << std::endl;
+
+                if (element->getElementType() == BBElement::PARAMETER)
+                {
+                    std::cout
+                        << getIndentString(indent + 1)
+                        << element->getParameters()
+                        << std::endl;
+                }
+            }
+            break;
+
+            case BBNode::NodeType::TEXT:
+            {
+                const auto textnode = node->downCast<BBTextPtr>();
+                std::cout << getIndentString(indent)
+                    << "@\"" << textnode->getText() << "\""
+                    << std::endl;
+            }
+            break;
+        }
+
+        printChildren(*node, indent+1);
+    }
+}
+
+// Prints the whole tree of `doc` to std::cout under a "#document" header.
+void printDocument(const BBDocument& doc)
+{
+    std::cout << "#document" << std::endl;
+
+    auto indent = 0u;
+    printChildren(doc, indent);
+}
+
+// Returns the text of every text node below `parent`, concatenated in
+// depth-first document order. Elements contribute nothing themselves; only
+// the text nested inside them is collected.
+std::string getRawString(const BBNode& parent)
+{
+    std::string root;
+    for (const auto& node : parent.getChildren())
+    {
+        if (node->getNodeType() == BBNode::NodeType::TEXT)
+        {
+            root += node->downCast<BBTextPtr>()->getText();
+        }
+
+        root += getRawString(*node);
+    }
+
+    return root;
+}
+
+} // namespace
diff --git a/libs/bbcpp/bbcpputils.h b/libs/bbcpp/bbcpputils.h
new file mode 100644
index 0000000..899d769
--- /dev/null
+++ b/libs/bbcpp/bbcpputils.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <string>
+
+#include "BBDocument.h"
+
+namespace bbcpp
+{
+
+// Helper Functions
+std::string nodeTypeToString(BBNode::NodeType type);
+std::string getIndentString(const unsigned int indent);
+void printChildren(const BBNode& parent, unsigned int indent);
+void printDocument(const BBDocument& doc);
+std::string getRawString(const BBNode& node);
+
+}
\ No newline at end of file