Added proper multi part form parsing support for the simple web server.

This commit is contained in:
Relintai 2022-08-22 22:16:32 +02:00
parent 5489ace788
commit b0da59764d
6 changed files with 709 additions and 174 deletions

View File

@ -13,6 +13,7 @@ sources = [
"http_parser.cpp", "http_parser.cpp",
"http_writer.cpp", "http_writer.cpp",
"http_parser/http_parser.c", "http_parser/http_parser.c",
"multipart_parser_c/multipart_parser.c",
] ]
if ARGUMENTS.get('custom_modules_shared', 'no') == 'yes': if ARGUMENTS.get('custom_modules_shared', 'no') == 'yes':

View File

@ -1,7 +1,9 @@
#include "http_parser.h" #include "http_parser.h"
#include "modules/web/http/web_server_request.h"
#include "./http_parser/http_parser.h" #include "./http_parser/http_parser.h"
#include "./multipart_parser_c/multipart_parser.h"
#include "modules/web/http/web_server_request.h"
#include "simple_web_server_request.h" #include "simple_web_server_request.h"
@ -45,8 +47,6 @@ int HTTPParser::read_from_buffer(const char *p_buffer, const int p_data_length)
HTTPParser::HTTPParser() { HTTPParser::HTTPParser() {
_is_ready = false; _is_ready = false;
_content_type = REQUEST_CONTENT_URLENCODED; _content_type = REQUEST_CONTENT_URLENCODED;
_in_multipart_boundary = false;
_in_boundary_header = false;
_multipart_form_is_file = false; _multipart_form_is_file = false;
settings = memnew(http_parser_settings); settings = memnew(http_parser_settings);
@ -65,8 +65,19 @@ HTTPParser::HTTPParser() {
//parser = malloc(sizeof(http_parser)); //parser = malloc(sizeof(http_parser));
parser = memnew(http_parser); parser = memnew(http_parser);
http_parser_init(parser, HTTP_REQUEST); http_parser_init(parser, HTTP_REQUEST);
parser->data = this; parser->data = this;
_multipart_parser_settings = memnew(multipart_parser_settings);
_multipart_parser_settings->on_header_field = _on_multipart_header_field_cb;
_multipart_parser_settings->on_header_value = _on_multipart_header_value_cb;
_multipart_parser_settings->on_part_data = _on_multipart_part_data_cb;
_multipart_parser_settings->on_part_data_begin = _on_multipart_part_data_begin_cb;
_multipart_parser_settings->on_headers_complete = _on_multipart_headers_complete_cb;
_multipart_parser_settings->on_part_data_end = _on_multipart_part_data_end_cb;
_multipart_parser_settings->on_body_end = _on_multipart_body_end_cb;
_multipart_parser = NULL;
} }
HTTPParser::~HTTPParser() { HTTPParser::~HTTPParser() {
@ -91,161 +102,51 @@ String HTTPParser::chr_len_to_str(const char *at, size_t length) {
return ret; return ret;
} }
void HTTPParser::HTTPParser::process_multipart_data() { int HTTPParser::HTTPParser::process_multipart_data(const char *at, size_t p_length) {
int iter = 0; ERR_FAIL_COND_V(!_multipart_parser, p_length);
//process one element per loop
while (true) {
//first boundary -> ignore, with everything before it
if (!_in_multipart_boundary) {
int boundary_index = _partial_data.find(_multipart_boundary);
if (boundary_index == -1) { return multipart_parser_execute(_multipart_parser, at, p_length);
return;
}
boundary_index += _multipart_boundary.size();
_partial_data = _partial_data.substr(boundary_index);
_in_multipart_boundary = true;
_in_boundary_header = true;
continue;
}
//find the first \n\n -> process boundary_header
//cut it out from the string.
if (_in_boundary_header) {
int header_end_index = _partial_data.find("\r\n\r\n");
if (header_end_index != -1) {
String header = _partial_data.substr_index(0, header_end_index);
_partial_data = _partial_data.substr(header_end_index + 4);
header = header.strip_edges();
//ERR_PRINT("HEADER");
//ERR_PRINT(header);
_process_multipart_header(header);
_in_boundary_header = false;
continue;
}
//Boundary header has not yet fully arrived, return
return;
}
//Multipart body
int boundary_index = _partial_data.find(_multipart_boundary);
if (boundary_index == -1) {
//TODO
//if file-> append everything to the HTTPTempFile, except the last boundary.size() - 1 characters from the string.
//should probably only happen after a while to save on memory use like maybe a meg or two (should be configurable)
// Should probably also be configurable whether it happens or not at all
return;
}
//ERR_PRINT("BODY");
String data = _partial_data.substr_index(0, boundary_index - 4); //to strip the 2 \r\n from before the boundary
//ERR_PRINT(data);
if (_multipart_form_is_file) {
if (data == "") {
_in_boundary_header = true;
continue;
}
CharString cs = data.ascii();
PoolByteArray file_data;
file_data.resize(cs.length());
PoolByteArray::Write w = file_data.write();
for (int i = 0; i < cs.length(); i++) {
w[i] = cs[i];
}
w.release();
_request->add_file(_multipart_form_name, _multipart_form_filename, file_data);
} else {
_request->add_parameter(_multipart_form_name, data);
}
boundary_index += _multipart_boundary.size();
_partial_data = _partial_data.substr(boundary_index);
if (_partial_data.begins_with("--")) {
//done
return;
}
_in_boundary_header = true;
//Safety for now
++iter;
ERR_FAIL_COND(iter == 10000);
}
} }
void HTTPParser::_process_multipart_header(const String &header) { void HTTPParser::_process_multipart_header_value(const String &val) {
_multipart_form_name = ""; if (_queued_multipart_header_field == "Content-Disposition") {
_multipart_form_filename = ""; int c = val.get_slice_count(";");
_multipart_form_content_type = "";
_multipart_form_is_file = false;
int nlc = header.get_slice_count("\r\n"); for (int j = 0; j < c; ++j) {
String vs = val.get_slicec(';', j).strip_edges();
for (int i = 0; i < nlc; ++i) { if (vs.get_slice_count("=") != 2) {
String l = header.get_slice("\r\n", i); continue;
int sc = l.get_slice_count(":");
if (sc != 2) {
continue;
}
String key = l.get_slicec(':', 0);
String val = l.get_slicec(':', 1);
if (key == "Content-Disposition") {
int c = val.get_slice_count(";");
for (int j = 0; j < c; ++j) {
String vs = val.get_slicec(';', j).strip_edges();
if (vs.get_slice_count("=") != 2) {
continue;
}
String kk = vs.get_slicec('=', 0);
if (kk == "name") {
_multipart_form_name = vs.get_slicec('=', 1);
if (_multipart_form_name.length() >= 2 && _multipart_form_name.begins_with("\"") && _multipart_form_name.ends_with("\"")) {
_multipart_form_name.remove(0);
_multipart_form_name.remove(_multipart_form_name.size() - 1);
}
} else if (kk == "filename") {
_multipart_form_filename = vs.get_slicec('=', 1);
_multipart_form_is_file = true;
if (_multipart_form_name.length() >= 2 && _multipart_form_name.begins_with("\"") && _multipart_form_name.ends_with("\"")) {
_multipart_form_name.remove(0);
_multipart_form_name.remove(_multipart_form_name.size() - 1);
}
}
} }
} else if (key == "Content-Type") { String kk = vs.get_slicec('=', 0);
_multipart_form_content_type = val;
} else { if (kk == "name") {
//Shouldn't happen, should probably close connection _multipart_form_name = vs.get_slicec('=', 1);
if (_multipart_form_name.length() >= 2 && _multipart_form_name.begins_with("\"") && _multipart_form_name.ends_with("\"")) {
_multipart_form_name.remove(0);
_multipart_form_name.remove(_multipart_form_name.size() - 1);
}
} else if (kk == "filename") {
_multipart_form_filename = vs.get_slicec('=', 1);
_multipart_form_filename = _multipart_form_filename.replace("\"", "");
_multipart_form_is_file = true;
if (_multipart_form_name.length() >= 2 && _multipart_form_name.begins_with("\"") && _multipart_form_name.ends_with("\"")) {
_multipart_form_name.remove(0);
_multipart_form_name.remove(_multipart_form_name.size() - 1);
}
}
} }
} else if (_queued_multipart_header_field == "Content-Type") {
_multipart_form_content_type = val;
} else {
//Shouldn't happen, should probably close connection
} }
_queued_multipart_header_field = "";
} }
void HTTPParser::process_urlenc_data() { void HTTPParser::process_urlenc_data() {
@ -282,6 +183,10 @@ void HTTPParser::process_urlenc_data() {
} }
} }
// Placeholder: currently reports false for every input and never inspects `at` or `length`.
bool HTTPParser::is_boundary_at(const char *at, size_t length) {
return false;
}
#define MESSAGE_DEBUG 0 #define MESSAGE_DEBUG 0
int HTTPParser::on_message_begin() { int HTTPParser::on_message_begin() {
@ -291,10 +196,6 @@ int HTTPParser::on_message_begin() {
_in_header = true; _in_header = true;
_content_type = REQUEST_CONTENT_URLENCODED; _content_type = REQUEST_CONTENT_URLENCODED;
_multipart_boundary = "";
_in_multipart_boundary = false;
_in_multipart_boundary = false;
_in_boundary_header = false;
_multipart_form_is_file = false; _multipart_form_is_file = false;
_request.instance(); _request.instance();
@ -401,15 +302,15 @@ int HTTPParser::on_header_value(const char *at, size_t length) {
bs += 9; //skip ahead to the end of "boundary=" bs += 9; //skip ahead to the end of "boundary="
_multipart_boundary = s.substr(bs); _multipart_boundary = "--" + s.substr(bs).strip_edges();
_multipart_boundary = _multipart_boundary.strip_edges(); //_multipart_boundary = _multipart_boundary.strip_edges();
//TODO can be inside quoted //TODO can be inside quoted
//Append -- if it doesn't have it already //Append -- if it doesn't have it already
//It shouldn't be longer that 70 chars //It shouldn't be longer that 70 chars
//The CRLF preceeding could also be appended for simpler logic //The CRLF preceeding could also be appended for simpler logic
if (_multipart_boundary == "") { if (_multipart_boundary.empty()) {
//Error! TODO set an error variable and close the connection //Error! TODO set an error variable and close the connection
} }
@ -447,6 +348,16 @@ int HTTPParser::on_headers_complete() {
//Check content length, and send error if bigger than server limit (add) //Check content length, and send error if bigger than server limit (add)
if (_content_type == REQUEST_CONTENT_MULTIPART_FORM_DATA) {
if (_multipart_parser) {
multipart_parser_free(_multipart_parser);
_multipart_parser = NULL;
}
_multipart_parser = multipart_parser_init(_multipart_boundary.ascii().get_data(), _multipart_parser_settings);
multipart_parser_set_data(_multipart_parser, this);
}
_in_header = false; _in_header = false;
_partial_data = ""; _partial_data = "";
@ -455,6 +366,26 @@ int HTTPParser::on_headers_complete() {
int HTTPParser::on_body(const char *at, size_t length) { int HTTPParser::on_body(const char *at, size_t length) {
ERR_FAIL_COND_V(!_request.is_valid(), 0); ERR_FAIL_COND_V(!_request.is_valid(), 0);
if (_content_type == REQUEST_CONTENT_MULTIPART_FORM_DATA) {
int wofs = _queued_multipart_form_data.size();
_queued_multipart_form_data.resize(_queued_multipart_form_data.size() + length);
char *w = _queued_multipart_form_data.ptrw();
for (int i = 0; i < length; ++i) {
w[wofs++] = at[i];
}
int processed = process_multipart_data(_queued_multipart_form_data.ptr(), _queued_multipart_form_data.size());
int size = _queued_multipart_form_data.size();
wofs = 0;
for (int i = processed; i < size; ++i) {
w[wofs++] = w[i];
}
_queued_multipart_form_data.resize(_queued_multipart_form_data.size() - processed);
return 0;
}
String s = chr_len_to_str(at, length); String s = chr_len_to_str(at, length);
#if MESSAGE_DEBUG #if MESSAGE_DEBUG
@ -463,10 +394,6 @@ int HTTPParser::on_body(const char *at, size_t length) {
_partial_data += s; _partial_data += s;
if (_content_type == REQUEST_CONTENT_MULTIPART_FORM_DATA) {
process_multipart_data();
}
return 0; return 0;
} }
@ -488,6 +415,19 @@ int HTTPParser::on_message_complete() {
_requests.push_back(_request); _requests.push_back(_request);
_request.unref(); _request.unref();
if (_multipart_parser) {
multipart_parser_free(_multipart_parser);
_multipart_parser = NULL;
}
_multipart_boundary = "";
_queued_multipart_header_field = "";
_multipart_form_name = "";
_multipart_form_filename = "";
_multipart_form_content_type = "";
return 0; return 0;
} }
int HTTPParser::on_chunk_header() { int HTTPParser::on_chunk_header() {
@ -549,3 +489,119 @@ int HTTPParser::_on_chunk_complete_cb(http_parser *parser) {
HTTPParser *p = reinterpret_cast<HTTPParser *>(parser->data); HTTPParser *p = reinterpret_cast<HTTPParser *>(parser->data);
return p->on_chunk_complete(); return p->on_chunk_complete();
} }
// Set to 1 to compile in the ERR_PRINT tracing guarded by this macro in the multipart callbacks.
#define MULTIPART_MESSAGE_DEBUG 0
// Receives a (possibly partial) part-header name from the multipart parser.
//
// The parser reports a header name in several pieces when the name straddles the end of
// the buffer handed to multipart_parser_execute(), so the pieces are appended instead of
// overwriting each other. The accumulated name is cleared again once its value has been
// handled (_process_multipart_header_value()) and when the message completes.
//
// Returns 0 so that the multipart parser keeps going.
int HTTPParser::on_multipart_header_field_cb(const char *at, size_t length) {
	String s = chr_len_to_str(at, length);

	_queued_multipart_header_field += s;

#if MULTIPART_MESSAGE_DEBUG
	ERR_PRINT("on_multipart_header_field_cb " + s);
#endif

	return 0;
}
int HTTPParser::on_multipart_header_value_cb(const char *at, size_t length) {
String s = chr_len_to_str(at, length);
_process_multipart_header_value(s);
#if MULTIPART_MESSAGE_DEBUG
ERR_PRINT("on_multipart_header_value_cb " + s);
#endif
return 0;
}
// Multipart parser callback delivering a slice of the current part's body.
// The slice is appended to _multipart_form_data, which is consumed when the part ends.
// Always returns 0 (continue parsing).
int HTTPParser::on_multipart_part_data_cb(const char *at, size_t length) {
#if MULTIPART_MESSAGE_DEBUG
	ERR_PRINT("on_multipart_part_data_cb");
#endif

	const int incoming = static_cast<int>(length);
	const int previous_size = _multipart_form_data.size();

	// Grow the buffer first, then copy the new bytes in behind the existing ones.
	_multipart_form_data.resize(previous_size + length);

	char *buffer = _multipart_form_data.ptrw();

	for (int src = 0, dst = previous_size; src < incoming; ++src, ++dst) {
		buffer[dst] = at[src];
	}

	return 0;
}
// Multipart parser notification for the start of a part. Nothing is done here besides the
// optional debug trace. Returns 0 so that parsing continues.
int HTTPParser::on_multipart_part_data_begin_cb() {
#if MULTIPART_MESSAGE_DEBUG
ERR_PRINT("on_multipart_part_data_begin_cb");
#endif
return 0;
}
// Multipart parser notification that a part's headers are finished. Nothing is done here
// besides the optional debug trace. Returns 0 so that parsing continues.
int HTTPParser::on_multipart_headers_complete_cb() {
#if MULTIPART_MESSAGE_DEBUG
ERR_PRINT("on_multipart_headers_complete_cb");
#endif
return 0;
}
int HTTPParser::on_multipart_part_data_end_cb() {
#if MULTIPART_MESSAGE_DEBUG
ERR_PRINT("on_multipart_part_data_end_cb");
#endif
if (_multipart_form_is_file) {
PoolByteArray file_data;
int len = _multipart_form_data.size();
file_data.resize(len);
PoolByteArray::Write w = file_data.write();
const char *r = _multipart_form_data.ptr();
for (int i = 0; i < len; i++) {
w[i] = r[i];
}
w.release();
_request->add_file(_multipart_form_name, _multipart_form_filename, file_data);
} else {
_request->add_parameter(_multipart_form_name, String(_multipart_form_data.ptr()));
}
_multipart_form_is_file = false;
_multipart_form_data.clear();
return 0;
}
// Multipart parser notification that the closing boundary of the body was reached. Nothing
// is done here besides the optional debug trace. Returns 0 so that parsing continues.
int HTTPParser::on_multipart_body_end_cb() {
#if MULTIPART_MESSAGE_DEBUG
ERR_PRINT("on_multipart_body_end_cb");
#endif
return 0;
}
// Static trampoline registered as on_header_field: recovers the HTTPParser stored as the
// multipart parser's user data and forwards the call to it.
int HTTPParser::_on_multipart_header_field_cb(multipart_parser *parser, const char *at, size_t length) {
	HTTPParser *self = static_cast<HTTPParser *>(multipart_parser_get_data(parser));

	return self->on_multipart_header_field_cb(at, length);
}
// Static trampoline registered as on_header_value: recovers the HTTPParser stored as the
// multipart parser's user data and forwards the call to it.
int HTTPParser::_on_multipart_header_value_cb(multipart_parser *parser, const char *at, size_t length) {
	HTTPParser *self = static_cast<HTTPParser *>(multipart_parser_get_data(parser));

	return self->on_multipart_header_value_cb(at, length);
}
// Static trampoline registered as on_part_data: recovers the HTTPParser stored as the
// multipart parser's user data and forwards the call to it.
int HTTPParser::_on_multipart_part_data_cb(multipart_parser *parser, const char *at, size_t length) {
	HTTPParser *self = static_cast<HTTPParser *>(multipart_parser_get_data(parser));

	return self->on_multipart_part_data_cb(at, length);
}
// Static trampoline registered as on_part_data_begin: recovers the HTTPParser stored as the
// multipart parser's user data and forwards the notification to it.
int HTTPParser::_on_multipart_part_data_begin_cb(multipart_parser *parser) {
	HTTPParser *self = static_cast<HTTPParser *>(multipart_parser_get_data(parser));

	return self->on_multipart_part_data_begin_cb();
}
// Static trampoline registered as on_headers_complete: recovers the HTTPParser stored as the
// multipart parser's user data and forwards the notification to it.
int HTTPParser::_on_multipart_headers_complete_cb(multipart_parser *parser) {
	HTTPParser *self = static_cast<HTTPParser *>(multipart_parser_get_data(parser));

	return self->on_multipart_headers_complete_cb();
}
// Static trampoline registered as on_part_data_end: recovers the HTTPParser stored as the
// multipart parser's user data and forwards the notification to it.
int HTTPParser::_on_multipart_part_data_end_cb(multipart_parser *parser) {
	HTTPParser *self = static_cast<HTTPParser *>(multipart_parser_get_data(parser));

	return self->on_multipart_part_data_end_cb();
}
// Static trampoline registered as on_body_end: recovers the HTTPParser stored as the
// multipart parser's user data and forwards the notification to it.
int HTTPParser::_on_multipart_body_end_cb(multipart_parser *parser) {
	HTTPParser *self = static_cast<HTTPParser *>(multipart_parser_get_data(parser));

	return self->on_multipart_body_end_cb();
}

View File

@ -2,12 +2,15 @@
#define HTTP_PARSER_H #define HTTP_PARSER_H
#include "core/string/ustring.h" #include "core/string/ustring.h"
#include "core/containers/vector.h"
#include "core/object/reference.h" #include "core/object/reference.h"
class SimpleWebServerRequest; class SimpleWebServerRequest;
struct http_parser; struct http_parser;
struct http_parser_settings; struct http_parser_settings;
struct multipart_parser;
struct multipart_parser_settings;
class HTTPParser : public Reference { class HTTPParser : public Reference {
GDCLASS(HTTPParser, Reference); GDCLASS(HTTPParser, Reference);
@ -44,12 +47,28 @@ protected:
bool _is_ready; bool _is_ready;
HTTPRequestContentType _content_type;
bool _in_header;
String _queued_header_field;
String _multipart_boundary;
String _queued_multipart_header_field;
Vector<char> _queued_multipart_form_data;
String _multipart_form_name;
String _multipart_form_filename;
String _multipart_form_content_type;
bool _multipart_form_is_file;
Vector<char> _multipart_form_data;
private: private:
String chr_len_to_str(const char *at, size_t length); String chr_len_to_str(const char *at, size_t length);
void process_multipart_data(); int process_multipart_data(const char *at, size_t length);
void _process_multipart_header(const String &header); void _process_multipart_header_value(const String &val);
void process_urlenc_data(); void process_urlenc_data();
bool is_boundary_at(const char *at, size_t length);
int on_message_begin(); int on_message_begin();
int on_url(const char *at, size_t length); int on_url(const char *at, size_t length);
@ -73,20 +92,27 @@ private:
static int _on_chunk_header_cb(http_parser *parser); static int _on_chunk_header_cb(http_parser *parser);
static int _on_chunk_complete_cb(http_parser *parser); static int _on_chunk_complete_cb(http_parser *parser);
int on_multipart_header_field_cb(const char *at, size_t length);
int on_multipart_header_value_cb(const char *at, size_t length);
int on_multipart_part_data_cb(const char *at, size_t length);
int on_multipart_part_data_begin_cb();
int on_multipart_headers_complete_cb();
int on_multipart_part_data_end_cb();
int on_multipart_body_end_cb();
static int _on_multipart_header_field_cb(multipart_parser *parser, const char *at, size_t length);
static int _on_multipart_header_value_cb(multipart_parser *parser, const char *at, size_t length);
static int _on_multipart_part_data_cb(multipart_parser *parser, const char *at, size_t length);
static int _on_multipart_part_data_begin_cb(multipart_parser *parser);
static int _on_multipart_headers_complete_cb(multipart_parser *parser);
static int _on_multipart_part_data_end_cb(multipart_parser *parser);
static int _on_multipart_body_end_cb(multipart_parser *parser);
http_parser *parser; http_parser *parser;
http_parser_settings *settings; http_parser_settings *settings;
HTTPRequestContentType _content_type; multipart_parser *_multipart_parser;
String _multipart_boundary; multipart_parser_settings *_multipart_parser_settings;
bool _in_header;
String _queued_header_field;
bool _in_multipart_boundary;
bool _in_boundary_header;
String _multipart_form_name;
String _multipart_form_filename;
String _multipart_form_content_type;
bool _multipart_form_is_file;
}; };
#endif #endif

View File

@ -0,0 +1,98 @@
## Multipart form data parser
### Features
* No dependencies
* Works with chunks of data - no need to buffer the whole request
* Almost no internal buffering. Buffer size doesn't exceed the size of the boundary (~60-70 bytes)
Tested as part of [Cosmonaut](https://github.com/iafonov/cosmonaut) HTTP server.
Implementation based on [node-formidable](https://github.com/felixge/node-formidable) by [Felix Geisendörfer](https://github.com/felixge).
Inspired by [http-parser](https://github.com/joyent/http-parser) by [Ryan Dahl](https://github.com/ry).
### Usage (C)
This parser library works with several callbacks, which the user may set up at application initialization time.
```c
multipart_parser_settings callbacks;
memset(&callbacks, 0, sizeof(multipart_parser_settings));
callbacks.on_header_field = read_header_name;
callbacks.on_header_value = read_header_value;
```
These functions must match the signatures defined in the multipart-parser header file. For this simple example, we'll just use two of the available callbacks to print all headers the library finds in multipart messages.
Returning a value other than 0 from the callbacks will abort message processing.
```c
int read_header_name(multipart_parser* p, const char *at, size_t length)
{
printf("%.*s: ", length, at);
return 0;
}
int read_header_value(multipart_parser* p, const char *at, size_t length)
{
printf("%.*s\n", length, at);
return 0;
}
```
When a message arrives, callers must parse the multipart boundary from the **Content-Type** header (see the [RFC](http://tools.ietf.org/html/rfc2387#section-5.1) for more information and examples), and then execute the parser.
```c
multipart_parser* parser = multipart_parser_init(boundary, &callbacks);
multipart_parser_execute(parser, body, length);
multipart_parser_free(parser);
```
### Usage (C++)
In C++, when the callbacks are static member functions it may be helpful to pass the instantiated multipart consumer along as context. The following (abbreviated) class called `MultipartConsumer` shows how to pass `this` to callback functions in order to access non-static member data.
```cpp
class MultipartConsumer
{
public:
MultipartConsumer(const std::string& boundary)
{
memset(&m_callbacks, 0, sizeof(multipart_parser_settings));
m_callbacks.on_header_field = ReadHeaderName;
m_callbacks.on_header_value = ReadHeaderValue;
m_parser = multipart_parser_init(boundary.c_str(), &m_callbacks);
multipart_parser_set_data(m_parser, this);
}
~MultipartConsumer()
{
multipart_parser_free(m_parser);
}
int CountHeaders(const std::string& body)
{
multipart_parser_execute(m_parser, body.c_str(), body.size());
return m_headers;
}
private:
static int ReadHeaderName(multipart_parser* p, const char *at, size_t length)
{
MultipartConsumer* me = (MultipartConsumer*)multipart_parser_get_data(p);
me->m_headers++;
}
multipart_parser* m_parser;
multipart_parser_settings m_callbacks;
int m_headers;
};
```
### Contributors
* [Daniel T. Wagner](http://www.danieltwagner.de/)
* [James McLaughlin](http://udp.github.com/)
* [Jay Miller](http://www.cryptofreak.org)
© 2012 [Igor Afonov](http://iafonov.github.com)

View File

@ -0,0 +1,306 @@
/* Based on node-formidable by Felix Geisendörfer
* Igor Afonov - afonov@gmail.com - 2012
* MIT License - http://www.opensource.org/licenses/mit-license.php
*/
#include "multipart_parser.h"
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
static void multipart_log(const char * format, ...)
{
#ifdef DEBUG_MULTIPART
va_list args;
va_start(args, format);
fprintf(stderr, "[HTTP_MULTIPART_PARSER] %s:%d: ", __FILE__, __LINE__);
vfprintf(stderr, format, args);
fprintf(stderr, "\n");
#endif
}
/* Invokes the notification callback settings->on_<FOR> if one is set. A non-zero result makes
 * the *enclosing function* return `i`; the macro expects `p` and `i` to be in scope where it
 * is expanded. */
#define NOTIFY_CB(FOR) \
do { \
if (p->settings->on_##FOR) { \
if (p->settings->on_##FOR(p) != 0) { \
return i; \
} \
} \
} while (0)
/* Invokes the data callback settings->on_<FOR> with (ptr, len) if one is set. A non-zero result
 * makes the *enclosing function* return `i`; the macro expects `p` and `i` to be in scope where
 * it is expanded. */
#define EMIT_DATA_CB(FOR, ptr, len) \
do { \
if (p->settings->on_##FOR) { \
if (p->settings->on_##FOR(p, ptr, len) != 0) { \
return i; \
} \
} \
} while (0)
/* ASCII codes of line feed and carriage return. */
#define LF 10
#define CR 13
/* Parser instance. It is allocated by multipart_parser_init() as one oversized block: the
 * boundary copy and the lookbehind buffer live directly behind the struct, which is why
 * multipart_boundary must stay the last member. */
struct multipart_parser {
/* Opaque user pointer, see multipart_parser_set_data() / multipart_parser_get_data(). */
void * data;
/* Position inside the boundary that is currently being matched. */
size_t index;
/* strlen() of the boundary passed to multipart_parser_init(). */
size_t boundary_length;
/* Current value of `enum state`. */
unsigned char state;
/* Callback table supplied by the caller; not owned by the parser. */
const multipart_parser_settings* settings;
/* Points just past the boundary copy; holds CR, LF and the boundary bytes matched so far, so
 * they can be emitted as part data if the match fails. */
char* lookbehind;
/* NUL-terminated copy of the boundary (the array extends into the extra allocation). */
char multipart_boundary[1];
};
/* States of the multipart_parser_execute() state machine, stored in multipart_parser::state.
 * A fresh parser starts in s_start; s_end is entered after the closing "--" of the final
 * boundary. */
enum state {
s_uninitialized = 1,
s_start,
s_start_boundary,
s_header_field_start,
s_header_field,
s_headers_almost_done,
s_header_value_start,
s_header_value,
s_header_value_almost_done,
s_part_data_start,
s_part_data,
s_part_data_almost_boundary,
s_part_data_boundary,
s_part_data_almost_end,
s_part_data_end,
s_part_data_final_hyphen,
s_end
};
multipart_parser* multipart_parser_init
(const char *boundary, const multipart_parser_settings* settings) {
multipart_parser* p = malloc(sizeof(multipart_parser) +
strlen(boundary) +
strlen(boundary) + 9);
strcpy(p->multipart_boundary, boundary);
p->boundary_length = strlen(boundary);
p->lookbehind = (p->multipart_boundary + p->boundary_length + 1);
p->index = 0;
p->state = s_start;
p->settings = settings;
return p;
}
/* Releases a parser obtained from multipart_parser_init(). The boundary copy and lookbehind
 * buffer are part of the same allocation, so a single free() is enough. */
void multipart_parser_free(multipart_parser* p) {
free(p);
}
/* Stores an opaque user pointer in the parser; the parser itself never dereferences it. */
void multipart_parser_set_data(multipart_parser *p, void *data) {
p->data = data;
}
/* Returns the user pointer previously stored with multipart_parser_set_data(). */
void *multipart_parser_get_data(multipart_parser *p) {
return p->data;
}
/* Feeds `len` bytes from `buf` through the multipart state machine, firing the callbacks
 * configured in p->settings as headers and part data are recognised. The parser keeps its
 * state between calls, so a body may be fed in several consecutive buffers.
 *
 * Returns `len` when every byte was accepted. Otherwise returns the index of the byte at
 * which parsing stopped: either the input was malformed at that position or a callback
 * returned non-zero (see NOTIFY_CB / EMIT_DATA_CB). An unknown parser state yields 0. */
size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len) {
  size_t i = 0;
  /* Start of the span (header name, header value or part data) not yet emitted. */
  size_t mark = 0;
  char c, cl;
  /* Set while looking at the final byte of `buf`, so pending spans get flushed. */
  int is_last = 0;

  while(i < len) {
    c = buf[i];
    is_last = (i == (len - 1));
    switch (p->state) {
      case s_start:
        multipart_log("s_start");
        p->index = 0;
        p->state = s_start_boundary;

      /* fallthrough */
      case s_start_boundary:
        multipart_log("s_start_boundary");
        if (p->index == p->boundary_length) {
          /* The whole boundary matched; it must be followed by CR LF. */
          if (c != CR) {
            return i;
          }
          p->index++;
          break;
        } else if (p->index == (p->boundary_length + 1)) {
          if (c != LF) {
            return i;
          }
          p->index = 0;
          NOTIFY_CB(part_data_begin);
          p->state = s_header_field_start;
          break;
        }
        if (c != p->multipart_boundary[p->index]) {
          return i;
        }
        p->index++;
        break;

      case s_header_field_start:
        multipart_log("s_header_field_start");
        mark = i;
        p->state = s_header_field;

      /* fallthrough */
      case s_header_field:
        multipart_log("s_header_field");
        if (c == CR) {
          p->state = s_headers_almost_done;
          break;
        }

        if (c == ':') {
          EMIT_DATA_CB(header_field, buf + mark, i - mark);
          p->state = s_header_value_start;
          break;
        }

        /* tolower() is only defined for values representable as unsigned char (or EOF), so
         * bytes >= 0x80 must not be passed as a possibly negative plain char. */
        cl = (char) tolower((unsigned char) c);
        if ((c != '-') && (cl < 'a' || cl > 'z')) {
          multipart_log("invalid character in header name");
          return i;
        }
        /* The buffer ends inside the header name: hand over what is available so far. */
        if (is_last)
          EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
        break;

      case s_headers_almost_done:
        multipart_log("s_headers_almost_done");
        if (c != LF) {
          return i;
        }

        p->state = s_part_data_start;
        break;

      case s_header_value_start:
        multipart_log("s_header_value_start");
        /* Skip the spaces between ':' and the value. */
        if (c == ' ') {
          break;
        }

        mark = i;
        p->state = s_header_value;

      /* fallthrough */
      case s_header_value:
        multipart_log("s_header_value");
        if (c == CR) {
          EMIT_DATA_CB(header_value, buf + mark, i - mark);
          p->state = s_header_value_almost_done;
          break;
        }
        /* The buffer ends inside the header value: hand over what is available so far. */
        if (is_last)
          EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
        break;

      case s_header_value_almost_done:
        multipart_log("s_header_value_almost_done");
        if (c != LF) {
          return i;
        }
        p->state = s_header_field_start;
        break;

      case s_part_data_start:
        multipart_log("s_part_data_start");
        NOTIFY_CB(headers_complete);
        mark = i;
        p->state = s_part_data;

      /* fallthrough */
      case s_part_data:
        multipart_log("s_part_data");
        if (c == CR) {
          /* A CR may start the CR LF <boundary> delimiter: flush the data seen so far and
           * keep the CR in the lookbehind buffer until we know. */
          EMIT_DATA_CB(part_data, buf + mark, i - mark);
          mark = i;
          p->state = s_part_data_almost_boundary;
          p->lookbehind[0] = CR;
          break;
        }
        if (is_last)
          EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
        break;

      case s_part_data_almost_boundary:
        multipart_log("s_part_data_almost_boundary");
        if (c == LF) {
          p->state = s_part_data_boundary;
          p->lookbehind[1] = LF;
          p->index = 0;
          break;
        }
        /* Not a delimiter: the held-back CR was ordinary data. The decrement cancels the
         * ++i at the bottom of the loop so the current byte is examined again as data. */
        EMIT_DATA_CB(part_data, p->lookbehind, 1);
        p->state = s_part_data;
        mark = i --;
        break;

      case s_part_data_boundary:
        multipart_log("s_part_data_boundary");
        if (p->multipart_boundary[p->index] != c) {
          /* Mismatch: CR, LF and the boundary bytes matched so far were ordinary data.
           * The current byte is examined again as data (see above). */
          EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
          p->state = s_part_data;
          mark = i --;
          break;
        }
        p->lookbehind[2 + p->index] = c;
        if ((++ p->index) == p->boundary_length) {
          NOTIFY_CB(part_data_end);
          p->state = s_part_data_almost_end;
        }
        break;

      case s_part_data_almost_end:
        multipart_log("s_part_data_almost_end");
        /* After a boundary: "--" closes the body, CR LF starts the next part. */
        if (c == '-') {
          p->state = s_part_data_final_hyphen;
          break;
        }
        if (c == CR) {
          p->state = s_part_data_end;
          break;
        }
        return i;

      case s_part_data_final_hyphen:
        multipart_log("s_part_data_final_hyphen");
        if (c == '-') {
          NOTIFY_CB(body_end);
          p->state = s_end;
          break;
        }
        return i;

      case s_part_data_end:
        multipart_log("s_part_data_end");
        if (c == LF) {
          p->state = s_header_field_start;
          NOTIFY_CB(part_data_begin);
          break;
        }
        return i;

      case s_end:
        /* Anything after the closing boundary is consumed and ignored. */
        multipart_log("s_end: %02X", (int) c);
        break;

      default:
        multipart_log("Multipart parser unrecoverable error");
        return 0;
    }
    ++ i;
  }

  return len;
}

View File

@ -0,0 +1,48 @@
/* Based on node-formidable by Felix Geisendörfer
* Igor Afonov - afonov@gmail.com - 2012
* MIT License - http://www.opensource.org/licenses/mit-license.php
*/
#ifndef _multipart_parser_h
#define _multipart_parser_h
#ifdef __cplusplus
extern "C"
{
#endif
#include <stdlib.h>
#include <ctype.h>
typedef struct multipart_parser multipart_parser;
typedef struct multipart_parser_settings multipart_parser_settings;
typedef struct multipart_parser_state multipart_parser_state;
typedef int (*multipart_data_cb) (multipart_parser*, const char *at, size_t length);
typedef int (*multipart_notify_cb) (multipart_parser*);
/* Callback table handed to multipart_parser_init(). Entries left NULL are skipped; a callback
 * that returns non-zero makes multipart_parser_execute() stop and return early. */
struct multipart_parser_settings {
/* Bytes of a part header name (may be delivered in several calls). */
multipart_data_cb on_header_field;
/* Bytes of a part header value (may be delivered in several calls). */
multipart_data_cb on_header_value;
/* Bytes of a part's body (delivered in as many calls as needed). */
multipart_data_cb on_part_data;
/* A boundary line was read and a new part starts. */
multipart_notify_cb on_part_data_begin;
/* The headers of the current part are finished; its data follows. */
multipart_notify_cb on_headers_complete;
/* The boundary terminating the current part was matched. */
multipart_notify_cb on_part_data_end;
/* The closing "--" after the final boundary was read. */
multipart_notify_cb on_body_end;
};
/* Creates a parser for `boundary`; `settings` is stored by pointer, not copied. */
multipart_parser* multipart_parser_init
(const char *boundary, const multipart_parser_settings* settings);
/* Destroys a parser created by multipart_parser_init(). */
void multipart_parser_free(multipart_parser* p);
/* Parses `len` bytes of `buf`; returns the number of bytes consumed (`len` on success). */
size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len);
/* Attach / fetch an opaque user pointer, e.g. to reach an object from the callbacks. */
void multipart_parser_set_data(multipart_parser* p, void* data);
void * multipart_parser_get_data(multipart_parser* p);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif