aboutsummaryrefslogtreecommitdiffstats
path: root/websocketpp/http/parser.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'websocketpp/http/parser.hpp')
-rw-r--r--websocketpp/http/parser.hpp619
1 files changed, 619 insertions, 0 deletions
diff --git a/websocketpp/http/parser.hpp b/websocketpp/http/parser.hpp
new file mode 100644
index 00000000..90f49ebe
--- /dev/null
+++ b/websocketpp/http/parser.hpp
@@ -0,0 +1,619 @@
+/*
+ * Copyright (c) 2014, Peter Thorson. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the WebSocket++ Project nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef HTTP_PARSER_HPP
+#define HTTP_PARSER_HPP
+
+#include <algorithm>
+#include <map>
+#include <string>
+#include <utility>
+
+#include <websocketpp/utilities.hpp>
+#include <websocketpp/http/constants.hpp>
+
+namespace websocketpp {
+namespace http {
+namespace parser {
+
+namespace state { // Identifiers for parser stages; not referenced elsewhere in this header.
+ enum value { // NOTE(review): presumably the request-line parts followed by the header block -- confirm against the consumer
+ method,
+ resource,
+ version,
+ headers
+ };
+}
+
+namespace body_encoding { // Body encoding identifiers; the parser class stores one in m_body_encoding.
+ enum value {
+ unknown, // value the parser constructor initializes m_body_encoding with
+ plain, // NOTE(review): presumably a body delimited by Content-Length -- confirm against the implementation
+ chunked // NOTE(review): presumably a chunked transfer-coded body -- confirm against the implementation
+ };
+}
+
+typedef std::map<std::string, std::string, utility::ci_less > header_list; // header name -> header value; NOTE(review): utility::ci_less is presumably a case-insensitive ordering -- confirm in websocketpp/utilities.hpp
+
+/// Extract the leading token from a character range
+/**
+ * Scans forward from `begin` and stops at the first character for which
+ * `is_not_token_char` is true, or at `end` if there is none. Everything before
+ * the stopping point is returned as the token; the token may be empty.
+ *
+ * @param begin An iterator to the first character to examine
+ * @param end An iterator one past the last character to examine
+ * @return A pair holding the token text and an iterator to the first character
+ * that was not consumed
+ */
+template <typename InputIterator>
+std::pair<std::string,InputIterator> extract_token(InputIterator begin,
+    InputIterator end)
+{
+    InputIterator const token_end = std::find_if(begin,end,&is_not_token_char);
+    return std::pair<std::string,InputIterator>(std::string(begin,token_end),
+        token_end);
+}
+
+/// Read and return the next quoted string in the stream
+/**
+ * Read a double quoted string starting at `begin`. The surrounding quotes are
+ * stripped and every quoted-pair (a backslash followed by any character) is
+ * replaced by the character that follows the backslash. This means `\"` yields
+ * a literal quote and `\\` yields a literal backslash, so a value that ends in
+ * an escaped backslash (`"a\\"`) is terminated by the quote that follows it.
+ *
+ * The range is walked strictly forward, one character at a time.
+ *
+ * On failure (empty range, first character is not a double quote, or no
+ * unescaped closing quote before `end`) an empty string and `begin` are
+ * returned. Callers detect failure by comparing the returned iterator with
+ * `begin`.
+ *
+ * @param begin An iterator to the beginning of the sequence
+ * @param end An iterator to the end of the sequence
+ * @return A pair containing the string read and an iterator to the next
+ * character in the stream
+ */
+template <typename InputIterator>
+std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin,
+    InputIterator end)
+{
+    std::string s;
+
+    if (begin == end || *begin != '"') {
+        return std::make_pair(s,begin);
+    }
+
+    InputIterator cursor = begin;
+    ++cursor; // step over the opening quote
+
+    while (cursor != end) {
+        if (*cursor == '"') {
+            // unescaped quote: this closes the string
+            ++cursor;
+            return std::make_pair(s,cursor);
+        }
+
+        if (*cursor == '\\') {
+            // quoted-pair: drop the backslash, keep the following character
+            ++cursor;
+            if (cursor == end) {
+                // dangling backslash, the string was never closed
+                break;
+            }
+        }
+
+        s.append(1,*cursor);
+        ++cursor;
+    }
+
+    // ran out of input before finding the closing quote
+    return std::make_pair(std::string(),begin);
+}
+
+/// Skip a single run of linear whitespace
+/**
+ * If the range starts with CR LF followed by a character accepted by
+ * `is_whitespace_char`, those three characters are consumed first. After that,
+ * characters are consumed up to the first one for which
+ * `is_not_whitespace_char` is true. When the returned iterator equals `begin`
+ * nothing was consumed.
+ *
+ * The iterator type must support `end-begin` and `begin+n`.
+ *
+ * @param begin An iterator to the beginning of the sequence
+ * @param end An iterator to the end of the sequence
+ * @return An iterator to the first character after the whitespace consumed
+ */
+template <typename InputIterator>
+InputIterator extract_lws(InputIterator begin, InputIterator end) {
+    // The length test comes first so the dereferences below stay in range
+    bool const starts_with_fold = (end-begin > 2)
+        && (*begin == '\r')
+        && (*(begin+1) == '\n')
+        && is_whitespace_char(static_cast<unsigned char>(*(begin+2)));
+
+    InputIterator const scan_from = starts_with_fold ? begin+3 : begin;
+
+    return std::find_if(scan_from,end,&is_not_whitespace_char);
+}
+
+/// Skip every consecutive run of linear whitespace
+/**
+ * Calls `extract_lws` repeatedly, each time starting where the previous call
+ * stopped, until a call consumes nothing or the end of the range is reached.
+ * When the returned iterator equals `begin` nothing was consumed.
+ *
+ * @param begin An iterator to the beginning of the sequence
+ * @param end An iterator to the end of the sequence
+ * @return An iterator to the first character after all whitespace consumed
+ */
+template <typename InputIterator>
+InputIterator extract_all_lws(InputIterator begin, InputIterator end) {
+    InputIterator position = begin;
+
+    for (;;) {
+        InputIterator const next = extract_lws(position,end);
+        bool const progressed = (next != position);
+
+        position = next;
+
+        // stop once a pass made no progress or the input is exhausted
+        if (!progressed || position == end) {
+            return position;
+        }
+    }
+}
+
+/// Parse a semicolon delimited HTTP attribute list
+/**
+ * Reads `attribute [ "=" value ]` entries separated by ";" where attribute is
+ * a token and value is either a token or a quoted string. Linear whitespace is
+ * skipped around every element. An attribute with no "=" is stored with an
+ * empty value. No ";" is expected in front of the first attribute.
+ *
+ * Each entry is written into `attributes` as soon as it has been read, so
+ * entries parsed before a syntax error stay in `attributes`.
+ *
+ * @param [in] begin An iterator to the beginning of the sequence
+ * @param [in] end An iterator to the end of the sequence
+ * @param [out] attributes A reference to the attributes list that receives
+ * the attribute/value pairs
+ * @return An iterator to the character after the last attribute read, or
+ * `begin` if the range is empty or a syntax error was found
+ */
+template <typename InputIterator>
+InputIterator extract_attributes(InputIterator begin, InputIterator end,
+    attribute_list & attributes)
+{
+    typedef std::pair<std::string,InputIterator> extraction;
+
+    InputIterator pos = begin;
+    bool need_separator = false;
+
+    while (pos != end) {
+        pos = http::parser::extract_all_lws(pos,end);
+        if (pos == end) {
+            break;
+        }
+
+        // every attribute except the first must be introduced by a ';'
+        if (need_separator) {
+            if (*pos != ';') {
+                // anything else marks the end of the attribute list
+                break;
+            }
+            ++pos;
+        }
+        need_separator = true;
+
+        pos = http::parser::extract_all_lws(pos,end);
+        extraction const key = http::parser::extract_token(pos,end);
+        if (key.first.empty()) {
+            // error: expected a token
+            return begin;
+        }
+        std::string const & name = key.first;
+
+        pos = http::parser::extract_all_lws(key.second,end);
+        bool const has_value = (pos != end) && (*pos == '=');
+        if (!has_value) {
+            // no '=': record the attribute with a blank value
+            attributes[name].clear();
+            continue;
+        }
+
+        // step over the '='
+        ++pos;
+
+        pos = http::parser::extract_all_lws(pos,end);
+        if (pos == end) {
+            // error: expected a token or quoted string
+            return begin;
+        }
+
+        // a quoted string was read if and only if the iterator moved
+        extraction const quoted = http::parser::extract_quoted_string(pos,end);
+        if (quoted.second != pos) {
+            attributes[name] = quoted.first;
+            pos = quoted.second;
+            continue;
+        }
+
+        extraction const bare = http::parser::extract_token(pos,end);
+        if (bare.first.empty()) {
+            // error: expected a token or quoted string
+            return begin;
+        }
+        attributes[name] = bare.first;
+        pos = bare.second;
+    }
+
+    return pos;
+}
+
+/// Parse a comma delimited HTTP parameter list
+/**
+ * Each parameter is a token, optionally followed by ";" and an attribute list
+ * as read by `extract_attributes`. Parameters are separated by ",". Linear
+ * whitespace is skipped around every element.
+ *
+ * Each parameter is appended to `parameters` as soon as it has been read, so
+ * parameters parsed before a syntax error stay in `parameters`.
+ *
+ * @param [in] begin An iterator to the beginning of the sequence
+ * @param [in] end An iterator to the end of the sequence
+ * @param [out] parameters A reference to the parameters list that receives
+ * the parameter values
+ * @return An iterator to the character after the last parameter read, or
+ * `begin` if the range is empty or a syntax error was found
+ */
+template <typename InputIterator>
+InputIterator extract_parameters(InputIterator begin, InputIterator end,
+    parameter_list &parameters)
+{
+    typedef std::pair<std::string,InputIterator> extraction;
+
+    InputIterator pos = begin;
+
+    // Each pass consumes: LWS token LWS [ ";" attributes ] LWS [ "," ]
+    while (pos != end) {
+        // extract any stray whitespace
+        pos = http::parser::extract_all_lws(pos,end);
+        if (pos == end) {
+            break;
+        }
+
+        extraction const name = http::parser::extract_token(pos,end);
+        if (name.first.empty()) {
+            // error: expected a token
+            return begin;
+        }
+
+        attribute_list attributes;
+        pos = http::parser::extract_all_lws(name.second,end);
+
+        // if there is an attribute list, read it in
+        if (pos != end && *pos == ';') {
+            ++pos;
+            InputIterator const attributes_begin = pos;
+
+            pos = http::parser::extract_attributes(attributes_begin,end,
+                attributes);
+            if (pos == attributes_begin) {
+                // attribute extraction ended in syntax error
+                return begin;
+            }
+        }
+
+        // attributes is still blank here when no ';' followed the token
+        parameters.push_back(std::make_pair(name.first,attributes));
+
+        // only a ',' continues the list; anything else (or the end) stops it
+        pos = http::parser::extract_all_lws(pos,end);
+        if (pos == end || *pos != ',') {
+            break;
+        }
+
+        // advance past comma
+        ++pos;
+        if (pos == end) {
+            // expected more bytes after a comma
+            return begin;
+        }
+    }
+
+    return pos;
+}
+
+/// Strip linear whitespace from both ends of a string
+/**
+ * Runs `extract_all_lws` forward from the front of `input` and again over the
+ * reversed string to locate the retained range. If either scan consumes the
+ * whole string an empty string is returned.
+ *
+ * @param input The string to strip
+ * @return A copy of `input` without the whitespace found at either end
+ */
+inline std::string strip_lws(std::string const & input) {
+    std::string stripped;
+
+    std::string::const_iterator const first =
+        extract_all_lws(input.begin(),input.end());
+
+    if (first != input.end()) {
+        std::string::const_reverse_iterator const rlast =
+            extract_all_lws(input.rbegin(),input.rend());
+
+        if (rlast != input.rend()) {
+            // base() converts the reverse position into the forward end
+            stripped.assign(first,rlast.base());
+        }
+    }
+
+    return stripped;
+}
+
+/// Base HTTP parser
+/**
+ * Includes methods and data elements common to all types of HTTP messages such
+ * as headers, versions, bodies, etc.
+ *
+ * Only the inline accessors are defined in this class body. The remaining
+ * member functions are declared here and defined elsewhere.
+ */
+class parser {
+public:
+ parser()
+ : m_header_bytes(0)
+ , m_body_bytes_needed(0)
+ , m_body_bytes_max(max_body_size) // max_body_size is not declared in this header; presumably from http/constants.hpp
+ , m_body_encoding(body_encoding::unknown) {}
+
+ /// Get the HTTP version string
+ /**
+ * @return The version string for this parser
+ */
+ std::string const & get_version() const {
+ return m_version;
+ }
+
+ /// Set HTTP parser Version
+ /**
+ * Input should be in format: HTTP/x.y where x and y are positive integers.
+ * @todo Does this method need any validation?
+ *
+ * @param [in] version The value to set the HTTP version to.
+ */
+ void set_version(std::string const & version);
+
+ /// Get the value of an HTTP header
+ /**
+ * @todo Make this method case insensitive.
+ *
+ * @param [in] key The name/key of the header to get.
+ * @return The value associated with the given HTTP header key.
+ */
+ std::string const & get_header(std::string const & key) const;
+
+ /// Extract an HTTP parameter list from a parser header.
+ /**
+ * If the header requested doesn't exist or exists and is empty the
+ * parameter list is valid (but empty).
+ *
+ * @param [in] key The name/key of the HTTP header to use as input.
+ * @param [out] out The parameter list to store extracted parameters in.
+ * @return Whether or not the input was a valid parameter list.
+ */
+ bool get_header_as_plist(std::string const & key, parameter_list & out)
+ const;
+
+ /// Append a value to an existing HTTP header
+ /**
+ * This method will set the value of the HTTP header `key` with the
+ * indicated value. If a header with the name `key` already exists, `val`
+ * will be appended to the existing value.
+ *
+ * @todo Make this method case insensitive.
+ * @todo Should there be any restrictions on which keys are allowed?
+ * @todo Exception free variant
+ *
+ * @see replace_header
+ *
+ * @param [in] key The name/key of the header to append to.
+ * @param [in] val The value to append.
+ */
+ void append_header(std::string const & key, std::string const & val);
+
+ /// Set a value for an HTTP header, replacing an existing value
+ /**
+ * This method will set the value of the HTTP header `key` with the
+ * indicated value. If a header with the name `key` already exists, `val`
+ * will replace the existing value.
+ *
+ * @todo Make this method case insensitive.
+ * @todo Should there be any restrictions on which keys are allowed?
+ * @todo Exception free variant
+ *
+ * @see append_header
+ *
+ * @param [in] key The name/key of the header to set.
+ * @param [in] val The value to store, replacing any existing value.
+ */
+ void replace_header(std::string const & key, std::string const & val);
+
+ /// Remove a header from the parser
+ /**
+ * Removes the header entirely from the parser. This is different than
+ * setting the value of the header to blank.
+ *
+ * @todo Make this method case insensitive.
+ *
+ * @param [in] key The name/key of the header to remove.
+ */
+ void remove_header(std::string const & key);
+
+ /// Get HTTP body
+ /**
+ * Gets the body of the HTTP object
+ *
+ * @return The body of the HTTP message.
+ */
+ std::string const & get_body() const {
+ return m_body;
+ }
+
+ /// Set body content
+ /**
+ * Set the body content of the HTTP response to the parameter string. Note
+ * set_body will also set the Content-Length HTTP header to the appropriate
+ * value. If you want the Content-Length header to be something else, do so
+ * via replace_header("Content-Length") after calling set_body()
+ *
+ * @param value String data to include as the body content.
+ */
+ void set_body(std::string const & value);
+
+ /// Get body size limit
+ /**
+ * Retrieves the maximum number of bytes to parse & buffer before canceling
+ * a request.
+ *
+ * @since 0.5.0
+ *
+ * @return The maximum length of a message body.
+ */
+ size_t get_max_body_size() const {
+ return m_body_bytes_max;
+ }
+
+ /// Set body size limit
+ /**
+ * Set the maximum number of bytes to parse and buffer before canceling a
+ * request.
+ *
+ * @since 0.5.0
+ *
+ * @param value The size to set the max body length to.
+ */
+ void set_max_body_size(size_t value) {
+ m_body_bytes_max = value;
+ }
+
+ /// Extract an HTTP parameter list from a string.
+ /**
+ * @param [in] in The input string.
+ * @param [out] out The parameter list to store extracted parameters in.
+ * @return Whether or not the input was a valid parameter list.
+ */
+ bool parse_parameter_list(std::string const & in, parameter_list & out)
+ const;
+protected:
+ /// Process a header line
+ /**
+ * @todo Update this method to be exception free.
+ *
+ * @param [in] begin An iterator to the beginning of the sequence.
+ * @param [in] end An iterator to the end of the sequence.
+ */
+ void process_header(std::string::iterator begin, std::string::iterator end);
+
+ /// Prepare the parser to begin parsing body data
+ /**
+ * Inspects headers to determine if the message has a body that needs to be
+ * read. If so, sets up the necessary state, otherwise returns false. If
+ * this method returns true and loading the message body is desired call
+ * `process_body` until it returns zero bytes or an error.
+ *
+ * Must not be called until after all headers have been processed.
+ *
+ * @since 0.5.0
+ *
+ * @return True if more bytes are needed to load the body, false otherwise.
+ */
+ bool prepare_body();
+
+ /// Process body data
+ /**
+ * Parses body data.
+ *
+ * @since 0.5.0
+ *
+ * @param [in] buf Pointer to the start of the input to process.
+ * @param [in] len Length of the input; presumably the number of bytes
+ * available at `buf` (confirm against the implementation).
+ * @return The number of bytes processed
+ */
+ size_t process_body(char const * buf, size_t len);
+
+ /// Check if the parser is done parsing the body
+ /**
+ * Behavior before a call to `prepare_body` is undefined.
+ *
+ * @since 0.5.0
+ *
+ * @return True if the message body has been completely loaded.
+ */
+ bool body_ready() const {
+ return (m_body_bytes_needed == 0);
+ }
+
+ /// Generate and return the HTTP headers as a string
+ /**
+ * Each header will be followed by the \r\n sequence including the last one.
+ * A second \r\n sequence (blank header) is not appended by this method
+ *
+ * @return The HTTP headers as a string.
+ */
+ std::string raw_headers() const;
+
+ std::string m_version; ///< Version string returned by get_version()
+ header_list m_headers; ///< NOTE(review): presumably the storage behind the *_header methods -- confirm
+
+ size_t m_header_bytes; ///< Starts at 0; NOTE(review): presumably counts header bytes read so far -- confirm
+
+ std::string m_body; ///< Body returned by get_body()
+ size_t m_body_bytes_needed; ///< body_ready() reports true when this is 0
+ size_t m_body_bytes_max; ///< Limit read and written by get_max_body_size()/set_max_body_size()
+ body_encoding::value m_body_encoding; ///< Starts as body_encoding::unknown
+};
+
+} // namespace parser
+} // namespace http
+} // namespace websocketpp
+
+#include <websocketpp/http/impl/parser.hpp>
+
+#endif // HTTP_PARSER_HPP