diff options
Diffstat (limited to 'websocketpp/http/parser.hpp')
-rw-r--r-- | websocketpp/http/parser.hpp | 619 |
1 files changed, 619 insertions, 0 deletions
diff --git a/websocketpp/http/parser.hpp b/websocketpp/http/parser.hpp new file mode 100644 index 00000000..90f49ebe --- /dev/null +++ b/websocketpp/http/parser.hpp @@ -0,0 +1,619 @@ +/* + * Copyright (c) 2014, Peter Thorson. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the WebSocket++ Project nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef HTTP_PARSER_HPP +#define HTTP_PARSER_HPP + +#include <algorithm> +#include <map> +#include <string> +#include <utility> + +#include <websocketpp/utilities.hpp> +#include <websocketpp/http/constants.hpp> + +namespace websocketpp { +namespace http { +namespace parser { + +namespace state { + enum value { + method, + resource, + version, + headers + }; +} + +namespace body_encoding { + enum value { + unknown, + plain, + chunked + }; +} + +typedef std::map<std::string, std::string, utility::ci_less > header_list; + +/// Read and return the next token in the stream +/** + * Read until a non-token character is found and then return the token and + * iterator to the next character to read + * + * @param begin An iterator to the beginning of the sequence + * @param end An iterator to the end of the sequence + * @return A pair containing the token and an iterator to the next character in + * the stream + */ +template <typename InputIterator> +std::pair<std::string,InputIterator> extract_token(InputIterator begin, + InputIterator end) +{ + InputIterator it = std::find_if(begin,end,&is_not_token_char); + return std::make_pair(std::string(begin,it),it); +} + +/// Read and return the next quoted string in the stream +/** + * Read a double quoted string starting at `begin`. The quotes themselves are + * stripped. The quoted value is returned along with an iterator to the next + * character to read + * + * @param begin An iterator to the beginning of the sequence + * @param end An iterator to the end of the sequence + * @return A pair containing the string read and an iterator to the next + * character in the stream + */ +template <typename InputIterator> +std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin, + InputIterator end) +{ + std::string s; + + if (end == begin) { + return std::make_pair(s,begin); + } + + if (*begin != '"') { + return std::make_pair(s,begin); + } + + InputIterator cursor = begin+1; + InputIterator marker = cursor; + + cursor = std::find(cursor,end,'"'); + + while (cursor != end) { + // either this is the end or a quoted string + if (*(cursor-1) == '\\') { + s.append(marker,cursor-1); + s.append(1,'"'); + ++cursor; + marker = cursor; + } else { + s.append(marker,cursor); + ++cursor; + return std::make_pair(s,cursor); + } + + cursor = std::find(cursor,end,'"'); + } + + return std::make_pair("",begin); +} + +/// Read and discard one unit of linear whitespace +/** + * Read one unit of linear white space and return the iterator to the character + * afterwards. If `begin` is returned, no whitespace was extracted. + * + * @param begin An iterator to the beginning of the sequence + * @param end An iterator to the end of the sequence + * @return An iterator to the character after the linear whitespace read + */ +template <typename InputIterator> +InputIterator extract_lws(InputIterator begin, InputIterator end) { + InputIterator it = begin; + + // strip leading CRLF + if (end-begin > 2 && *begin == '\r' && *(begin+1) == '\n' && + is_whitespace_char(static_cast<unsigned char>(*(begin+2)))) + { + it+=3; + } + + it = std::find_if(it,end,&is_not_whitespace_char); + return it; +} + +/// Read and discard linear whitespace +/** + * Read linear white space until a non-lws character is read and return an + * iterator to that character. If `begin` is returned, no whitespace was + * extracted. + * + * @param begin An iterator to the beginning of the sequence + * @param end An iterator to the end of the sequence + * @return An iterator to the character after the linear whitespace read + */ +template <typename InputIterator> +InputIterator extract_all_lws(InputIterator begin, InputIterator end) { + InputIterator old_it; + InputIterator new_it = begin; + + do { + // Pull value from previous iteration + old_it = new_it; + + // look ahead another pass + new_it = extract_lws(old_it,end); + } while (new_it != end && old_it != new_it); + + return new_it; +} + +/// Extract HTTP attributes +/** + * An http attributes list is a semicolon delimited list of key value pairs in + * the format: *( ";" attribute "=" value ) where attribute is a token and value + * is a token or quoted string. + * + * Attributes extracted are appended to the supplied attributes list + * `attributes`. + * + * @param [in] begin An iterator to the beginning of the sequence + * @param [in] end An iterator to the end of the sequence + * @param [out] attributes A reference to the attributes list to append + * attribute/value pairs extracted to + * @return An iterator to the character after the last atribute read + */ +template <typename InputIterator> +InputIterator extract_attributes(InputIterator begin, InputIterator end, + attribute_list & attributes) +{ + InputIterator cursor; + bool first = true; + + if (begin == end) { + return begin; + } + + cursor = begin; + std::pair<std::string,InputIterator> ret; + + while (cursor != end) { + std::string name; + + cursor = http::parser::extract_all_lws(cursor,end); + if (cursor == end) { + break; + } + + if (first) { + // ignore this check for the very first pass + first = false; + } else { + if (*cursor == ';') { + // advance past the ';' + ++cursor; + } else { + // non-semicolon in this position indicates end end of the + // attribute list, break and return. + break; + } + } + + cursor = http::parser::extract_all_lws(cursor,end); + ret = http::parser::extract_token(cursor,end); + + if (ret.first.empty()) { + // error: expected a token + return begin; + } else { + name = ret.first; + cursor = ret.second; + } + + cursor = http::parser::extract_all_lws(cursor,end); + if (cursor == end || *cursor != '=') { + // if there is an equals sign, read the attribute value. Otherwise + // record a blank value and continue + attributes[name].clear(); + continue; + } + + // advance past the '=' + ++cursor; + + cursor = http::parser::extract_all_lws(cursor,end); + if (cursor == end) { + // error: expected a token or quoted string + return begin; + } + + ret = http::parser::extract_quoted_string(cursor,end); + if (ret.second != cursor) { + attributes[name] = ret.first; + cursor = ret.second; + continue; + } + + ret = http::parser::extract_token(cursor,end); + if (ret.first.empty()) { + // error : expected token or quoted string + return begin; + } else { + attributes[name] = ret.first; + cursor = ret.second; + } + } + + return cursor; +} + +/// Extract HTTP parameters +/** + * An http parameters list is a comma delimited list of tokens followed by + * optional semicolon delimited attributes lists. + * + * Parameters extracted are appended to the supplied parameters list + * `parameters`. + * + * @param [in] begin An iterator to the beginning of the sequence + * @param [in] end An iterator to the end of the sequence + * @param [out] parameters A reference to the parameters list to append + * paramter values extracted to + * @return An iterator to the character after the last parameter read + */ +template <typename InputIterator> +InputIterator extract_parameters(InputIterator begin, InputIterator end, + parameter_list ¶meters) +{ + InputIterator cursor; + + if (begin == end) { + // error: expected non-zero length range + return begin; + } + + cursor = begin; + std::pair<std::string,InputIterator> ret; + + /** + * LWS + * token + * LWS + * *(";" method-param) + * LWS + * ,=loop again + */ + while (cursor != end) { + std::string parameter_name; + attribute_list attributes; + + // extract any stray whitespace + cursor = http::parser::extract_all_lws(cursor,end); + if (cursor == end) {break;} + + ret = http::parser::extract_token(cursor,end); + + if (ret.first.empty()) { + // error: expected a token + return begin; + } else { + parameter_name = ret.first; + cursor = ret.second; + } + + // Safe break point, insert parameter with blank attributes and exit + cursor = http::parser::extract_all_lws(cursor,end); + if (cursor == end) { + //parameters[parameter_name] = attributes; + parameters.push_back(std::make_pair(parameter_name,attributes)); + break; + } + + // If there is an attribute list, read it in + if (*cursor == ';') { + InputIterator acursor; + + ++cursor; + acursor = http::parser::extract_attributes(cursor,end,attributes); + + if (acursor == cursor) { + // attribute extraction ended in syntax error + return begin; + } + + cursor = acursor; + } + + // insert parameter into output list + //parameters[parameter_name] = attributes; + parameters.push_back(std::make_pair(parameter_name,attributes)); + + cursor = http::parser::extract_all_lws(cursor,end); + if (cursor == end) {break;} + + // if next char is ',' then read another parameter, else stop + if (*cursor != ',') { + break; + } + + // advance past comma + ++cursor; + + if (cursor == end) { + // expected more bytes after a comma + return begin; + } + } + + return cursor; +} + +inline std::string strip_lws(std::string const & input) { + std::string::const_iterator begin = extract_all_lws(input.begin(),input.end()); + if (begin == input.end()) { + return std::string(); + } + + std::string::const_reverse_iterator rbegin = extract_all_lws(input.rbegin(),input.rend()); + if (rbegin == input.rend()) { + return std::string(); + } + + return std::string(begin,rbegin.base()); +} + +/// Base HTTP parser +/** + * Includes methods and data elements common to all types of HTTP messages such + * as headers, versions, bodies, etc. + */ +class parser { +public: + parser() + : m_header_bytes(0) + , m_body_bytes_needed(0) + , m_body_bytes_max(max_body_size) + , m_body_encoding(body_encoding::unknown) {} + + /// Get the HTTP version string + /** + * @return The version string for this parser + */ + std::string const & get_version() const { + return m_version; + } + + /// Set HTTP parser Version + /** + * Input should be in format: HTTP/x.y where x and y are positive integers. + * @todo Does this method need any validation? + * + * @param [in] version The value to set the HTTP version to. + */ + void set_version(std::string const & version); + + /// Get the value of an HTTP header + /** + * @todo Make this method case insensitive. + * + * @param [in] key The name/key of the header to get. + * @return The value associated with the given HTTP header key. + */ + std::string const & get_header(std::string const & key) const; + + /// Extract an HTTP parameter list from a parser header. + /** + * If the header requested doesn't exist or exists and is empty the + * parameter list is valid (but empty). + * + * @param [in] key The name/key of the HTTP header to use as input. + * @param [out] out The parameter list to store extracted parameters in. + * @return Whether or not the input was a valid parameter list. + */ + bool get_header_as_plist(std::string const & key, parameter_list & out) + const; + + /// Append a value to an existing HTTP header + /** + * This method will set the value of the HTTP header `key` with the + * indicated value. If a header with the name `key` already exists, `val` + * will be appended to the existing value. + * + * @todo Make this method case insensitive. + * @todo Should there be any restrictions on which keys are allowed? + * @todo Exception free varient + * + * @see replace_header + * + * @param [in] key The name/key of the header to append to. + * @param [in] val The value to append. + */ + void append_header(std::string const & key, std::string const & val); + + /// Set a value for an HTTP header, replacing an existing value + /** + * This method will set the value of the HTTP header `key` with the + * indicated value. If a header with the name `key` already exists, `val` + * will replace the existing value. + * + * @todo Make this method case insensitive. + * @todo Should there be any restrictions on which keys are allowed? + * @todo Exception free varient + * + * @see append_header + * + * @param [in] key The name/key of the header to append to. + * @param [in] val The value to append. + */ + void replace_header(std::string const & key, std::string const & val); + + /// Remove a header from the parser + /** + * Removes the header entirely from the parser. This is different than + * setting the value of the header to blank. + * + * @todo Make this method case insensitive. + * + * @param [in] key The name/key of the header to remove. + */ + void remove_header(std::string const & key); + + /// Get HTTP body + /** + * Gets the body of the HTTP object + * + * @return The body of the HTTP message. + */ + std::string const & get_body() const { + return m_body; + } + + /// Set body content + /** + * Set the body content of the HTTP response to the parameter string. Note + * set_body will also set the Content-Length HTTP header to the appropriate + * value. If you want the Content-Length header to be something else, do so + * via replace_header("Content-Length") after calling set_body() + * + * @param value String data to include as the body content. + */ + void set_body(std::string const & value); + + /// Get body size limit + /** + * Retrieves the maximum number of bytes to parse & buffer before canceling + * a request. + * + * @since 0.5.0 + * + * @return The maximum length of a message body. + */ + size_t get_max_body_size() const { + return m_body_bytes_max; + } + + /// Set body size limit + /** + * Set the maximum number of bytes to parse and buffer before canceling a + * request. + * + * @since 0.5.0 + * + * @param value The size to set the max body length to. + */ + void set_max_body_size(size_t value) { + m_body_bytes_max = value; + } + + /// Extract an HTTP parameter list from a string. + /** + * @param [in] in The input string. + * @param [out] out The parameter list to store extracted parameters in. + * @return Whether or not the input was a valid parameter list. + */ + bool parse_parameter_list(std::string const & in, parameter_list & out) + const; +protected: + /// Process a header line + /** + * @todo Update this method to be exception free. + * + * @param [in] begin An iterator to the beginning of the sequence. + * @param [in] end An iterator to the end of the sequence. + */ + void process_header(std::string::iterator begin, std::string::iterator end); + + /// Prepare the parser to begin parsing body data + /** + * Inspects headers to determine if the message has a body that needs to be + * read. If so, sets up the necessary state, otherwise returns false. If + * this method returns true and loading the message body is desired call + * `process_body` until it returns zero bytes or an error. + * + * Must not be called until after all headers have been processed. + * + * @since 0.5.0 + * + * @return True if more bytes are needed to load the body, false otherwise. + */ + bool prepare_body(); + + /// Process body data + /** + * Parses body data. + * + * @since 0.5.0 + * + * @param [in] begin An iterator to the beginning of the sequence. + * @param [in] end An iterator to the end of the sequence. + * @return The number of bytes processed + */ + size_t process_body(char const * buf, size_t len); + + /// Check if the parser is done parsing the body + /** + * Behavior before a call to `prepare_body` is undefined. + * + * @since 0.5.0 + * + * @return True if the message body has been completed loaded. + */ + bool body_ready() const { + return (m_body_bytes_needed == 0); + } + + /// Generate and return the HTTP headers as a string + /** + * Each headers will be followed by the \r\n sequence including the last one. + * A second \r\n sequence (blank header) is not appended by this method + * + * @return The HTTP headers as a string. + */ + std::string raw_headers() const; + + std::string m_version; + header_list m_headers; + + size_t m_header_bytes; + + std::string m_body; + size_t m_body_bytes_needed; + size_t m_body_bytes_max; + body_encoding::value m_body_encoding; +}; + +} // namespace parser +} // namespace http +} // namespace websocketpp + +#include <websocketpp/http/impl/parser.hpp> + +#endif // HTTP_PARSER_HPP |