aboutsummaryrefslogtreecommitdiffstats
path: root/websocketpp/http/parser.hpp
blob: 90f49ebe570c655c315223d4cd613f93e14891d7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
/*
 * Copyright (c) 2014, Peter Thorson. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the WebSocket++ Project nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef HTTP_PARSER_HPP
#define HTTP_PARSER_HPP

#include <algorithm>
#include <map>
#include <string>
#include <utility>

#include <websocketpp/utilities.hpp>
#include <websocketpp/http/constants.hpp>

namespace websocketpp {
namespace http {
namespace parser {

/// Parser state identifiers
/**
 * NOTE(review): nothing in this header reads these values. Judging by the
 * names they label the successive stages of reading an HTTP message (method,
 * resource, version, then headers) - confirm against the code that uses them.
 */
namespace state {
    enum value {
        method,
        resource,
        version,
        headers
    };
}

/// Identifiers for the way a message body is encoded
/**
 * `unknown` is the value a default constructed `parser` starts out with.
 *
 * NOTE(review): `plain` and `chunked` presumably distinguish a fixed length
 * body from one using chunked transfer coding - confirm against the
 * implementation of `parser::prepare_body`, which is not part of this header.
 */
namespace body_encoding {
    enum value {
        unknown,
        plain,
        chunked
    };
}

/// Container mapping a header name to its value, ordered by `utility::ci_less`
/// (declared outside this header; presumably a case insensitive comparison -
/// confirm).
typedef std::map<std::string, std::string, utility::ci_less > header_list;

/// Extract the leading token from a character range
/**
 * Scans forward from `begin` and stops at the first character for which
 * `is_not_token_char` returns true, or at `end` if there is none. Everything
 * before the stopping point is returned as the token. The token is empty when
 * the range is empty or its first character is already a non-token character.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return A pair holding the token text and an iterator to the first character
 * that was not consumed
 */
template <typename InputIterator>
std::pair<std::string,InputIterator> extract_token(InputIterator begin,
    InputIterator end)
{
    InputIterator const token_end = std::find_if(begin,end,&is_not_token_char);

    return std::pair<std::string,InputIterator>(
        std::string(begin,token_end),
        token_end
    );
}

/// Read and return the next quoted string in the stream
/**
 * Read a double quoted string starting at `begin`. The surrounding quotes are
 * stripped and every quoted-pair (a backslash followed by any character) is
 * replaced by the character that follows the backslash. This means `\"`
 * yields a literal quote and `\\` yields a literal backslash, so a value that
 * ends in an escaped backslash is terminated correctly by the quote after it.
 *
 * Extraction fails, returning an empty string and `begin`, when:
 *  - the range is empty,
 *  - the first character is not a double quote,
 *  - no unescaped closing quote is found before `end` (this includes input
 *    that ends directly after a backslash).
 *
 * Callers detect failure by comparing the returned iterator with `begin`; a
 * successful read always consumes at least the two quote characters.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return A pair containing the string read and an iterator to the next
 * character in the stream
 */
template <typename InputIterator>
std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin,
    InputIterator end)
{
    std::string s;

    if (end == begin || *begin != '"') {
        return std::make_pair(s,begin);
    }

    // step over the opening quote
    InputIterator cursor = begin;
    ++cursor;

    // start of the run of literal characters not yet copied into `s`
    InputIterator marker = cursor;

    while (cursor != end) {
        if (*cursor == '"') {
            // unescaped quote: this is the end of the string
            s.append(marker,cursor);
            ++cursor;
            return std::make_pair(s,cursor);
        }

        if (*cursor == '\\') {
            // flush what we have, dropping the backslash itself
            s.append(marker,cursor);
            ++cursor;
            if (cursor == end) {
                // dangling escape, the string can not be terminated
                break;
            }
            // the escaped character is taken literally: it becomes the first
            // character of the next run and is skipped by the increment below
            // so that it is never interpreted as a quote or another escape.
            marker = cursor;
        }

        ++cursor;
    }

    // ran out of input before finding the closing quote
    return std::make_pair(std::string(),begin);
}

/// Skip a single unit of linear whitespace
/**
 * One unit is an optional line fold (CR LF immediately followed by a
 * whitespace character) and then any run of whitespace characters. The fold
 * is only recognised at the very start of the range and only when at least
 * three characters are available.
 *
 * If the returned iterator equals `begin`, nothing was skipped.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return An iterator to the first character after the skipped whitespace
 */
template <typename InputIterator>
InputIterator extract_lws(InputIterator begin, InputIterator end) {
    // a fold is CRLF followed by at least one whitespace character
    bool const starts_with_fold =
        end-begin > 2 &&
        *begin == '\r' &&
        *(begin+1) == '\n' &&
        is_whitespace_char(static_cast<unsigned char>(*(begin+2)));

    InputIterator const scan_from = starts_with_fold ? begin+3 : begin;

    return std::find_if(scan_from,end,&is_not_whitespace_char);
}

/// Skip every consecutive unit of linear whitespace
/**
 * Applies `extract_lws` repeatedly until a pass makes no progress or the end
 * of the range is reached. If the returned iterator equals `begin`, the range
 * did not start with linear whitespace.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return An iterator to the first character that is not linear whitespace
 */
template <typename InputIterator>
InputIterator extract_all_lws(InputIterator begin, InputIterator end) {
    InputIterator position = begin;

    for (;;) {
        InputIterator const advanced = extract_lws(position,end);

        // stop once the input is exhausted or the last pass consumed nothing
        if (advanced == end || advanced == position) {
            return advanced;
        }

        position = advanced;
    }
}

/// Extract HTTP attributes
/**
 * An attribute list has the form `attribute *( ";" attribute )` where each
 * attribute is `name [ "=" value ]`, `name` is a token and `value` is either
 * a token or a quoted string. Linear whitespace is permitted around every
 * element. The caller is expected to have consumed the ";" that introduces
 * the list, so no separator is required before the first attribute.
 *
 * Each attribute read is stored in `attributes` under its name; an attribute
 * without a value is stored with an empty string. Parsing stops successfully
 * at the end of input or at the first character that is not a ";" where a
 * separator was expected.
 *
 * On a syntax error `begin` is returned. Attributes stored before the error
 * was found are not removed from `attributes`.
 *
 * @param [in] begin An iterator to the beginning of the sequence
 * @param [in] end An iterator to the end of the sequence
 * @param [out] attributes A reference to the attributes list to store
 * attribute/value pairs in
 * @return An iterator to the character after the last attribute read, or
 * `begin` if the input was empty or malformed
 */
template <typename InputIterator>
InputIterator extract_attributes(InputIterator begin, InputIterator end,
    attribute_list & attributes)
{
    if (begin == end) {
        return begin;
    }

    InputIterator pos = begin;

    // every attribute except the first must be introduced by a ';'
    bool separator_required = false;

    while (pos != end) {
        pos = http::parser::extract_all_lws(pos,end);
        if (pos == end) {
            break;
        }

        if (separator_required) {
            if (*pos != ';') {
                // anything else marks the end of the attribute list
                break;
            }
            ++pos;
        }
        separator_required = true;

        // attribute name
        pos = http::parser::extract_all_lws(pos,end);
        std::pair<std::string,InputIterator> const key =
            http::parser::extract_token(pos,end);

        if (key.first.empty()) {
            // error: expected a token
            return begin;
        }

        pos = http::parser::extract_all_lws(key.second,end);

        // no '=' means the attribute has no value: record it as blank
        if (pos == end || *pos != '=') {
            attributes[key.first].clear();
            continue;
        }

        // step over the '='
        ++pos;

        pos = http::parser::extract_all_lws(pos,end);
        if (pos == end) {
            // error: expected a token or quoted string
            return begin;
        }

        // attribute value: try a quoted string first, then fall back to a
        // plain token if nothing was consumed
        std::pair<std::string,InputIterator> value =
            http::parser::extract_quoted_string(pos,end);

        if (value.second == pos) {
            value = http::parser::extract_token(pos,end);
            if (value.first.empty()) {
                // error: expected token or quoted string
                return begin;
            }
        }

        attributes[key.first] = value.first;
        pos = value.second;
    }

    return pos;
}

/// Extract HTTP parameters
/**
 * An http parameters list is a comma delimited list of tokens followed by
 * optional semicolon delimited attributes lists.
 *
 * Parameters extracted are appended to the supplied parameters list
 * `parameters`.
 *
 * @param [in] begin An iterator to the beginning of the sequence
 * @param [in] end An iterator to the end of the sequence
 * @param [out] parameters A reference to the parameters list to append
 * paramter values extracted to
 * @return An iterator to the character after the last parameter read
 */
template <typename InputIterator>
InputIterator extract_parameters(InputIterator begin, InputIterator end,
    parameter_list &parameters)
{
    InputIterator cursor;

    if (begin == end) {
        // error: expected non-zero length range
        return begin;
    }

    cursor = begin;
    std::pair<std::string,InputIterator> ret;

    /**
     * LWS
     * token
     * LWS
     * *(";" method-param)
     * LWS
     * ,=loop again
     */
    while (cursor != end) {
        std::string parameter_name;
        attribute_list attributes;

        // extract any stray whitespace
        cursor = http::parser::extract_all_lws(cursor,end);
        if (cursor == end) {break;}

        ret = http::parser::extract_token(cursor,end);

        if (ret.first.empty()) {
            // error: expected a token
            return begin;
        } else {
            parameter_name = ret.first;
            cursor = ret.second;
        }

        // Safe break point, insert parameter with blank attributes and exit
        cursor = http::parser::extract_all_lws(cursor,end);
        if (cursor == end) {
            //parameters[parameter_name] = attributes;
            parameters.push_back(std::make_pair(parameter_name,attributes));
            break;
        }

        // If there is an attribute list, read it in
        if (*cursor == ';') {
            InputIterator acursor;

            ++cursor;
            acursor = http::parser::extract_attributes(cursor,end,attributes);

            if (acursor == cursor) {
                // attribute extraction ended in syntax error
                return begin;
            }

            cursor = acursor;
        }

        // insert parameter into output list
        //parameters[parameter_name] = attributes;
        parameters.push_back(std::make_pair(parameter_name,attributes));

        cursor = http::parser::extract_all_lws(cursor,end);
        if (cursor == end) {break;}

        // if next char is ',' then read another parameter, else stop
        if (*cursor != ',') {
            break;
        }

        // advance past comma
        ++cursor;

        if (cursor == end) {
            // expected more bytes after a comma
            return begin;
        }
    }

    return cursor;
}

/// Remove linear whitespace from both ends of a string
/**
 * Leading whitespace is skipped with `extract_all_lws` on the forward range
 * and trailing whitespace with the same function on the reversed range.
 *
 * @param [in] input The string to strip.
 * @return A copy of `input` without leading and trailing linear whitespace.
 * Empty if `input` is empty or consists of linear whitespace only.
 */
inline std::string strip_lws(std::string const & input) {
    std::string::const_iterator const head =
        extract_all_lws(input.begin(),input.end());

    if (head != input.end()) {
        std::string::const_reverse_iterator const tail =
            extract_all_lws(input.rbegin(),input.rend());

        if (tail != input.rend()) {
            // base() converts the reverse iterator into the forward iterator
            // one past the last character to keep
            return std::string(head,tail.base());
        }
    }

    return std::string();
}

/// Base HTTP parser
/**
 * Includes methods and data elements common to all types of HTTP messages such
 * as headers, versions, bodies, etc.
 *
 * Only the trivial accessors are defined inline here; the remaining member
 * functions are declared here and defined elsewhere (this header includes
 * websocketpp/http/impl/parser.hpp after the class definition).
 */
class parser {
public:
    /// Construct an empty parser
    /**
     * The header and body byte counters start at zero, the body size limit is
     * initialised from `max_body_size` (declared outside this header) and the
     * body encoding starts as `body_encoding::unknown`.
     */
    parser()
      : m_header_bytes(0)
      , m_body_bytes_needed(0)
      , m_body_bytes_max(max_body_size)
      , m_body_encoding(body_encoding::unknown) {}
    
    /// Get the HTTP version string
    /**
     * @return The version string for this parser
     */
    std::string const & get_version() const {
        return m_version;
    }

    /// Set HTTP parser Version
    /**
     * Input should be in format: HTTP/x.y where x and y are positive integers.
     * @todo Does this method need any validation?
     *
     * @param [in] version The value to set the HTTP version to.
     */
    void set_version(std::string const & version);

    /// Get the value of an HTTP header
    /**
     * @todo Make this method case insensitive.
     *
     * NOTE(review): headers are stored in a `header_list`, which orders its
     * keys with `utility::ci_less`. If that comparison is case insensitive
     * this todo may already be satisfied - confirm.
     *
     * @param [in] key The name/key of the header to get.
     * @return The value associated with the given HTTP header key.
     */
    std::string const & get_header(std::string const & key) const;

    /// Extract an HTTP parameter list from a parser header.
    /**
     * If the header requested doesn't exist or exists and is empty the
     * parameter list is valid (but empty).
     *
     * @param [in] key The name/key of the HTTP header to use as input.
     * @param [out] out The parameter list to store extracted parameters in.
     * @return Whether or not the input was a valid parameter list.
     */
    bool get_header_as_plist(std::string const & key, parameter_list & out)
        const;

    /// Append a value to an existing HTTP header
    /**
     * This method will set the value of the HTTP header `key` with the
     * indicated value. If a header with the name `key` already exists, `val`
     * will be appended to the existing value.
     *
     * @todo Make this method case insensitive.
     * @todo Should there be any restrictions on which keys are allowed?
     * @todo Exception free variant
     *
     * @see replace_header
     *
     * @param [in] key The name/key of the header to append to.
     * @param [in] val The value to append.
     */
    void append_header(std::string const & key, std::string const & val);

    /// Set a value for an HTTP header, replacing an existing value
    /**
     * This method will set the value of the HTTP header `key` with the
     * indicated value. If a header with the name `key` already exists, `val`
     * will replace the existing value.
     *
     * @todo Make this method case insensitive.
     * @todo Should there be any restrictions on which keys are allowed?
     * @todo Exception free variant
     *
     * @see append_header
     *
     * @param [in] key The name/key of the header to set.
     * @param [in] val The new value of the header.
     */
    void replace_header(std::string const & key, std::string const & val);

    /// Remove a header from the parser
    /**
     * Removes the header entirely from the parser. This is different than
     * setting the value of the header to blank.
     *
     * @todo Make this method case insensitive.
     *
     * @param [in] key The name/key of the header to remove.
     */
    void remove_header(std::string const & key);

    /// Get HTTP body
    /**
     * Gets the body of the HTTP object
     *
     * @return The body of the HTTP message.
     */
    std::string const & get_body() const {
        return m_body;
    }

    /// Set body content
    /**
     * Set the body content of the HTTP response to the parameter string. Note
     * set_body will also set the Content-Length HTTP header to the appropriate
     * value. If you want the Content-Length header to be something else, do so
     * via replace_header("Content-Length") after calling set_body()
     *
     * @param value String data to include as the body content.
     */
    void set_body(std::string const & value);

    /// Get body size limit
    /**
     * Retrieves the maximum number of bytes to parse & buffer before canceling
     * a request.
     *
     * @since 0.5.0
     *
     * @return The maximum length of a message body.
     */
    size_t get_max_body_size() const {
        return m_body_bytes_max;
    }

    /// Set body size limit
    /**
     * Set the maximum number of bytes to parse and buffer before canceling a
     * request.
     *
     * @since 0.5.0
     *
     * @param value The size to set the max body length to.
     */
    void set_max_body_size(size_t value) {
        m_body_bytes_max = value;
    }

    /// Extract an HTTP parameter list from a string.
    /**
     * @param [in] in The input string.
     * @param [out] out The parameter list to store extracted parameters in.
     * @return Whether or not the input was a valid parameter list.
     */
    bool parse_parameter_list(std::string const & in, parameter_list & out)
        const;
protected:
    /// Process a header line
    /**
     * @todo Update this method to be exception free.
     *
     * @param [in] begin An iterator to the beginning of the sequence.
     * @param [in] end An iterator to the end of the sequence.
     */
    void process_header(std::string::iterator begin, std::string::iterator end);

    /// Prepare the parser to begin parsing body data
    /**
     * Inspects headers to determine if the message has a body that needs to be
     * read. If so, sets up the necessary state, otherwise returns false. If
     * this method returns true and loading the message body is desired call
     * `process_body` until it returns zero bytes or an error.
     *
     * Must not be called until after all headers have been processed.
     *
     * @since 0.5.0
     *
     * @return True if more bytes are needed to load the body, false otherwise.
     */
    bool prepare_body();

    /// Process body data
    /**
     * Parses body data.
     *
     * @since 0.5.0
     *
     * @param [in] buf Pointer to the first byte of body data to process.
     * @param [in] len The number of bytes available at `buf`.
     * @return The number of bytes processed
     */
    size_t process_body(char const * buf, size_t len);

    /// Check if the parser is done parsing the body
    /**
     * Behavior before a call to `prepare_body` is undefined.
     *
     * @since 0.5.0
     *
     * @return True if the message body has been completely loaded, that is
     * when no further body bytes are needed.
     */
    bool body_ready() const {
        return (m_body_bytes_needed == 0);
    }

    /// Generate and return the HTTP headers as a string
    /**
     * Each header will be followed by the \r\n sequence including the last one.
     * A second \r\n sequence (blank header) is not appended by this method
     *
     * @return The HTTP headers as a string.
     */
    std::string raw_headers() const;

    /// HTTP version string, as returned by `get_version`
    std::string m_version;
    /// Header name to header value map
    header_list m_headers;
    
    /// Byte counter related to headers, zero on construction.
    /// NOTE(review): presumably the number of header bytes consumed so far -
    /// confirm against the implementation.
    size_t                  m_header_bytes;
    
    /// Message body, as returned by `get_body`
    std::string             m_body;
    /// Body bytes still outstanding; `body_ready` reports true when zero
    size_t                  m_body_bytes_needed;
    /// Body size limit, see `get_max_body_size` / `set_max_body_size`
    size_t                  m_body_bytes_max;
    /// Encoding of the message body, `body_encoding::unknown` on construction
    body_encoding::value    m_body_encoding;
};

} // namespace parser
} // namespace http
} // namespace websocketpp

#include <websocketpp/http/impl/parser.hpp>

#endif // HTTP_PARSER_HPP