http: return more than url to server app

Provide all bytes as received from transport as data in the http message to server. Additionally provide offset and length of target path, target query, headers and body. Offers apis for parsing of headers, percent decoding, target path/query syntax verification. Type: improvement Change-Id: Idbe6f13afa378650cc5212ea7d3f9319183ebbbe Signed-off-by: Matus Fabian <matfabia@cisco.com>
author: Matus Fabian <matfabia@cisco.com> 2024-06-04 19:00:00 +0200
committer: Florin Coras <florin.coras@gmail.com> 2024-06-13 06:35:26 +0000
commit: 82ad9660becfcdd93c906d909d7e478733c5fbbe (patch)
tree: 9eb2615037a0e49d87ed73dc2ca8447eeeafc32c /src/plugins/http
parent: eaa7d91ad77f9c6691b42b0e9f631166b4bcf44f (diff)
3 files changed, 924 insertions, 47 deletions
diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c
index 893dd877c29..72b4812fd42 100644
--- a/src/plugins/http/http.c
+++ b/src/plugins/http/http.c
@@ -83,6 +83,16 @@ format_http_state (u8 *s, va_list *va)
     }                                                                         \
   while (0)
 
+/**
+ * Check whether the connection state is one this helper accepts for tx.
+ *
+ * @param hc HTTP connection whose @c http_state is inspected.
+ *
+ * @return @c 1 for the four states listed below, @c 0 for any other state.
+ */
+static inline int
+http_state_is_tx_valid (http_conn_t *hc)
+{
+  switch (hc->http_state)
+    {
+    case HTTP_STATE_APP_IO_MORE_DATA:
+    case HTTP_STATE_CLIENT_IO_MORE_DATA:
+    case HTTP_STATE_WAIT_APP_REPLY:
+    case HTTP_STATE_WAIT_APP_METHOD:
+      return 1;
+    default:
+      return 0;
+    }
+}
+
 static inline http_worker_t *
 http_worker_get (u32 thread_index)
 {
@@ -383,7 +393,7 @@ static const char *http_response_template = "HTTP/1.1 %s\r\n"
 
 static const char *http_request_template = "GET %s HTTP/1.1\r\n"
 					   "User-Agent: %s\r\n"
-					   "Accept: */*\r\n";
+					   "Accept: */*\r\n\r\n";
 
 static u32
 http_send_data (http_conn_t *hc, u8 *data, u32 length, u32 offset)
@@ -449,8 +459,18 @@ http_read_message (http_conn_t *hc)
   return 0;
 }
 
-static int
-v_find_index (u8 *vec, u32 offset, char *str)
+/**
+ * @brief Find the first occurrence of the string in the vector.
+ *
+ * @param vec The vector to be scanned.
+ * @param offset Search offset in the vector.
+ * @param num Maximum number of characters to be searched if non-zero.
+ * @param str The string to be searched.
+ *
+ * @return @c -1 if the string is not found within the vector; index otherwise.
+ */
+static inline int
+v_find_index (u8 *vec, u32 offset, u32 num, char *str)
 {
   int start_index = offset;
   u32 slen = (u32) strnlen_s_inline (str, 16);
@@ -461,7 +481,15 @@ v_find_index (u8 *vec, u32 offset, char *str)
   if (vlen <= slen)
     return -1;
 
-  for (; start_index < (vlen - slen); start_index++)
+  int end_index = vlen - slen;
+  if (num)
+    {
+      if (num < slen)
+	return -1;
+      end_index = clib_min (end_index, offset + num - slen);
+    }
+
+  for (; start_index <= end_index; start_index++)
     {
       if (!memcmp (vec + start_index, str, slen))
 	return start_index;
@@ -470,6 +498,259 @@ v_find_index (u8 *vec, u32 offset, char *str)
   return -1;
 }
 
+/**
+ * Split an optional query off the request-target.
+ *
+ * Looks for the first '?' inside the request-target, i.e. the byte range
+ * described by target_path_offset and target_path_len. When found, the bytes
+ * after it become the query and the path is shortened to end just before the
+ * '?'. When no '?' is present the connection is left untouched.
+ *
+ * @param hc HTTP connection with target_path_offset/target_path_len set.
+ */
+static void
+http_identify_optional_query (http_conn_t *hc)
+{
+  u32 target_end = hc->target_path_offset + hc->target_path_len;
+  u32 pos;
+
+  /* scan the request-target only, a '?' elsewhere in the message (headers,
+   * body) is not a query delimiter */
+  for (pos = hc->target_path_offset; pos < target_end; pos++)
+    {
+      if (hc->rx_buf[pos] != '?')
+	continue;
+      hc->target_query_offset = pos + 1;
+      hc->target_query_len = target_end - hc->target_query_offset;
+      hc->target_path_len = pos - hc->target_path_offset;
+      return;
+    }
+}
+
+/**
+ * Classify the request-target and store the result in hc->target_form.
+ *
+ * For origin-form the leading slash is dropped from the reported path. For
+ * origin-form and absolute-form an optional query is split off the path.
+ *
+ * @param hc HTTP connection with target_path_offset/target_path_len set.
+ *
+ * @return @c 0 when one of the four forms matched, @c -1 otherwise.
+ */
+static int
+http_get_target_form (http_conn_t *hc)
+{
+  u8 first = hc->rx_buf[hc->target_path_offset];
+  u32 pos, target_end;
+
+  /* asterisk-form: "*" */
+  if (first == '*' && hc->target_path_len == 1)
+    {
+      hc->target_form = HTTP_TARGET_ASTERISK_FORM;
+      return 0;
+    }
+
+  /* origin-form: 1*( "/" segment ) [ "?" query ] */
+  if (first == '/')
+    {
+      hc->target_form = HTTP_TARGET_ORIGIN_FORM;
+      /* the path is reported without its leading slash */
+      hc->target_path_offset++;
+      hc->target_path_len--;
+      http_identify_optional_query (hc);
+      return 0;
+    }
+
+  /* absolute-form:
+   * scheme "://" host [ ":" port ] *( "/" segment ) [ "?" query ] */
+  if (v_find_index (hc->rx_buf, hc->target_path_offset, hc->target_path_len,
+		    "://") > 0)
+    {
+      hc->target_form = HTTP_TARGET_ABSOLUTE_FORM;
+      http_identify_optional_query (hc);
+      return 0;
+    }
+
+  /* authority-form: host ":" port, detected as a colon followed by digit */
+  target_end = hc->target_path_offset + hc->target_path_len;
+  for (pos = hc->target_path_offset; pos < target_end; pos++)
+    {
+      if (hc->rx_buf[pos] != ':')
+	continue;
+      if (!isdigit (hc->rx_buf[pos + 1]))
+	continue;
+      hc->target_form = HTTP_TARGET_AUTHORITY_FORM;
+      return 0;
+    }
+
+  return -1;
+}
+
+/**
+ * Parse the request line at the start of hc->rx_buf.
+ *
+ * request-line = method SP request-target SP HTTP-version CRLF
+ *
+ * On success hc->method, hc->target_form and the target path/query offsets
+ * and lengths are set, and hc->rx_buf_offset points to the line following
+ * the request line.
+ *
+ * @param hc HTTP connection holding the received bytes in rx_buf.
+ * @param ec Set to the status code to report when parsing fails.
+ *
+ * @return @c 0 on success, @c -1 on failure (with @c *ec set).
+ */
+static int
+http_parse_request_line (http_conn_t *hc, http_status_code_t *ec)
+{
+  /* SP "HTTP/" DIGIT "." DIGIT CRLF */
+  const u32 version_field_len = 11;
+  int i, target_len;
+  u32 next_line_offset;
+
+  /* request-line = method SP request-target SP HTTP-version CRLF */
+  i = v_find_index (hc->rx_buf, 0, 0, "\r\n");
+  if (i < 0)
+    {
+      clib_warning ("request line incomplete");
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return -1;
+    }
+  HTTP_DBG (0, "request line length: %d", i);
+  next_line_offset = i + 2;
+
+  /* there should be at least one more CRLF */
+  if (vec_len (hc->rx_buf) < (next_line_offset + 2))
+    {
+      clib_warning ("malformed message, too short");
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return -1;
+    }
+
+  /* parse method */
+  if ((i = v_find_index (hc->rx_buf, 0, next_line_offset, "GET ")) >= 0)
+    {
+      HTTP_DBG (0, "GET method");
+      hc->method = HTTP_REQ_GET;
+      hc->target_path_offset = i + 4;
+    }
+  else if ((i = v_find_index (hc->rx_buf, 0, next_line_offset, "POST ")) >= 0)
+    {
+      HTTP_DBG (0, "POST method");
+      hc->method = HTTP_REQ_POST;
+      hc->target_path_offset = i + 5;
+    }
+  else
+    {
+      clib_warning ("method not implemented: %8v", hc->rx_buf);
+      *ec = HTTP_STATUS_NOT_IMPLEMENTED;
+      return -1;
+    }
+
+  /* find version, it is searched for in the last bytes of the request line;
+   * a line shorter than that cannot carry it and would make the unsigned
+   * search offset below wrap around */
+  if (next_line_offset < version_field_len)
+    {
+      clib_warning ("HTTP version not present");
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return -1;
+    }
+  i = v_find_index (hc->rx_buf, next_line_offset - version_field_len,
+		    version_field_len, " HTTP/");
+  if (i < 0)
+    {
+      clib_warning ("HTTP version not present");
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return -1;
+    }
+  /* verify major version, it is the byte right after " HTTP/" */
+  if (isdigit (hc->rx_buf[i + 6]))
+    {
+      if (hc->rx_buf[i + 6] != '1')
+	{
+	  clib_warning ("HTTP major version '%c' not supported",
+			hc->rx_buf[i + 6]);
+	  *ec = HTTP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
+	  return -1;
+	}
+    }
+  else
+    {
+      clib_warning ("HTTP major version '%c' is not digit", hc->rx_buf[i + 6]);
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return -1;
+    }
+
+  /* parse request-target, it spans from the end of the method to the space
+   * in front of the version */
+  target_len = i - hc->target_path_offset;
+  if (target_len < 1)
+    {
+      clib_warning ("request-target not present");
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return -1;
+    }
+  hc->target_path_len = target_len;
+  hc->target_query_offset = 0;
+  hc->target_query_len = 0;
+  if (http_get_target_form (hc))
+    {
+      clib_warning ("invalid target");
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return -1;
+    }
+  HTTP_DBG (0, "request-target path length: %u", hc->target_path_len);
+  HTTP_DBG (0, "request-target path offset: %u", hc->target_path_offset);
+  HTTP_DBG (0, "request-target query length: %u", hc->target_query_len);
+  HTTP_DBG (0, "request-target query offset: %u", hc->target_query_offset);
+
+  /* set buffer offset to next line start */
+  hc->rx_buf_offset = next_line_offset;
+
+  return 0;
+}
+
+/**
+ * Locate the header section that starts at hc->rx_buf_offset.
+ *
+ * When the next two bytes are CRLF the section is empty and only
+ * hc->headers_len is set (to zero). Otherwise hc->headers_offset and
+ * hc->headers_len describe the section up to and including the CRLF of the
+ * last field line.
+ *
+ * @param hc HTTP connection, rx_buf_offset points behind the start line.
+ * @param ec Set to the status code to report when the section end is missing.
+ *
+ * @return @c 0 on success, @c -1 on failure (with @c *ec set).
+ */
+static int
+http_identify_headers (http_conn_t *hc, http_status_code_t *ec)
+{
+  u8 *next = hc->rx_buf + hc->rx_buf_offset;
+  int section_end;
+
+  /* a CRLF right away means there is no header at all */
+  if (next[0] == '\r' && next[1] == '\n')
+    {
+      HTTP_DBG (0, "no headers");
+      hc->headers_len = 0;
+      return 0;
+    }
+
+  /* the header section is terminated by an empty line */
+  section_end = v_find_index (hc->rx_buf, hc->rx_buf_offset, 0, "\r\n\r\n");
+  if (section_end < 0)
+    {
+      clib_warning ("cannot find header section end");
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return -1;
+    }
+
+  hc->headers_offset = hc->rx_buf_offset;
+  /* keep the CRLF of the last field line, leave out the empty line */
+  hc->headers_len = section_end - hc->rx_buf_offset + 2;
+  HTTP_DBG (0, "headers length: %u", hc->headers_len);
+  HTTP_DBG (0, "headers offset: %u", hc->headers_offset);
+
+  return 0;
+}
+
+/**
+ * Locate the message body using the Content-Length header.
+ *
+ * hc->body_len is zero when there is no header section or no Content-Length
+ * header. Otherwise hc->body_len holds the parsed value and hc->body_offset
+ * points behind the empty line that ends the header section.
+ * hc->rx_buf_offset is moved behind the "Content-Length:" field name.
+ *
+ * @param hc HTTP connection with headers_offset/headers_len already set.
+ * @param ec Set to the status code to report when parsing fails.
+ *
+ * @return @c 0 on success, @c -1 on failure (with @c *ec set).
+ */
+static int
+http_identify_message_body (http_conn_t *hc, http_status_code_t *ec)
+{
+  unformat_input_t input;
+  /* "%lu" stores an unsigned long, body_len itself is only 32 bits wide */
+  unsigned long content_length = 0;
+  u32 remaining;
+  int i, len, parsed;
+  u8 *line;
+
+  hc->body_len = 0;
+
+  if (hc->headers_len == 0)
+    {
+      HTTP_DBG (0, "no header, no message-body");
+      return 0;
+    }
+
+  /* TODO check for chunked transfer coding */
+
+  /* try to find Content-Length header */
+  i = v_find_index (hc->rx_buf, hc->headers_offset, hc->headers_len,
+		    "Content-Length:");
+  if (i < 0)
+    {
+      HTTP_DBG (0, "Content-Length header not present, no message-body");
+      return 0;
+    }
+  hc->rx_buf_offset = i + 15;
+
+  /* look for the end of this field line inside the header section only;
+   * zero would mean "no limit" to v_find_index, so handle it here */
+  remaining = hc->headers_offset + hc->headers_len - hc->rx_buf_offset;
+  if (remaining == 0)
+    i = -1;
+  else
+    i = v_find_index (hc->rx_buf, hc->rx_buf_offset, remaining, "\r\n");
+  if (i < 0)
+    {
+      clib_warning ("end of line missing");
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return -1;
+    }
+  len = i - hc->rx_buf_offset;
+  if (len < 1)
+    {
+      clib_warning ("invalid header, content length value missing");
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return -1;
+    }
+
+  line = vec_new (u8, len);
+  clib_memcpy (line, hc->rx_buf + hc->rx_buf_offset, len);
+  HTTP_DBG (0, "%v", line);
+
+  unformat_init_vector (&input, line);
+  parsed = unformat (&input, "%lu", &content_length);
+  /* unformat_free is the only release of the copied line in this function,
+   * so it has to run for both outcomes */
+  unformat_free (&input);
+  if (!parsed)
+    {
+      clib_warning ("failed to unformat content length value");
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return -1;
+    }
+  if (content_length > (unsigned long) (u32) ~0)
+    {
+      clib_warning ("content length value too large");
+      *ec = HTTP_STATUS_CONTENT_TOO_LARGE;
+      return -1;
+    }
+  hc->body_len = (u32) content_length;
+
+  /* skip the empty line that follows the header section */
+  hc->body_offset = hc->headers_offset + hc->headers_len + 2;
+  HTTP_DBG (0, "body length: %u", hc->body_len);
+  HTTP_DBG (0, "body offset: %u", hc->body_offset);
+
+  return 0;
+}
+
 static int
 http_parse_header (http_conn_t *hc, int *content_length)
 {
@@ -477,7 +758,7 @@ http_parse_header (http_conn_t *hc, int *content_length)
   int i, len;
   u8 *line;
 
-  i = v_find_index (hc->rx_buf, hc->rx_buf_offset, CONTENT_LEN_STR);
+  i = v_find_index (hc->rx_buf, hc->rx_buf_offset, 0, CONTENT_LEN_STR);
   if (i < 0)
     {
       clib_warning ("cannot find '%s' in the header!", CONTENT_LEN_STR);
@@ -486,7 +767,7 @@ http_parse_header (http_conn_t *hc, int *content_length)
 
   hc->rx_buf_offset = i;
 
-  i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "\n");
+  i = v_find_index (hc->rx_buf, hc->rx_buf_offset, 0, "\n");
   if (i < 0)
     {
       clib_warning ("end of line missing; incomplete data");
@@ -507,7 +788,7 @@ http_parse_header (http_conn_t *hc, int *content_length)
 
   /* skip rest of the header */
   hc->rx_buf_offset += len;
-  i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "<html>");
+  i = v_find_index (hc->rx_buf, hc->rx_buf_offset, 0, "<html>");
   if (i < 0)
     {
       clib_warning ("<html> tag not found");
@@ -541,7 +822,7 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp)
       goto error;
     }
 
-  if ((i = v_find_index (hc->rx_buf, 0, "200 OK")) >= 0)
+  if ((i = v_find_index (hc->rx_buf, 0, 0, "200 OK")) >= 0)
     {
       msg.type = HTTP_MSG_REPLY;
       msg.content_type = HTTP_CONTENT_TEXT_HTML;
@@ -614,9 +895,8 @@ http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp)
   app_worker_t *app_wrk;
   http_msg_t msg;
   session_t *as;
-  int i, rv;
+  int rv;
   u32 len;
-  u8 *buf;
 
   rv = http_read_message (hc);
 
@@ -624,50 +904,45 @@ http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp)
   if (rv)
     return HTTP_SM_STOP;
 
+  HTTP_DBG (0, "%v", hc->rx_buf);
+
   if (vec_len (hc->rx_buf) < 8)
     {
       ec = HTTP_STATUS_BAD_REQUEST;
       goto error;
     }
 
-  if ((i = v_find_index (hc->rx_buf, 0, "GET ")) >= 0)
-    {
-      hc->method = HTTP_REQ_GET;
-      hc->rx_buf_offset = i + 5;
+  rv = http_parse_request_line (hc, &ec);
+  if (rv)
+    goto error;
 
-      i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "HTTP");
-      if (i < 0)
-	{
-	  ec = HTTP_STATUS_BAD_REQUEST;
-	  goto error;
-	}
+  rv = http_identify_headers (hc, &ec);
+  if (rv)
+    goto error;
 
-      HTTP_DBG (0, "GET method %v", hc->rx_buf);
-      len = i - hc->rx_buf_offset - 1;
-    }
-  else if ((i = v_find_index (hc->rx_buf, 0, "POST ")) >= 0)
-    {
-      hc->method = HTTP_REQ_POST;
-      hc->rx_buf_offset = i + 6;
-      len = vec_len (hc->rx_buf) - hc->rx_buf_offset - 1;
-      HTTP_DBG (0, "POST method %v", hc->rx_buf);
-    }
-  else
-    {
-      HTTP_DBG (0, "Unknown http method %v", hc->rx_buf);
-      ec = HTTP_STATUS_METHOD_NOT_ALLOWED;
-      goto error;
-    }
+  rv = http_identify_message_body (hc, &ec);
+  if (rv)
+    goto error;
 
-  buf = &hc->rx_buf[hc->rx_buf_offset];
+  len = vec_len (hc->rx_buf);
 
   msg.type = HTTP_MSG_REQUEST;
   msg.method_type = hc->method;
   msg.content_type = HTTP_CONTENT_TEXT_HTML;
   msg.data.type = HTTP_MSG_DATA_INLINE;
   msg.data.len = len;
-
-  svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, { buf, len } };
+  msg.data.target_form = hc->target_form;
+  msg.data.target_path_offset = hc->target_path_offset;
+  msg.data.target_path_len = hc->target_path_len;
+  msg.data.target_query_offset = hc->target_query_offset;
+  msg.data.target_query_len = hc->target_query_len;
+  msg.data.headers_offset = hc->headers_offset;
+  msg.data.headers_len = hc->headers_len;
+  msg.data.body_offset = hc->body_offset;
+  msg.data.body_len = hc->body_len;
+
+  svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) },
+			     { hc->rx_buf, len } };
 
   as = session_get_from_handle (hc->h_pa_session_handle);
   rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */);
@@ -748,6 +1023,7 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp)
     case HTTP_STATUS_METHOD_NOT_ALLOWED:
     case HTTP_STATUS_BAD_REQUEST:
     case HTTP_STATUS_INTERNAL_ERROR:
+    case HTTP_STATUS_FORBIDDEN:
     case HTTP_STATUS_OK:
       header =
 	format (0, http_response_template, http_status_code_str[msg.code],
diff --git a/src/plugins/http/http.h b/src/plugins/http/http.h
index 7fbefd667f4..e3ee93b6291 100644
--- a/src/plugins/http/http.h
+++ b/src/plugins/http/http.h
@@ -16,6 +16,8 @@
 #ifndef SRC_PLUGINS_HTTP_HTTP_H_
 #define SRC_PLUGINS_HTTP_HTTP_H_
 
+#include <ctype.h>
+
 #include <vnet/plugin/plugin.h>
 #include <vpp/app/version.h>
 
@@ -83,6 +85,14 @@ typedef enum http_msg_type_
   HTTP_MSG_REPLY
 } http_msg_type_t;
 
+/** Request-target forms distinguished when the request line is parsed. */
+typedef enum http_target_form_
+{
+  /* 1*( "/" segment ) [ "?" query ] */
+  HTTP_TARGET_ORIGIN_FORM,
+  /* scheme "://" host [ ":" port ] *( "/" segment ) [ "?" query ] */
+  HTTP_TARGET_ABSOLUTE_FORM,
+  /* host ":" port */
+  HTTP_TARGET_AUTHORITY_FORM,
+  /* "*" */
+  HTTP_TARGET_ASTERISK_FORM
+} http_target_form_t;
+
 #define foreach_http_content_type                                             \
   _ (APP_7Z, ".7z", "application / x - 7z - compressed")                      \
   _ (APP_DOC, ".doc", "application / msword")                                 \
@@ -172,12 +182,50 @@ typedef enum http_content_type_
 } http_content_type_t;
 
 #define foreach_http_status_code                                              \
+  _ (100, CONTINUE, "100 Continue")                                           \
+  _ (101, SWITCHING_PROTOCOLS, "101 Switching Protocols")                     \
   _ (200, OK, "200 OK")                                                       \
+  _ (201, CREATED, "201 Created")                                             \
+  _ (202, ACCEPTED, "202 Accepted")                                           \
+  _ (203, NON_UTHORITATIVE_INFORMATION, "203 Non-Authoritative Information")  \
+  _ (204, NO_CONTENT, "204 No Content")                                       \
+  _ (205, RESET_CONTENT, "205 Reset Content")                                 \
+  _ (206, PARTIAL_CONTENT, "206 Partial Content")                             \
+  _ (300, MULTIPLE_CHOICES, "300 Multiple Choices")                           \
   _ (301, MOVED, "301 Moved Permanently")                                     \
+  _ (302, FOUND, "302 Found")                                                 \
+  _ (303, SEE_OTHER, "303 See Other")                                         \
+  _ (304, NOT_MODIFIED, "304 Not Modified")                                   \
+  _ (305, USE_PROXY, "305 Use Proxy")                                         \
+  _ (307, TEMPORARY_REDIRECT, "307 Temporary Redirect")                       \
+  _ (308, PERMANENT_REDIRECT, "308 Permanent Redirect")                       \
   _ (400, BAD_REQUEST, "400 Bad Request")                                     \
+  _ (401, UNAUTHORIZED, "401 Unauthorized")                                   \
+  _ (402, PAYMENT_REQUIRED, "402 Payment Required")                           \
+  _ (403, FORBIDDEN, "403 Forbidden")                                         \
   _ (404, NOT_FOUND, "404 Not Found")                                         \
   _ (405, METHOD_NOT_ALLOWED, "405 Method Not Allowed")                       \
-  _ (500, INTERNAL_ERROR, "500 Internal Server Error")
+  _ (406, NOT_ACCEPTABLE, "406 Not Acceptable")                               \
+  _ (407, PROXY_AUTHENTICATION_REQUIRED, "407 Proxy Authentication Required") \
+  _ (408, REQUEST_TIMEOUT, "408 Request Timeout")                             \
+  _ (409, CONFLICT, "409 Conflict")                                           \
+  _ (410, GONE, "410 Gone")                                                   \
+  _ (411, LENGTH_REQUIRED, "411 Length Required")                             \
+  _ (412, PRECONDITION_FAILED, "412 Precondition Failed")                     \
+  _ (413, CONTENT_TOO_LARGE, "413 Content Too Large")                         \
+  _ (414, URI_TOO_LONG, "414 URI Too Long")                                   \
+  _ (415, UNSUPPORTED_MEDIA_TYPE, "415 Unsupported Media Type")               \
+  _ (416, RANGE_NOT_SATISFIABLE, "416 Range Not Satisfiable")                 \
+  _ (417, EXPECTATION_FAILED, "417 Expectation Failed")                       \
+  _ (421, MISDIRECTED_REQUEST, "421 Misdirected Request")                     \
+  _ (422, UNPROCESSABLE_CONTENT, "422 Unprocessable_Content")                 \
+  _ (426, UPGRADE_REQUIRED, "426 Upgrade Required")                           \
+  _ (500, INTERNAL_ERROR, "500 Internal Server Error")                        \
+  _ (501, NOT_IMPLEMENTED, "501 Not Implemented")                             \
+  _ (502, BAD_GATEWAY, "502 Bad Gateway")                                     \
+  _ (503, SERVICE_UNAVAILABLE, "503 Service Unavailable")                     \
+  _ (504, GATEWAY_TIMEOUT, "504 Gateway Timeout")                             \
+  _ (505, HTTP_VERSION_NOT_SUPPORTED, "505 HTTP Version Not Supported")
 
 typedef enum http_status_code_
 {
@@ -187,6 +235,51 @@ typedef enum http_status_code_
     HTTP_N_STATUS
 } http_status_code_t;
 
+/* Header field names as string constants, usable as the name argument of
+ * http_get_header. */
+#define HTTP_HEADER_ACCEPT		      "Accept"
+#define HTTP_HEADER_ACCEPT_CHARSET	      "Accept-Charset"
+#define HTTP_HEADER_ACCEPT_ENCODING	      "Accept-Encoding"
+#define HTTP_HEADER_ACCEPT_LANGUAGE	      "Accept-Language"
+#define HTTP_HEADER_ACCEPT_RANGES	      "Accept-Ranges"
+#define HTTP_HEADER_ALLOW		      "Allow"
+#define HTTP_HEADER_AUTHENTICATION_INFO	      "Authentication-Info"
+#define HTTP_HEADER_AUTHORIZATION	      "Authorization"
+#define HTTP_HEADER_CLOSE		      "Close"
+#define HTTP_HEADER_CONNECTION		      "Connection"
+#define HTTP_HEADER_CONTENT_ENCODING	      "Content-Encoding"
+#define HTTP_HEADER_CONTENT_LANGUAGE	      "Content-Language"
+#define HTTP_HEADER_CONTENT_LENGTH	      "Content-Length"
+#define HTTP_HEADER_CONTENT_LOCATION	      "Content-Location"
+#define HTTP_HEADER_CONTENT_RANGE	      "Content-Range"
+#define HTTP_HEADER_CONTENT_TYPE	      "Content-Type"
+#define HTTP_HEADER_DATE		      "Date"
+#define HTTP_HEADER_ETAG		      "ETag"
+#define HTTP_HEADER_EXPECT		      "Expect"
+#define HTTP_HEADER_FROM		      "From"
+#define HTTP_HEADER_HOST		      "Host"
+#define HTTP_HEADER_IF_MATCH		      "If-Match"
+#define HTTP_HEADER_IF_MODIFIED_SINCE	      "If-Modified-Since"
+#define HTTP_HEADER_IF_NONE_MATCH	      "If-None-Match"
+#define HTTP_HEADER_IF_RANGE		      "If-Range"
+#define HTTP_HEADER_IF_UNMODIFIED_SINCE	      "If-Unmodified-Since"
+#define HTTP_HEADER_LAST_MODIFIED	      "Last-Modified"
+#define HTTP_HEADER_LOCATION		      "Location"
+#define HTTP_HEADER_MAX_FORWARDS	      "Max-Forwards"
+#define HTTP_HEADER_PROXY_AUTHENTICATE	      "Proxy-Authenticate"
+#define HTTP_HEADER_PROXY_AUTHENTICATION_INFO "Proxy-Authentication-Info"
+#define HTTP_HEADER_PROXY_AUTHORIZATION	      "Proxy-Authorization"
+#define HTTP_HEADER_RANGE		      "Range"
+#define HTTP_HEADER_REFERER		      "Referer"
+#define HTTP_HEADER_RETRY_AFTER		      "Retry-After"
+#define HTTP_HEADER_SERVER		      "Server"
+#define HTTP_HEADER_TE			      "TE"
+#define HTTP_HEADER_TRAILER		      "Trailer"
+#define HTTP_HEADER_TRANSFER_ENCODING	      "Transfer-Encoding"
+#define HTTP_HEADER_UPGRADE		      "Upgrade"
+#define HTTP_HEADER_USER_AGENT		      "User-Agent"
+#define HTTP_HEADER_VARY		      "Vary"
+#define HTTP_HEADER_VIA			      "Via"
+#define HTTP_HEADER_WWW_AUTHENTICATE	      "WWW-Authenticate"
+
 typedef enum http_msg_data_type_
 {
   HTTP_MSG_DATA_INLINE,
@@ -197,6 +290,15 @@ typedef struct http_msg_data_
 {
   http_msg_data_type_t type;
   u64 len;
+  http_target_form_t target_form;
+  u32 target_path_offset;
+  u32 target_path_len;
+  u32 target_query_offset;
+  u32 target_query_len;
+  u32 headers_offset;
+  u32 headers_len;
+  u32 body_offset;
+  u32 body_len;
   u8 data[0];
 } http_msg_data_t;
 
@@ -239,6 +341,15 @@ typedef struct http_tc_
   http_buffer_t tx_buf;
   u32 to_recv;
   u32 bytes_dequeued;
+  http_target_form_t target_form;
+  u32 target_path_offset;
+  u32 target_path_len;
+  u32 target_query_offset;
+  u32 target_query_len;
+  u32 headers_offset;
+  u32 headers_len;
+  u32 body_offset;
+  u32 body_len;
 } http_conn_t;
 
 typedef struct http_worker_
@@ -267,14 +378,104 @@ typedef struct http_main_
   u32 fifo_size;
 } http_main_t;
 
-static inline int
-http_state_is_tx_valid (http_conn_t *hc)
+/**
+ * Check that every byte of a target path or query is an allowed character
+ * or part of a well-formed percent-encoded triplet.
+ *
+ * @param target     Vector with the path or query to validate.
+ * @param is_query   Non-zero to additionally accept '?'.
+ * @param is_encoded Set to @c 1 when at least one percent-encoded triplet
+ *                   was seen, @c 0 otherwise; written only on success
+ *                   (optional, may be @c 0).
+ *
+ * @return @c 0 when valid, @c -1 otherwise.
+ */
+always_inline int
+_validate_target_syntax (u8 *target, int is_query, int *is_encoded)
+{
+  int i, encoded = 0;
+
+  /* one bit per character code, set for characters accepted as they are */
+  static uword valid_chars[4] = {
+    /* !$&'()*+,-./0123456789:;= */
+    0x2fffffd200000000,
+    /* @ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ */
+    0x47fffffe87ffffff,
+    0x0000000000000000,
+    0x0000000000000000,
+  };
+
+  for (i = 0; i < vec_len (target); i++)
+    {
+      if (clib_bitmap_get_no_check (valid_chars, target[i]))
+	continue;
+      /* target was already split after first question mark,
+       * for query it is valid character */
+      if (is_query && target[i] == '?')
+	continue;
+      /* pct-encoded = "%" HEXDIG HEXDIG */
+      if (target[i] == '%')
+	{
+	  /* both hex digits must still be inside the vector */
+	  if ((i + 2) >= vec_len (target))
+	    return -1;
+	  if (!isxdigit (target[i + 1]) || !isxdigit (target[i + 2]))
+	    return -1;
+	  i += 2;
+	  encoded = 1;
+	  continue;
+	}
+      clib_warning ("invalid character %d", target[i]);
+      return -1;
+    }
+  if (is_encoded)
+    *is_encoded = encoded;
+  return 0;
+}
+
+/**
+ * Validate a target path against the "absolute-path" rule
+ * (RFC9110 section 4.1).
+ *
+ * @param path       Vector with the target path to check.
+ * @param is_encoded Optional output flag, tells whether percent-encoding
+ *                   is present in the path.
+ *
+ * @return @c 0 on success.
+ */
+always_inline int
+http_validate_abs_path_syntax (u8 *path, int *is_encoded)
+{
+  int rv;
+
+  rv = _validate_target_syntax (path, 0 /* is_query */, is_encoded);
+  return rv;
+}
+
+/**
+ * A "query" rule validation (RFC3986 section 3.4, percent-encoding as in
+ * section 2.1).
+ *
+ * @param query      Target query to validate.
+ * @param is_encoded Return flag that indicates if percent-encoded (optional).
+ *
+ * @return @c 0 on success.
+ */
+always_inline int
+http_validate_query_syntax (u8 *query, int *is_encoded)
+{
+  return _validate_target_syntax (query, 1, is_encoded);
+}
+
+/* Value of one hex digit character. The argument is evaluated more than
+ * once, so it must not have side effects. */
+#define htoi(x) (isdigit (x) ? ((x) - '0') : (tolower (x) - 'a' + 10))
+
+/**
+ * Decode percent-encoded data.
+ *
+ * @param src Data to decode.
+ *
+ * @return New vector with decoded data.
+ *
+ * A '%' that is not followed by two hex digits inside @c src is copied
+ * unchanged instead of being decoded.
+ *
+ * The caller is always responsible to free the returned vector.
+ */
+always_inline u8 *
+http_percent_decode (u8 *src)
 {
-  http_state_t state = hc->http_state;
-  return (state == HTTP_STATE_APP_IO_MORE_DATA ||
-	  state == HTTP_STATE_CLIENT_IO_MORE_DATA ||
-	  state == HTTP_STATE_WAIT_APP_REPLY ||
-	  state == HTTP_STATE_WAIT_APP_METHOD);
+  int i;
+  u8 *decoded_uri = 0;
+
+  for (i = 0; i < vec_len (src); i++)
+    {
+      /* decode complete "%" HEXDIG HEXDIG triplets only, so that nothing
+       * behind the end of src is read */
+      if (src[i] == '%' && (i + 2) < vec_len (src) &&
+	  isxdigit (src[i + 1]) && isxdigit (src[i + 2]))
+	{
+	  u8 c = (htoi (src[i + 1]) << 4) | htoi (src[i + 2]);
+	  vec_add1 (decoded_uri, c);
+	  i += 2;
+	}
+      else
+	vec_add1 (decoded_uri, src[i]);
+    }
+  return decoded_uri;
 }
 
 /**
@@ -345,6 +546,250 @@ http_path_remove_dot_segments (u8 *path)
   return new_path;
 }
 
+/**
+ * Parse one header field name, i.e. token characters up to a colon.
+ *
+ * @param pos              In: start of the field line. Out: first byte
+ *                         behind the colon (updated on success only).
+ * @param end              End of the data to parse (not dereferenced).
+ * @param field_name_start Set to the start of the field name.
+ * @param field_name_len   Set to the field name length on success.
+ *
+ * @return @c 0 on success, @c -1 when the name is empty, contains an
+ *         invalid character or the colon is missing.
+ */
+always_inline int
+_parse_field_name (u8 **pos, u8 *end, u8 **field_name_start,
+		   u32 *field_name_len)
+{
+  u32 name_len = 0;
+  u8 *p;
+
+  /* one bit per character code, set for tchar (RFC9110 section 5.6.2) */
+  static uword tchar[4] = {
+    /* !#$%&'*+-.0123456789 */
+    0x03ff6cfa00000000,
+    /* ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~ */
+    0x57ffffffc7fffffe,
+    0x0000000000000000,
+    0x0000000000000000,
+  };
+
+  p = *pos;
+
+  *field_name_start = p;
+  while (p != end)
+    {
+      if (clib_bitmap_get_no_check (tchar, *p))
+	{
+	  name_len++;
+	  p++;
+	}
+      else if (*p == ':')
+	{
+	  if (name_len == 0)
+	    {
+	      clib_warning ("empty field name");
+	      return -1;
+	    }
+	  *field_name_len = name_len;
+	  /* continue behind the colon */
+	  p++;
+	  *pos = p;
+	  return 0;
+	}
+      else
+	{
+	  clib_warning ("invalid character %d", *p);
+	  return -1;
+	}
+    }
+  clib_warning ("field name end not found");
+  return -1;
+}
+
+/**
+ * Parse one header field value up to and including the CRLF ending the line.
+ *
+ * Leading and trailing spaces and tabs are not part of the reported value.
+ *
+ * @param pos               In: first byte behind the colon. Out: first byte
+ *                          behind the CRLF (updated on success only).
+ * @param end               End of the data to parse (not dereferenced).
+ * @param field_value_start Set to the first byte of the value.
+ * @param field_value_len   Set to the value length on success.
+ *
+ * @return @c 0 on success, @c -1 when the value is empty, contains an
+ *         invalid character or the line is not terminated by CRLF.
+ */
+always_inline int
+_parse_field_value (u8 **pos, u8 *end, u8 **field_value_start,
+		    u32 *field_value_len)
+{
+  u32 value_len = 0;
+  u8 *p;
+
+  p = *pos;
+
+  /* skip leading whitespace */
+  while (1)
+    {
+      if (p == end)
+	{
+	  clib_warning ("field value not found");
+	  return -1;
+	}
+      else if (*p != ' ' && *p != '\t')
+	{
+	  break;
+	}
+      p++;
+    }
+
+  *field_value_start = p;
+  while (p != end)
+    {
+      if (*p == '\r')
+	{
+	  /* the LF must be in front of end too, otherwise it is not ours
+	   * to read */
+	  if ((end - p) < 2)
+	    {
+	      clib_warning ("incorrect field line end");
+	      return -1;
+	    }
+	  p++;
+	  if (*p == '\n')
+	    {
+	      if (value_len == 0)
+		{
+		  clib_warning ("empty field value");
+		  return -1;
+		}
+	      p++;
+	      *pos = p;
+	      /* skip trailing whitespace, the walk back stops at the first
+	       * byte of the value at the latest since it is not whitespace */
+	      p = *field_value_start + value_len - 1;
+	      while (*p == ' ' || *p == '\t')
+		{
+		  p--;
+		  value_len--;
+		}
+	      *field_value_len = value_len;
+	      return 0;
+	    }
+	  clib_warning ("CR without LF");
+	  return -1;
+	}
+      /* control characters other than tab are not accepted */
+      if (*p < ' ' && *p != '\t')
+	{
+	  clib_warning ("invalid character %d", *p);
+	  return -1;
+	}
+      p++;
+      value_len++;
+    }
+
+  clib_warning ("field value end not found");
+  return -1;
+}
+
+/** One parsed header; http_parse_headers stores both members as
+ * null-terminated vectors. */
+typedef struct
+{
+  u8 *name;  /* field name */
+  u8 *value; /* field value, repeated headers are joined by ", " */
+} http_header_t;
+
+/** Parsed headers with lookup by name, built by http_parse_headers and
+ * released by http_free_header_table. */
+typedef struct
+{
+  http_header_t *headers; /* vector of parsed headers */
+  /* hash keyed by header name, the stored value is the index of the
+   * header in the headers vector */
+  uword *value_by_name;
+} http_header_table_t;
+
+/**
+ * Release a header table together with everything it holds: the name and
+ * value of each header, the header vector, the lookup hash and the table
+ * itself.
+ *
+ * @param ht Header table to release, must not be used afterwards.
+ */
+always_inline void
+http_free_header_table (http_header_table_t *ht)
+{
+  uword n_headers = vec_len (ht->headers);
+  uword idx;
+
+  for (idx = 0; idx < n_headers; idx++)
+    {
+      vec_free (ht->headers[idx].name);
+      vec_free (ht->headers[idx].value);
+    }
+  vec_free (ht->headers);
+  hash_free (ht->value_by_name);
+  clib_mem_free (ht);
+}
+
+/**
+ * Parse headers in given vector.
+ *
+ * @param headers Vector to parse, a sequence of field lines each terminated
+ *                by CRLF.
+ * @param [out] header_table Parsed headers in case of success, not written
+ *                           on failure.
+ *
+ * @return @c 0 on success.
+ *
+ * When a header name is repeated, its values are combined into one record,
+ * separated by ", " in the order they appear.
+ *
+ * The caller is responsible to free the returned @c header_table
+ * using @c http_free_header_table .
+ */
+always_inline int
+http_parse_headers (u8 *headers, http_header_table_t **header_table)
+{
+  u8 *pos, *end, *name_start, *value_start, *name;
+  u32 name_len, value_len;
+  int rv;
+  http_header_t *header;
+  http_header_table_t *ht;
+  uword *p;
+
+  end = headers + vec_len (headers);
+  pos = headers;
+
+  ht = clib_mem_alloc (sizeof (*ht));
+  ht->value_by_name = hash_create_string (0, sizeof (uword));
+  ht->headers = 0;
+  do
+    {
+      rv = _parse_field_name (&pos, end, &name_start, &name_len);
+      if (rv != 0)
+	{
+	  http_free_header_table (ht);
+	  return rv;
+	}
+      rv = _parse_field_value (&pos, end, &value_start, &value_len);
+      if (rv != 0)
+	{
+	  http_free_header_table (ht);
+	  return rv;
+	}
+      /* the name is used as hash key, so it has to be a C string */
+      name = vec_new (u8, name_len);
+      clib_memcpy (name, name_start, name_len);
+      vec_terminate_c_string (name);
+      /* check if header is repeated */
+      p = hash_get_mem (ht->value_by_name, name);
+      if (p)
+	{
+	  /* if yes combine values */
+	  header = vec_elt_at_index (ht->headers, p[0]);
+	  vec_pop (header->value); /* drop null byte */
+	  header->value = format (header->value, ", %U%c", format_ascii_bytes,
+				  value_start, value_len, 0);
+	  vec_free (name);
+	  continue;
+	}
+      /* or create new record; the last argument of vec_add2 is the number
+       * of elements to append, exactly one is needed */
+      vec_add2 (ht->headers, header, 1);
+      header->name = name;
+      header->value = vec_new (u8, value_len);
+      clib_memcpy (header->value, value_start, value_len);
+      vec_terminate_c_string (header->value);
+      /* the hash stores the index, not a pointer into the vector */
+      hash_set_mem (ht->value_by_name, header->name, header - ht->headers);
+    }
+  while (pos != end);
+
+  *header_table = ht;
+
+  return 0;
+}
+
+/**
+ * Look up a header by name in a parsed header table.
+ *
+ * @param header_table Table to search.
+ * @param name Name of the header to look for.
+ *
+ * @return Value of the matching header, @c 0 when the name is not present.
+ */
+always_inline const char *
+http_get_header (http_header_table_t *header_table, const char *name)
+{
+  uword *slot = hash_get_mem (header_table->value_by_name, name);
+
+  if (!slot)
+    return 0;
+
+  /* the hash entry holds the position of the header in the vector */
+  return (const char *) vec_elt_at_index (header_table->headers, slot[0])
+    ->value;
+}
+
 #endif /* SRC_PLUGINS_HTTP_HTTP_H_ */
 
 /*
diff --git a/src/plugins/http/http_plugin.rst b/src/plugins/http/http_plugin.rst
new file mode 100644
index 00000000000..c4c4d2c8234
--- /dev/null
+++ b/src/plugins/http/http_plugin.rst
@@ -0,0 +1,156 @@
+.. _http_plugin:
+
+.. toctree::
+
+HTTP Plugin
+===========
+
+Overview
+--------
+
+This plugin adds the HTTP protocol to VPP's Host Stack.
+As a result parsing of HTTP/1 request or response is available for internal VPP applications.
+
+Usage
+-----
+
+The plugin exposes following inline functions: ``http_validate_abs_path_syntax``, ``http_validate_query_syntax``,
+``http_percent_decode``, ``http_path_remove_dot_segments``, ``http_parse_headers``, ``http_get_header``,
+``http_free_header_table``.
+
+It relies on the hoststack constructs and uses ``http_msg_data_t`` data structure for passing metadata to/from applications.
+
+Server application
+^^^^^^^^^^^^^^^^^^
+
+Server application sets ``TRANSPORT_PROTO_HTTP`` as ``transport_proto`` in session endpoint configuration when registering to listen.
+
+Receiving data
+""""""""""""""
+
+HTTP plugin sends message header with metadata for parsing, in form of offset and length, followed by all data bytes as received from transport.
+
+Application will get pre-parsed following items:
+
+* HTTP method
+* target form
+* target path offset and length
+* target query offset and length
+* header section offset and length
+* body offset and length
+
+The example below reads HTTP message header in ``builtin_app_rx_callback``, which is first step application should do:
+
+.. code-block:: C
+
+  #include <http/http.h>
+  http_msg_t msg;
+  rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+  ASSERT (rv == sizeof (msg));
+
+As next step application might validate message and method type, for example application only expects to receive GET requests:
+
+.. code-block:: C
+
+  if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET)
+    {
+      /* your error handling */
+    }
+
+Now application can start reading HTTP data. First let's read the target path:
+
+.. code-block:: C
+
+  u8 *target_path = 0;
+  vec_validate (target_path, msg.data.target_path_len - 1);
+  rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset, msg.data.target_path_len, target_path);
+  ASSERT (rv == msg.data.target_path_len);
+
+Application might also want to know target form which is stored in ``msg.data.target_form``, you can read more about target forms in RFC9112 section 3.2.
+In case of origin form HTTP plugin always sets ``target_path_offset`` after leading slash character.
+
+The example below validates the "absolute-path" rule, as described in RFC9110 section 4.1, when the target is in origin form; additionally, the application can learn whether percent encoding is used and decode the path:
+
+.. code-block:: C
+
+  int is_encoded = 0;
+  if (msg.data.target_form == HTTP_TARGET_ORIGIN_FORM)
+    {
+      if (http_validate_abs_path_syntax (target_path, &is_encoded))
+        {
+          /* your error handling */
+        }
+      if (is_encoded)
+        {
+          u8 *decoded = http_percent_decode (target_path);
+          vec_free (target_path);
+          target_path = decoded;
+        }
+    }
+
+More on topic when to decode in RFC3986 section 2.4.
+
+When application serves static files, it is highly recommended to sanitize target path by removing dot segments (you don't want to risk path traversal attack):
+
+.. code-block:: C
+
+  u8 *sanitized_path;
+  sanitized_path = http_path_remove_dot_segments (target_path);
+
+Let's move to target query which is optional. Percent encoding might be used too, but we skip it for brevity:
+
+.. code-block:: C
+
+  u8 *target_query = 0;
+  if (msg.data.target_query_len)
+    {
+      vec_validate (target_query, msg.data.target_query_len - 1);
+      rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_query_offset,
+			  msg.data.target_query_len, target_query);
+      ASSERT (rv == msg.data.target_query_len);
+      if (http_validate_query_syntax (target_query, 0))
+        {
+          /* your error handling */
+        }
+    }
+
+And now for something completely different, headers.
+Headers are parsed using a generic algorithm, independent of the individual header names.
+When header is repeated, its combined value consists of all values separated by comma, concatenated in order as received.
+Following example shows how to parse headers:
+
+.. code-block:: C
+
+  if (msg.data.headers_len)
+    {
+      u8 *headers = 0;
+      http_header_table_t *ht;
+      vec_validate (headers, msg.data.headers_len - 1);
+      rv = svm_fifo_peek (ts->rx_fifo, msg.data.headers_offset,
+			  msg.data.headers_len, headers);
+      ASSERT (rv == msg.data.headers_len);
+      if (http_parse_headers (headers, &ht))
+        {
+          /* your error handling */
+        }
+      /* get Accept header */
+      const char *accept_value = http_get_header (ht, HTTP_HEADER_ACCEPT);
+      if (accept_value)
+        {
+          /* do something interesting */
+        }
+      http_free_header_table (ht);
+      vec_free (headers);
+    }
+
+Finally application reads body:
+
+.. code-block:: C
+
+  u8 *body = 0;
+  if (msg.data.body_len)
+    {
+      vec_validate (body, msg.data.body_len - 1);
+      rv = svm_fifo_peek (ts->rx_fifo, msg.data.body_offset, msg.data.body_len, body);
+      ASSERT (rv == msg.data.body_len);
+    }
author	Matus Fabian <matfabia@cisco.com>	2024-06-04 19:00:00 +0200
committer	Florin Coras <florin.coras@gmail.com>	2024-06-13 06:35:26 +0000
commit	82ad9660becfcdd93c906d909d7e478733c5fbbe (patch)
tree	9eb2615037a0e49d87ed73dc2ca8447eeeafc32c /src/plugins/http
parent	eaa7d91ad77f9c6691b42b0e9f631166b4bcf44f (diff)