diff options
Diffstat (limited to 'app/nginx/src/http/modules/ngx_http_charset_filter_module.c')
-rw-r--r-- | app/nginx/src/http/modules/ngx_http_charset_filter_module.c | 1685 |
1 files changed, 0 insertions, 1685 deletions
diff --git a/app/nginx/src/http/modules/ngx_http_charset_filter_module.c b/app/nginx/src/http/modules/ngx_http_charset_filter_module.c deleted file mode 100644 index e52b96e..0000000 --- a/app/nginx/src/http/modules/ngx_http_charset_filter_module.c +++ /dev/null @@ -1,1685 +0,0 @@ - -/* - * Copyright (C) Igor Sysoev - * Copyright (C) Nginx, Inc. - */ - - -#include <ngx_config.h> -#include <ngx_core.h> -#include <ngx_http.h> - - -#define NGX_HTTP_CHARSET_OFF -2 -#define NGX_HTTP_NO_CHARSET -3 -#define NGX_HTTP_CHARSET_VAR 0x10000 - -/* 1 byte length and up to 3 bytes for the UTF-8 encoding of the UCS-2 */ -#define NGX_UTF_LEN 4 - -#define NGX_HTML_ENTITY_LEN (sizeof("") - 1) - - -typedef struct { - u_char **tables; - ngx_str_t name; - - unsigned length:16; - unsigned utf8:1; -} ngx_http_charset_t; - - -typedef struct { - ngx_int_t src; - ngx_int_t dst; -} ngx_http_charset_recode_t; - - -typedef struct { - ngx_int_t src; - ngx_int_t dst; - u_char *src2dst; - u_char *dst2src; -} ngx_http_charset_tables_t; - - -typedef struct { - ngx_array_t charsets; /* ngx_http_charset_t */ - ngx_array_t tables; /* ngx_http_charset_tables_t */ - ngx_array_t recodes; /* ngx_http_charset_recode_t */ -} ngx_http_charset_main_conf_t; - - -typedef struct { - ngx_int_t charset; - ngx_int_t source_charset; - ngx_flag_t override_charset; - - ngx_hash_t types; - ngx_array_t *types_keys; -} ngx_http_charset_loc_conf_t; - - -typedef struct { - u_char *table; - ngx_int_t charset; - ngx_str_t charset_name; - - ngx_chain_t *busy; - ngx_chain_t *free_bufs; - ngx_chain_t *free_buffers; - - size_t saved_len; - u_char saved[NGX_UTF_LEN]; - - unsigned length:16; - unsigned from_utf8:1; - unsigned to_utf8:1; -} ngx_http_charset_ctx_t; - - -typedef struct { - ngx_http_charset_tables_t *table; - ngx_http_charset_t *charset; - ngx_uint_t characters; -} ngx_http_charset_conf_ctx_t; - - -static ngx_int_t ngx_http_destination_charset(ngx_http_request_t *r, - ngx_str_t *name); -static ngx_int_t ngx_http_main_request_charset(ngx_http_request_t *r, - ngx_str_t *name); -static ngx_int_t ngx_http_source_charset(ngx_http_request_t *r, - ngx_str_t *name); -static ngx_int_t ngx_http_get_charset(ngx_http_request_t *r, ngx_str_t *name); -static ngx_inline void ngx_http_set_charset(ngx_http_request_t *r, - ngx_str_t *charset); -static ngx_int_t ngx_http_charset_ctx(ngx_http_request_t *r, - ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset); -static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table); -static ngx_chain_t *ngx_http_charset_recode_from_utf8(ngx_pool_t *pool, - ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx); -static ngx_chain_t *ngx_http_charset_recode_to_utf8(ngx_pool_t *pool, - ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx); - -static ngx_chain_t *ngx_http_charset_get_buf(ngx_pool_t *pool, - ngx_http_charset_ctx_t *ctx); -static ngx_chain_t *ngx_http_charset_get_buffer(ngx_pool_t *pool, - ngx_http_charset_ctx_t *ctx, size_t size); - -static char *ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd, - void *conf); -static char *ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, - void *conf); - -static char *ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd, - void *conf); -static ngx_int_t ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name); - -static void *ngx_http_charset_create_main_conf(ngx_conf_t *cf); -static void *ngx_http_charset_create_loc_conf(ngx_conf_t *cf); -static char *ngx_http_charset_merge_loc_conf(ngx_conf_t *cf, - void *parent, void *child); -static ngx_int_t ngx_http_charset_postconfiguration(ngx_conf_t *cf); - - -static ngx_str_t ngx_http_charset_default_types[] = { - ngx_string("text/html"), - ngx_string("text/xml"), - ngx_string("text/plain"), - ngx_string("text/vnd.wap.wml"), - ngx_string("application/javascript"), - ngx_string("application/rss+xml"), - ngx_null_string -}; - - -static ngx_command_t ngx_http_charset_filter_commands[] = { - - { ngx_string("charset"), - NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF - |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1, - ngx_http_set_charset_slot, - NGX_HTTP_LOC_CONF_OFFSET, - offsetof(ngx_http_charset_loc_conf_t, charset), - NULL }, - - { ngx_string("source_charset"), - NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF - |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1, - ngx_http_set_charset_slot, - NGX_HTTP_LOC_CONF_OFFSET, - offsetof(ngx_http_charset_loc_conf_t, source_charset), - NULL }, - - { ngx_string("override_charset"), - NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF - |NGX_HTTP_LIF_CONF|NGX_CONF_FLAG, - ngx_conf_set_flag_slot, - NGX_HTTP_LOC_CONF_OFFSET, - offsetof(ngx_http_charset_loc_conf_t, override_charset), - NULL }, - - { ngx_string("charset_types"), - NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE, - ngx_http_types_slot, - NGX_HTTP_LOC_CONF_OFFSET, - offsetof(ngx_http_charset_loc_conf_t, types_keys), - &ngx_http_charset_default_types[0] }, - - { ngx_string("charset_map"), - NGX_HTTP_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_TAKE2, - ngx_http_charset_map_block, - NGX_HTTP_MAIN_CONF_OFFSET, - 0, - NULL }, - - ngx_null_command -}; - - -static ngx_http_module_t ngx_http_charset_filter_module_ctx = { - NULL, /* preconfiguration */ - ngx_http_charset_postconfiguration, /* postconfiguration */ - - ngx_http_charset_create_main_conf, /* create main configuration */ - NULL, /* init main configuration */ - - NULL, /* create server configuration */ - NULL, /* merge server configuration */ - - ngx_http_charset_create_loc_conf, /* create location configuration */ - ngx_http_charset_merge_loc_conf /* merge location configuration */ -}; - - -ngx_module_t ngx_http_charset_filter_module = { - NGX_MODULE_V1, - &ngx_http_charset_filter_module_ctx, /* module context */ - ngx_http_charset_filter_commands, /* module directives */ - NGX_HTTP_MODULE, /* module type */ - NULL, /* init master */ - NULL, /* init module */ - NULL, /* init process */ - NULL, /* init thread */ - NULL, /* exit thread */ - NULL, /* exit process */ - NULL, /* exit master */ - NGX_MODULE_V1_PADDING -}; - - -static ngx_http_output_header_filter_pt ngx_http_next_header_filter; -static ngx_http_output_body_filter_pt ngx_http_next_body_filter; - - -static ngx_int_t -ngx_http_charset_header_filter(ngx_http_request_t *r) -{ - ngx_int_t charset, source_charset; - ngx_str_t dst, src; - ngx_http_charset_t *charsets; - ngx_http_charset_main_conf_t *mcf; - - if (r == r->main) { - charset = ngx_http_destination_charset(r, &dst); - - } else { - charset = ngx_http_main_request_charset(r, &dst); - } - - if (charset == NGX_ERROR) { - return NGX_ERROR; - } - - if (charset == NGX_DECLINED) { - return ngx_http_next_header_filter(r); - } - - /* charset: charset index or NGX_HTTP_NO_CHARSET */ - - source_charset = ngx_http_source_charset(r, &src); - - if (source_charset == NGX_ERROR) { - return NGX_ERROR; - } - - /* - * source_charset: charset index, NGX_HTTP_NO_CHARSET, - * or NGX_HTTP_CHARSET_OFF - */ - - ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, - "charset: \"%V\" > \"%V\"", &src, &dst); - - if (source_charset == NGX_HTTP_CHARSET_OFF) { - ngx_http_set_charset(r, &dst); - - return ngx_http_next_header_filter(r); - } - - if (charset == NGX_HTTP_NO_CHARSET - || source_charset == NGX_HTTP_NO_CHARSET) - { - if (source_charset != charset - || ngx_strncasecmp(dst.data, src.data, dst.len) != 0) - { - goto no_charset_map; - } - - ngx_http_set_charset(r, &dst); - - return ngx_http_next_header_filter(r); - } - - if (source_charset == charset) { - r->headers_out.content_type.len = r->headers_out.content_type_len; - - ngx_http_set_charset(r, &dst); - - return ngx_http_next_header_filter(r); - } - - /* source_charset != charset */ - - if (r->headers_out.content_encoding - && r->headers_out.content_encoding->value.len) - { - return ngx_http_next_header_filter(r); - } - - mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module); - charsets = mcf->charsets.elts; - - if (charsets[source_charset].tables == NULL - || charsets[source_charset].tables[charset] == NULL) - { - goto no_charset_map; - } - - r->headers_out.content_type.len = r->headers_out.content_type_len; - - ngx_http_set_charset(r, &dst); - - return ngx_http_charset_ctx(r, charsets, charset, source_charset); - -no_charset_map: - - ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, - "no \"charset_map\" between the charsets \"%V\" and \"%V\"", - &src, &dst); - - return ngx_http_next_header_filter(r); -} - - -static ngx_int_t -ngx_http_destination_charset(ngx_http_request_t *r, ngx_str_t *name) -{ - ngx_int_t charset; - ngx_http_charset_t *charsets; - ngx_http_variable_value_t *vv; - ngx_http_charset_loc_conf_t *mlcf; - ngx_http_charset_main_conf_t *mcf; - - if (r->headers_out.content_type.len == 0) { - return NGX_DECLINED; - } - - if (r->headers_out.override_charset - && r->headers_out.override_charset->len) - { - *name = *r->headers_out.override_charset; - - charset = ngx_http_get_charset(r, name); - - if (charset != NGX_HTTP_NO_CHARSET) { - return charset; - } - - ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, - "unknown charset \"%V\" to override", name); - - return NGX_DECLINED; - } - - mlcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module); - charset = mlcf->charset; - - if (charset == NGX_HTTP_CHARSET_OFF) { - return NGX_DECLINED; - } - - if (r->headers_out.charset.len) { - if (mlcf->override_charset == 0) { - return NGX_DECLINED; - } - - } else { - if (ngx_http_test_content_type(r, &mlcf->types) == NULL) { - return NGX_DECLINED; - } - } - - if (charset < NGX_HTTP_CHARSET_VAR) { - mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module); - charsets = mcf->charsets.elts; - *name = charsets[charset].name; - return charset; - } - - vv = ngx_http_get_indexed_variable(r, charset - NGX_HTTP_CHARSET_VAR); - - if (vv == NULL || vv->not_found) { - return NGX_ERROR; - } - - name->len = vv->len; - name->data = vv->data; - - return ngx_http_get_charset(r, name); -} - - -static ngx_int_t -ngx_http_main_request_charset(ngx_http_request_t *r, ngx_str_t *src) -{ - ngx_int_t charset; - ngx_str_t *main_charset; - ngx_http_charset_ctx_t *ctx; - - ctx = ngx_http_get_module_ctx(r->main, ngx_http_charset_filter_module); - - if (ctx) { - *src = ctx->charset_name; - return ctx->charset; - } - - main_charset = &r->main->headers_out.charset; - - if (main_charset->len == 0) { - return NGX_DECLINED; - } - - ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t)); - if (ctx == NULL) { - return NGX_ERROR; - } - - ngx_http_set_ctx(r->main, ctx, ngx_http_charset_filter_module); - - charset = ngx_http_get_charset(r, main_charset); - - ctx->charset = charset; - ctx->charset_name = *main_charset; - *src = *main_charset; - - return charset; -} - - -static ngx_int_t -ngx_http_source_charset(ngx_http_request_t *r, ngx_str_t *name) -{ - ngx_int_t charset; - ngx_http_charset_t *charsets; - ngx_http_variable_value_t *vv; - ngx_http_charset_loc_conf_t *lcf; - ngx_http_charset_main_conf_t *mcf; - - if (r->headers_out.charset.len) { - *name = r->headers_out.charset; - return ngx_http_get_charset(r, name); - } - - lcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module); - - charset = lcf->source_charset; - - if (charset == NGX_HTTP_CHARSET_OFF) { - name->len = 0; - return charset; - } - - if (charset < NGX_HTTP_CHARSET_VAR) { - mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module); - charsets = mcf->charsets.elts; - *name = charsets[charset].name; - return charset; - } - - vv = ngx_http_get_indexed_variable(r, charset - NGX_HTTP_CHARSET_VAR); - - if (vv == NULL || vv->not_found) { - return NGX_ERROR; - } - - name->len = vv->len; - name->data = vv->data; - - return ngx_http_get_charset(r, name); -} - - -static ngx_int_t -ngx_http_get_charset(ngx_http_request_t *r, ngx_str_t *name) -{ - ngx_uint_t i, n; - ngx_http_charset_t *charset; - ngx_http_charset_main_conf_t *mcf; - - mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module); - - charset = mcf->charsets.elts; - n = mcf->charsets.nelts; - - for (i = 0; i < n; i++) { - if (charset[i].name.len != name->len) { - continue; - } - - if (ngx_strncasecmp(charset[i].name.data, name->data, name->len) == 0) { - return i; - } - } - - return NGX_HTTP_NO_CHARSET; -} - - -static ngx_inline void -ngx_http_set_charset(ngx_http_request_t *r, ngx_str_t *charset) -{ - if (r != r->main) { - return; - } - - if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY - || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY) - { - /* - * do not set charset for the redirect because NN 4.x - * use this charset instead of the next page charset - */ - - r->headers_out.charset.len = 0; - return; - } - - r->headers_out.charset = *charset; -} - - -static ngx_int_t -ngx_http_charset_ctx(ngx_http_request_t *r, ngx_http_charset_t *charsets, - ngx_int_t charset, ngx_int_t source_charset) -{ - ngx_http_charset_ctx_t *ctx; - - ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t)); - if (ctx == NULL) { - return NGX_ERROR; - } - - ngx_http_set_ctx(r, ctx, ngx_http_charset_filter_module); - - ctx->table = charsets[source_charset].tables[charset]; - ctx->charset = charset; - ctx->charset_name = charsets[charset].name; - ctx->length = charsets[charset].length; - ctx->from_utf8 = charsets[source_charset].utf8; - ctx->to_utf8 = charsets[charset].utf8; - - r->filter_need_in_memory = 1; - - if ((ctx->to_utf8 || ctx->from_utf8) && r == r->main) { - ngx_http_clear_content_length(r); - - } else { - r->filter_need_temporary = 1; - } - - return ngx_http_next_header_filter(r); -} - - -static ngx_int_t -ngx_http_charset_body_filter(ngx_http_request_t *r, ngx_chain_t *in) -{ - ngx_int_t rc; - ngx_buf_t *b; - ngx_chain_t *cl, *out, **ll; - ngx_http_charset_ctx_t *ctx; - - ctx = ngx_http_get_module_ctx(r, ngx_http_charset_filter_module); - - if (ctx == NULL || ctx->table == NULL) { - return ngx_http_next_body_filter(r, in); - } - - if ((ctx->to_utf8 || ctx->from_utf8) || ctx->busy) { - - out = NULL; - ll = &out; - - for (cl = in; cl; cl = cl->next) { - b = cl->buf; - - if (ngx_buf_size(b) == 0) { - - *ll = ngx_alloc_chain_link(r->pool); - if (*ll == NULL) { - return NGX_ERROR; - } - - (*ll)->buf = b; - (*ll)->next = NULL; - - ll = &(*ll)->next; - - continue; - } - - if (ctx->to_utf8) { - *ll = ngx_http_charset_recode_to_utf8(r->pool, b, ctx); - - } else { - *ll = ngx_http_charset_recode_from_utf8(r->pool, b, ctx); - } - - if (*ll == NULL) { - return NGX_ERROR; - } - - while (*ll) { - ll = &(*ll)->next; - } - } - - rc = ngx_http_next_body_filter(r, out); - - if (out) { - if (ctx->busy == NULL) { - ctx->busy = out; - - } else { - for (cl = ctx->busy; cl->next; cl = cl->next) { /* void */ } - cl->next = out; - } - } - - while (ctx->busy) { - - cl = ctx->busy; - b = cl->buf; - - if (ngx_buf_size(b) != 0) { - break; - } - - ctx->busy = cl->next; - - if (b->tag != (ngx_buf_tag_t) &ngx_http_charset_filter_module) { - continue; - } - - if (b->shadow) { - b->shadow->pos = b->shadow->last; - } - - if (b->pos) { - cl->next = ctx->free_buffers; - ctx->free_buffers = cl; - continue; - } - - cl->next = ctx->free_bufs; - ctx->free_bufs = cl; - } - - return rc; - } - - for (cl = in; cl; cl = cl->next) { - (void) ngx_http_charset_recode(cl->buf, ctx->table); - } - - return ngx_http_next_body_filter(r, in); -} - - -static ngx_uint_t -ngx_http_charset_recode(ngx_buf_t *b, u_char *table) -{ - u_char *p, *last; - - last = b->last; - - for (p = b->pos; p < last; p++) { - - if (*p != table[*p]) { - goto recode; - } - } - - return 0; - -recode: - - do { - if (*p != table[*p]) { - *p = table[*p]; - } - - p++; - - } while (p < last); - - b->in_file = 0; - - return 1; -} - - -static ngx_chain_t * -ngx_http_charset_recode_from_utf8(ngx_pool_t *pool, ngx_buf_t *buf, - ngx_http_charset_ctx_t *ctx) -{ - size_t len, size; - u_char c, *p, *src, *dst, *saved, **table; - uint32_t n; - ngx_buf_t *b; - ngx_uint_t i; - ngx_chain_t *out, *cl, **ll; - - src = buf->pos; - - if (ctx->saved_len == 0) { - - for ( /* void */ ; src < buf->last; src++) { - - if (*src < 0x80) { - continue; - } - - len = src - buf->pos; - - if (len > 512) { - out = ngx_http_charset_get_buf(pool, ctx); - if (out == NULL) { - return NULL; - } - - b = out->buf; - - b->temporary = buf->temporary; - b->memory = buf->memory; - b->mmap = buf->mmap; - b->flush = buf->flush; - - b->pos = buf->pos; - b->last = src; - - out->buf = b; - out->next = NULL; - - size = buf->last - src; - - saved = src; - n = ngx_utf8_decode(&saved, size); - - if (n == 0xfffffffe) { - /* incomplete UTF-8 symbol */ - - ngx_memcpy(ctx->saved, src, size); - ctx->saved_len = size; - - b->shadow = buf; - - return out; - } - - } else { - out = NULL; - size = len + buf->last - src; - src = buf->pos; - } - - if (size < NGX_HTML_ENTITY_LEN) { - size += NGX_HTML_ENTITY_LEN; - } - - cl = ngx_http_charset_get_buffer(pool, ctx, size); - if (cl == NULL) { - return NULL; - } - - if (out) { - out->next = cl; - - } else { - out = cl; - } - - b = cl->buf; - dst = b->pos; - - goto recode; - } - - out = ngx_alloc_chain_link(pool); - if (out == NULL) { - return NULL; - } - - out->buf = buf; - out->next = NULL; - - return out; - } - - /* process incomplete UTF sequence from previous buffer */ - - ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pool->log, 0, - "http charset utf saved: %z", ctx->saved_len); - - p = src; - - for (i = ctx->saved_len; i < NGX_UTF_LEN; i++) { - ctx->saved[i] = *p++; - - if (p == buf->last) { - break; - } - } - - saved = ctx->saved; - n = ngx_utf8_decode(&saved, i); - - c = '\0'; - - if (n < 0x10000) { - table = (u_char **) ctx->table; - p = table[n >> 8]; - - if (p) { - c = p[n & 0xff]; - } - - } else if (n == 0xfffffffe) { - - /* incomplete UTF-8 symbol */ - - if (i < NGX_UTF_LEN) { - out = ngx_http_charset_get_buf(pool, ctx); - if (out == NULL) { - return NULL; - } - - b = out->buf; - - b->pos = buf->pos; - b->last = buf->last; - b->sync = 1; - b->shadow = buf; - - ngx_memcpy(&ctx->saved[ctx->saved_len], src, i); - ctx->saved_len += i; - - return out; - } - } - - size = buf->last - buf->pos; - - if (size < NGX_HTML_ENTITY_LEN) { - size += NGX_HTML_ENTITY_LEN; - } - - cl = ngx_http_charset_get_buffer(pool, ctx, size); - if (cl == NULL) { - return NULL; - } - - out = cl; - - b = cl->buf; - dst = b->pos; - - if (c) { - *dst++ = c; - - } else if (n == 0xfffffffe) { - *dst++ = '?'; - - ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0, - "http charset invalid utf 0"); - - saved = &ctx->saved[NGX_UTF_LEN]; - - } else if (n > 0x10ffff) { - *dst++ = '?'; - - ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0, - "http charset invalid utf 1"); - - } else { - dst = ngx_sprintf(dst, "&#%uD;", n); - } - - src += (saved - ctx->saved) - ctx->saved_len; - ctx->saved_len = 0; - -recode: - - ll = &cl->next; - - table = (u_char **) ctx->table; - - while (src < buf->last) { - - if ((size_t) (b->end - dst) < NGX_HTML_ENTITY_LEN) { - b->last = dst; - - size = buf->last - src + NGX_HTML_ENTITY_LEN; - - cl = ngx_http_charset_get_buffer(pool, ctx, size); - if (cl == NULL) { - return NULL; - } - - *ll = cl; - ll = &cl->next; - - b = cl->buf; - dst = b->pos; - } - - if (*src < 0x80) { - *dst++ = *src++; - continue; - } - - len = buf->last - src; - - n = ngx_utf8_decode(&src, len); - - if (n < 0x10000) { - - p = table[n >> 8]; - - if (p) { - c = p[n & 0xff]; - - if (c) { - *dst++ = c; - continue; - } - } - - dst = ngx_sprintf(dst, "&#%uD;", n); - - continue; - } - - if (n == 0xfffffffe) { - /* incomplete UTF-8 symbol */ - - ngx_memcpy(ctx->saved, src, len); - ctx->saved_len = len; - - if (b->pos == dst) { - b->sync = 1; - b->temporary = 0; - } - - break; - } - - if (n > 0x10ffff) { - *dst++ = '?'; - - ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0, - "http charset invalid utf 2"); - - continue; - } - - /* n > 0xffff */ - - dst = ngx_sprintf(dst, "&#%uD;", n); - } - - b->last = dst; - - b->last_buf = buf->last_buf; - b->last_in_chain = buf->last_in_chain; - b->flush = buf->flush; - - b->shadow = buf; - - return out; -} - - -static ngx_chain_t * -ngx_http_charset_recode_to_utf8(ngx_pool_t *pool, ngx_buf_t *buf, - ngx_http_charset_ctx_t *ctx) -{ - size_t len, size; - u_char *p, *src, *dst, *table; - ngx_buf_t *b; - ngx_chain_t *out, *cl, **ll; - - table = ctx->table; - - for (src = buf->pos; src < buf->last; src++) { - if (table[*src * NGX_UTF_LEN] == '\1') { - continue; - } - - goto recode; - } - - out = ngx_alloc_chain_link(pool); - if (out == NULL) { - return NULL; - } - - out->buf = buf; - out->next = NULL; - - return out; - -recode: - - /* - * we assume that there are about half of characters to be recoded, - * so we preallocate "size / 2 + size / 2 * ctx->length" - */ - - len = src - buf->pos; - - if (len > 512) { - out = ngx_http_charset_get_buf(pool, ctx); - if (out == NULL) { - return NULL; - } - - b = out->buf; - - b->temporary = buf->temporary; - b->memory = buf->memory; - b->mmap = buf->mmap; - b->flush = buf->flush; - - b->pos = buf->pos; - b->last = src; - - out->buf = b; - out->next = NULL; - - size = buf->last - src; - size = size / 2 + size / 2 * ctx->length; - - } else { - out = NULL; - - size = buf->last - src; - size = len + size / 2 + size / 2 * ctx->length; - - src = buf->pos; - } - - cl = ngx_http_charset_get_buffer(pool, ctx, size); - if (cl == NULL) { - return NULL; - } - - if (out) { - out->next = cl; - - } else { - out = cl; - } - - ll = &cl->next; - - b = cl->buf; - dst = b->pos; - - while (src < buf->last) { - - p = &table[*src++ * NGX_UTF_LEN]; - len = *p++; - - if ((size_t) (b->end - dst) < len) { - b->last = dst; - - size = buf->last - src; - size = len + size / 2 + size / 2 * ctx->length; - - cl = ngx_http_charset_get_buffer(pool, ctx, size); - if (cl == NULL) { - return NULL; - } - - *ll = cl; - ll = &cl->next; - - b = cl->buf; - dst = b->pos; - } - - while (len) { - *dst++ = *p++; - len--; - } - } - - b->last = dst; - - b->last_buf = buf->last_buf; - b->last_in_chain = buf->last_in_chain; - b->flush = buf->flush; - - b->shadow = buf; - - return out; -} - - -static ngx_chain_t * -ngx_http_charset_get_buf(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx) -{ - ngx_chain_t *cl; - - cl = ctx->free_bufs; - - if (cl) { - ctx->free_bufs = cl->next; - - cl->buf->shadow = NULL; - cl->next = NULL; - - return cl; - } - - cl = ngx_alloc_chain_link(pool); - if (cl == NULL) { - return NULL; - } - - cl->buf = ngx_calloc_buf(pool); - if (cl->buf == NULL) { - return NULL; - } - - cl->next = NULL; - - cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module; - - return cl; -} - - -static ngx_chain_t * -ngx_http_charset_get_buffer(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx, - size_t size) -{ - ngx_buf_t *b; - ngx_chain_t *cl, **ll; - - for (ll = &ctx->free_buffers, cl = ctx->free_buffers; - cl; - ll = &cl->next, cl = cl->next) - { - b = cl->buf; - - if ((size_t) (b->end - b->start) >= size) { - *ll = cl->next; - cl->next = NULL; - - b->pos = b->start; - b->temporary = 1; - b->shadow = NULL; - - return cl; - } - } - - cl = ngx_alloc_chain_link(pool); - if (cl == NULL) { - return NULL; - } - - cl->buf = ngx_create_temp_buf(pool, size); - if (cl->buf == NULL) { - return NULL; - } - - cl->next = NULL; - - cl->buf->temporary = 1; - cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module; - - return cl; -} - - -static char * -ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) -{ - ngx_http_charset_main_conf_t *mcf = conf; - - char *rv; - u_char *p, *dst2src, **pp; - ngx_int_t src, dst; - ngx_uint_t i, n; - ngx_str_t *value; - ngx_conf_t pvcf; - ngx_http_charset_t *charset; - ngx_http_charset_tables_t *table; - ngx_http_charset_conf_ctx_t ctx; - - value = cf->args->elts; - - src = ngx_http_add_charset(&mcf->charsets, &value[1]); - if (src == NGX_ERROR) { - return NGX_CONF_ERROR; - } - - dst = ngx_http_add_charset(&mcf->charsets, &value[2]); - if (dst == NGX_ERROR) { - return NGX_CONF_ERROR; - } - - if (src == dst) { - ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, - "\"charset_map\" between the same charsets " - "\"%V\" and \"%V\"", &value[1], &value[2]); - return NGX_CONF_ERROR; - } - - table = mcf->tables.elts; - for (i = 0; i < mcf->tables.nelts; i++) { - if ((src == table->src && dst == table->dst) - || (src == table->dst && dst == table->src)) - { - ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, - "duplicate \"charset_map\" between " - "\"%V\" and \"%V\"", &value[1], &value[2]); - return NGX_CONF_ERROR; - } - } - - table = ngx_array_push(&mcf->tables); - if (table == NULL) { - return NGX_CONF_ERROR; - } - - table->src = src; - table->dst = dst; - - if (ngx_strcasecmp(value[2].data, (u_char *) "utf-8") == 0) { - table->src2dst = ngx_pcalloc(cf->pool, 256 * NGX_UTF_LEN); - if (table->src2dst == NULL) { - return NGX_CONF_ERROR; - } - - table->dst2src = ngx_pcalloc(cf->pool, 256 * sizeof(void *)); - if (table->dst2src == NULL) { - return NGX_CONF_ERROR; - } - - dst2src = ngx_pcalloc(cf->pool, 256); - if (dst2src == NULL) { - return NGX_CONF_ERROR; - } - - pp = (u_char **) &table->dst2src[0]; - pp[0] = dst2src; - - for (i = 0; i < 128; i++) { - p = &table->src2dst[i * NGX_UTF_LEN]; - p[0] = '\1'; - p[1] = (u_char) i; - dst2src[i] = (u_char) i; - } - - for (/* void */; i < 256; i++) { - p = &table->src2dst[i * NGX_UTF_LEN]; - p[0] = '\1'; - p[1] = '?'; - } - - } else { - table->src2dst = ngx_palloc(cf->pool, 256); - if (table->src2dst == NULL) { - return NGX_CONF_ERROR; - } - - table->dst2src = ngx_palloc(cf->pool, 256); - if (table->dst2src == NULL) { - return NGX_CONF_ERROR; - } - - for (i = 0; i < 128; i++) { - table->src2dst[i] = (u_char) i; - table->dst2src[i] = (u_char) i; - } - - for (/* void */; i < 256; i++) { - table->src2dst[i] = '?'; - table->dst2src[i] = '?'; - } - } - - charset = mcf->charsets.elts; - - ctx.table = table; - ctx.charset = &charset[dst]; - ctx.characters = 0; - - pvcf = *cf; - cf->ctx = &ctx; - cf->handler = ngx_http_charset_map; - cf->handler_conf = conf; - - rv = ngx_conf_parse(cf, NULL); - - *cf = pvcf; - - if (ctx.characters) { - n = ctx.charset->length; - ctx.charset->length /= ctx.characters; - - if (((n * 10) / ctx.characters) % 10 > 4) { - ctx.charset->length++; - } - } - - return rv; -} - - -static char * -ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf) -{ - u_char *p, *dst2src, **pp; - uint32_t n; - ngx_int_t src, dst; - ngx_str_t *value; - ngx_uint_t i; - ngx_http_charset_tables_t *table; - ngx_http_charset_conf_ctx_t *ctx; - - if (cf->args->nelts != 2) { - ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid parameters number"); - return NGX_CONF_ERROR; - } - - value = cf->args->elts; - - src = ngx_hextoi(value[0].data, value[0].len); - if (src == NGX_ERROR || src > 255) { - ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, - "invalid value \"%V\"", &value[0]); - return NGX_CONF_ERROR; - } - - ctx = cf->ctx; - table = ctx->table; - - if (ctx->charset->utf8) { - p = &table->src2dst[src * NGX_UTF_LEN]; - - *p++ = (u_char) (value[1].len / 2); - - for (i = 0; i < value[1].len; i += 2) { - dst = ngx_hextoi(&value[1].data[i], 2); - if (dst == NGX_ERROR || dst > 255) { - ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, - "invalid value \"%V\"", &value[1]); - return NGX_CONF_ERROR; - } - - *p++ = (u_char) dst; - } - - i /= 2; - - ctx->charset->length += i; - ctx->characters++; - - p = &table->src2dst[src * NGX_UTF_LEN] + 1; - - n = ngx_utf8_decode(&p, i); - - if (n > 0xffff) { - ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, - "invalid value \"%V\"", &value[1]); - return NGX_CONF_ERROR; - } - - pp = (u_char **) &table->dst2src[0]; - - dst2src = pp[n >> 8]; - - if (dst2src == NULL) { - dst2src = ngx_pcalloc(cf->pool, 256); - if (dst2src == NULL) { - return NGX_CONF_ERROR; - } - - pp[n >> 8] = dst2src; - } - - dst2src[n & 0xff] = (u_char) src; - - } else { - dst = ngx_hextoi(value[1].data, value[1].len); - if (dst == NGX_ERROR || dst > 255) { - ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, - "invalid value \"%V\"", &value[1]); - return NGX_CONF_ERROR; - } - - table->src2dst[src] = (u_char) dst; - table->dst2src[dst] = (u_char) src; - } - - return NGX_CONF_OK; -} - - -static char * -ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) -{ - char *p = conf; - - ngx_int_t *cp; - ngx_str_t *value, var; - ngx_http_charset_main_conf_t *mcf; - - cp = (ngx_int_t *) (p + cmd->offset); - - if (*cp != NGX_CONF_UNSET) { - return "is duplicate"; - } - - value = cf->args->elts; - - if (cmd->offset == offsetof(ngx_http_charset_loc_conf_t, charset) - && ngx_strcmp(value[1].data, "off") == 0) - { - *cp = NGX_HTTP_CHARSET_OFF; - return NGX_CONF_OK; - } - - - if (value[1].data[0] == '$') { - var.len = value[1].len - 1; - var.data = value[1].data + 1; - - *cp = ngx_http_get_variable_index(cf, &var); - - if (*cp == NGX_ERROR) { - return NGX_CONF_ERROR; - } - - *cp += NGX_HTTP_CHARSET_VAR; - - return NGX_CONF_OK; - } - - mcf = ngx_http_conf_get_module_main_conf(cf, - ngx_http_charset_filter_module); - - *cp = ngx_http_add_charset(&mcf->charsets, &value[1]); - if (*cp == NGX_ERROR) { - return NGX_CONF_ERROR; - } - - return NGX_CONF_OK; -} - - -static ngx_int_t -ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name) -{ - ngx_uint_t i; - ngx_http_charset_t *c; - - c = charsets->elts; - for (i = 0; i < charsets->nelts; i++) { - if (name->len != c[i].name.len) { - continue; - } - - if (ngx_strcasecmp(name->data, c[i].name.data) == 0) { - break; - } - } - - if (i < charsets->nelts) { - return i; - } - - c = ngx_array_push(charsets); - if (c == NULL) { - return NGX_ERROR; - } - - c->tables = NULL; - c->name = *name; - c->length = 0; - - if (ngx_strcasecmp(name->data, (u_char *) "utf-8") == 0) { - c->utf8 = 1; - - } else { - c->utf8 = 0; - } - - return i; -} - - -static void * -ngx_http_charset_create_main_conf(ngx_conf_t *cf) -{ - ngx_http_charset_main_conf_t *mcf; - - mcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_main_conf_t)); - if (mcf == NULL) { - return NULL; - } - - if (ngx_array_init(&mcf->charsets, cf->pool, 2, sizeof(ngx_http_charset_t)) - != NGX_OK) - { - return NULL; - } - - if (ngx_array_init(&mcf->tables, cf->pool, 1, - sizeof(ngx_http_charset_tables_t)) - != NGX_OK) - { - return NULL; - } - - if (ngx_array_init(&mcf->recodes, cf->pool, 2, - sizeof(ngx_http_charset_recode_t)) - != NGX_OK) - { - return NULL; - } - - return mcf; -} - - -static void * -ngx_http_charset_create_loc_conf(ngx_conf_t *cf) -{ - ngx_http_charset_loc_conf_t *lcf; - - lcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_loc_conf_t)); - if (lcf == NULL) { - return NULL; - } - - /* - * set by ngx_pcalloc(): - * - * lcf->types = { NULL }; - * lcf->types_keys = NULL; - */ - - lcf->charset = NGX_CONF_UNSET; - lcf->source_charset = NGX_CONF_UNSET; - lcf->override_charset = NGX_CONF_UNSET; - - return lcf; -} - - -static char * -ngx_http_charset_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child) -{ - ngx_http_charset_loc_conf_t *prev = parent; - ngx_http_charset_loc_conf_t *conf = child; - - ngx_uint_t i; - ngx_http_charset_recode_t *recode; - ngx_http_charset_main_conf_t *mcf; - - if (ngx_http_merge_types(cf, &conf->types_keys, &conf->types, - &prev->types_keys, &prev->types, - ngx_http_charset_default_types) - != NGX_OK) - { - return NGX_CONF_ERROR; - } - - ngx_conf_merge_value(conf->override_charset, prev->override_charset, 0); - ngx_conf_merge_value(conf->charset, prev->charset, NGX_HTTP_CHARSET_OFF); - ngx_conf_merge_value(conf->source_charset, prev->source_charset, - NGX_HTTP_CHARSET_OFF); - - if (conf->charset == NGX_HTTP_CHARSET_OFF - || conf->source_charset == NGX_HTTP_CHARSET_OFF - || conf->charset == conf->source_charset) - { - return NGX_CONF_OK; - } - - if (conf->source_charset >= NGX_HTTP_CHARSET_VAR - || conf->charset >= NGX_HTTP_CHARSET_VAR) - { - return NGX_CONF_OK; - } - - mcf = ngx_http_conf_get_module_main_conf(cf, - ngx_http_charset_filter_module); - recode = mcf->recodes.elts; - for (i = 0; i < mcf->recodes.nelts; i++) { - if (conf->source_charset == recode[i].src - && conf->charset == recode[i].dst) - { - return NGX_CONF_OK; - } - } - - recode = ngx_array_push(&mcf->recodes); - if (recode == NULL) { - return NGX_CONF_ERROR; - } - - recode->src = conf->source_charset; - recode->dst = conf->charset; - - return NGX_CONF_OK; -} - - -static ngx_int_t -ngx_http_charset_postconfiguration(ngx_conf_t *cf) -{ - u_char **src, **dst; - ngx_int_t c; - ngx_uint_t i, t; - ngx_http_charset_t *charset; - ngx_http_charset_recode_t *recode; - ngx_http_charset_tables_t *tables; - ngx_http_charset_main_conf_t *mcf; - - mcf = ngx_http_conf_get_module_main_conf(cf, - ngx_http_charset_filter_module); - - recode = mcf->recodes.elts; - tables = mcf->tables.elts; - charset = mcf->charsets.elts; - - for (i = 0; i < mcf->recodes.nelts; i++) { - - c = recode[i].src; - - for (t = 0; t < mcf->tables.nelts; t++) { - - if (c == tables[t].src && recode[i].dst == tables[t].dst) { - goto next; - } - - if (c == tables[t].dst && recode[i].dst == tables[t].src) { - goto next; - } - } - - ngx_log_error(NGX_LOG_EMERG, cf->log, 0, - "no \"charset_map\" between the charsets \"%V\" and \"%V\"", - &charset[c].name, &charset[recode[i].dst].name); - return NGX_ERROR; - - next: - continue; - } - - - for (t = 0; t < mcf->tables.nelts; t++) { - - src = charset[tables[t].src].tables; - - if (src == NULL) { - src = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts); - if (src == NULL) { - return NGX_ERROR; - } - - charset[tables[t].src].tables = src; - } - - dst = charset[tables[t].dst].tables; - - if (dst == NULL) { - dst = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts); - if (dst == NULL) { - return NGX_ERROR; - } - - charset[tables[t].dst].tables = dst; - } - - src[tables[t].dst] = tables[t].src2dst; - dst[tables[t].src] = tables[t].dst2src; - } - - ngx_http_next_header_filter = ngx_http_top_header_filter; - ngx_http_top_header_filter = ngx_http_charset_header_filter; - - ngx_http_next_body_filter = ngx_http_top_body_filter; - ngx_http_top_body_filter = ngx_http_charset_body_filter; - - return NGX_OK; -} |