diff options
author | 2017-01-12 13:47:39 +0200 | |
---|---|---|
committer | 2017-01-15 17:10:16 +0200 | |
commit | 420216e583706fbd7bf214818fcce0143a05e982 (patch) | |
tree | 0fd39bac06af7e12889406b0f20cd40527e0d18f /scripts/external_libs/elasticsearch | |
parent | 4e5a651c8e052cdbcad73f6af5ce065ffd6dbce4 (diff) |
add elk
Signed-off-by: Hanoh Haim <hhaim@cisco.com>
Diffstat (limited to 'scripts/external_libs/elasticsearch')
22 files changed, 5079 insertions, 0 deletions
diff --git a/scripts/external_libs/elasticsearch/elasticsearch/__init__.py b/scripts/external_libs/elasticsearch/elasticsearch/__init__.py new file mode 100644 index 00000000..bb26253a --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/__init__.py @@ -0,0 +1,25 @@ +from __future__ import absolute_import + +VERSION = (5, 0, 1) +__version__ = VERSION +__versionstr__ = '.'.join(map(str, VERSION)) + +import sys + +if (2, 7) <= sys.version_info < (3, 2): + # On Python 2.7 and Python3 < 3.2, install no-op handler to silence + # `No handlers could be found for logger "elasticsearch"` message per + # <https://docs.python.org/2/howto/logging.html#configuring-logging-for-a-library> + import logging + logger = logging.getLogger('elasticsearch') + logger.addHandler(logging.NullHandler()) + +from .client import Elasticsearch +from .transport import Transport +from .connection_pool import ConnectionPool, ConnectionSelector, \ + RoundRobinSelector +from .serializer import JSONSerializer +from .connection import Connection, RequestsHttpConnection, \ + Urllib3HttpConnection +from .exceptions import * + diff --git a/scripts/external_libs/elasticsearch/elasticsearch/client/__init__.py b/scripts/external_libs/elasticsearch/elasticsearch/client/__init__.py new file mode 100644 index 00000000..94aa9a61 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/client/__init__.py @@ -0,0 +1,1517 @@ +from __future__ import unicode_literals +import logging + +from ..transport import Transport +from ..exceptions import TransportError +from ..compat import string_types, urlparse +from .indices import IndicesClient +from .ingest import IngestClient +from .cluster import ClusterClient +from .cat import CatClient +from .nodes import NodesClient +from .snapshot import SnapshotClient +from .tasks import TasksClient +from .utils import query_params, _make_path, SKIP_IN_PATH + +logger = logging.getLogger('elasticsearch') + +def _normalize_hosts(hosts): + """ + Helper 
function to transform hosts argument to + :class:`~elasticsearch.Elasticsearch` to a list of dicts. + """ + # if hosts are empty, just defer to defaults down the line + if hosts is None: + return [{}] + + # passed in just one string + if isinstance(hosts, string_types): + hosts = [hosts] + + out = [] + # normalize hosts to dicts + for host in hosts: + if isinstance(host, string_types): + if '://' not in host: + host = "//%s" % host + + parsed_url = urlparse(host) + h = {"host": parsed_url.hostname} + + if parsed_url.port: + h["port"] = parsed_url.port + + if parsed_url.scheme == "https": + h['port'] = parsed_url.port or 443 + h['use_ssl'] = True + h['scheme'] = 'http' + elif parsed_url.scheme: + h['scheme'] = parsed_url.scheme + + if parsed_url.username or parsed_url.password: + h['http_auth'] = '%s:%s' % (parsed_url.username, parsed_url.password) + + if parsed_url.path and parsed_url.path != '/': + h['url_prefix'] = parsed_url.path + + out.append(h) + else: + out.append(host) + return out + + +class Elasticsearch(object): + """ + Elasticsearch low-level client. Provides a straightforward mapping from + Python to ES REST endpoints. + + The instance has attributes ``cat``, ``cluster``, ``indices``, ``ingest``, + ``nodes``, ``snapshot`` and ``tasks`` that provide access to instances of + :class:`~elasticsearch.client.CatClient`, + :class:`~elasticsearch.client.ClusterClient`, + :class:`~elasticsearch.client.IndicesClient`, + :class:`~elasticsearch.client.IngestClient`, + :class:`~elasticsearch.client.NodesClient`, + :class:`~elasticsearch.client.SnapshotClient` and + :class:`~elasticsearch.client.TasksClient` respectively. This is the + preferred (and only supported) way to get access to those classes and their + methods. 
+ + You can specify your own connection class which should be used by providing + the ``connection_class`` parameter:: + + # create connection to localhost using the ThriftConnection + es = Elasticsearch(connection_class=ThriftConnection) + + If you want to turn on :ref:`sniffing` you have several options (described + in :class:`~elasticsearch.Transport`):: + + # create connection that will automatically inspect the cluster to get + # the list of active nodes. Start with nodes running on 'esnode1' and + # 'esnode2' + es = Elasticsearch( + ['esnode1', 'esnode2'], + # sniff before doing anything + sniff_on_start=True, + # refresh nodes after a node fails to respond + sniff_on_connection_fail=True, + # and also every 60 seconds + sniffer_timeout=60 + ) + + Different hosts can have different parameters, use a dictionary per node to + specify those:: + + # connect to localhost directly and another node using SSL on port 443 + # and an url_prefix. Note that ``port`` needs to be an int. + es = Elasticsearch([ + {'host': 'localhost'}, + {'host': 'othernode', 'port': 443, 'url_prefix': 'es', 'use_ssl': True}, + ]) + + If using SSL, there are several parameters that control how we deal with + certificates (see :class:`~elasticsearch.Urllib3HttpConnection` for + detailed description of the options):: + + es = Elasticsearch( + ['localhost:443', 'other_host:443'], + # turn on SSL + use_ssl=True, + # make sure we verify SSL certificates (off by default) + verify_certs=True, + # provide a path to CA certs on disk + ca_certs='/path/to/CA_certs' + ) + + SSL client authentication is supported + (see :class:`~elasticsearch.Urllib3HttpConnection` for + detailed description of the options):: + + es = Elasticsearch( + ['localhost:443', 'other_host:443'], + # turn on SSL + use_ssl=True, + # make sure we verify SSL certificates (off by default) + verify_certs=True, + # provide a path to CA certs on disk + ca_certs='/path/to/CA_certs', + # PEM formatted SSL client certificate + 
client_cert='/path/to/clientcert.pem', + # PEM formatted SSL client key + client_key='/path/to/clientkey.pem' + ) + + Alternatively you can use RFC-1738 formatted URLs, as long as they are not + in conflict with other options:: + + es = Elasticsearch( + [ + 'http://user:secret@localhost:9200/', + 'https://user:secret@other_host:443/production' + ], + verify_certs=True + ) + + """ + def __init__(self, hosts=None, transport_class=Transport, **kwargs): + """ + :arg hosts: list of nodes we should connect to. Node should be a + dictionary ({"host": "localhost", "port": 9200}), the entire dictionary + will be passed to the :class:`~elasticsearch.Connection` class as + kwargs, or a string in the format of ``host[:port]`` which will be + translated to a dictionary automatically. If no value is given the + :class:`~elasticsearch.Urllib3HttpConnection` class defaults will be used. + + :arg transport_class: :class:`~elasticsearch.Transport` subclass to use. + + :arg kwargs: any additional arguments will be passed on to the + :class:`~elasticsearch.Transport` class and, subsequently, to the + :class:`~elasticsearch.Connection` instances. 
+ """ + self.transport = transport_class(_normalize_hosts(hosts), **kwargs) + + # namespaced clients for compatibility with API names + self.indices = IndicesClient(self) + self.ingest = IngestClient(self) + self.cluster = ClusterClient(self) + self.cat = CatClient(self) + self.nodes = NodesClient(self) + self.snapshot = SnapshotClient(self) + self.tasks = TasksClient(self) + + def __repr__(self): + try: + # get a lost of all connections + cons = self.transport.hosts + # truncate to 10 if there are too many + if len(cons) > 5: + cons = cons[:5] + ['...'] + return '<Elasticsearch(%r)>' % cons + except: + # probably operating on custom transport and connection_pool, ignore + return super(Elasticsearch, self).__repr__() + + def _bulk_body(self, body): + # if not passed in a string, serialize items and join by newline + if not isinstance(body, string_types): + body = '\n'.join(map(self.transport.serializer.dumps, body)) + + # bulk body must end with a newline + if not body.endswith('\n'): + body += '\n' + + return body + + @query_params() + def ping(self, params=None): + """ + Returns True if the cluster is up, False otherwise. + `<http://www.elastic.co/guide/>`_ + """ + try: + return self.transport.perform_request('HEAD', '/', params=params) + except TransportError: + return False + + @query_params() + def info(self, params=None): + """ + Get the basic info from the current cluster. + `<http://www.elastic.co/guide/>`_ + """ + return self.transport.perform_request('GET', '/', params=params) + + @query_params('parent', 'pipeline', 'refresh', 'routing', + 'timeout', 'timestamp', 'ttl', 'version', 'version_type') + def create(self, index, doc_type, id, body, params=None): + """ + Adds a typed JSON document in a specific index, making it searchable. 
+ Behind the scenes this method calls index(..., op_type='create') + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html>`_ + + :arg index: The name of the index + :arg doc_type: The type of the document + :arg id: Document ID + :arg body: The document + :arg parent: ID of the parent document + :arg pipeline: The pipeline id to preprocess incoming documents with + :arg refresh: If `true` then refresh the affected shards to make this + operation visible to search, if `wait_for` then wait for a refresh + to make this operation visible to search, if `false` (the default) + then do nothing with refreshes., valid choices are: u'true', + u'false', u'wait_for' + :arg routing: Specific routing value + :arg timeout: Explicit operation timeout + :arg timestamp: Explicit timestamp for the document + :arg ttl: Expiration time for the document + :arg version: Explicit version number for concurrency control + :arg version_type: Specific version type, valid choices are: + u'internal', u'external', u'external_gte', u'force' + :arg wait_for_active_shards: Sets the number of shard copies that must + be active before proceeding with the index operation. Defaults to 1, + meaning the primary shard only. Set to `all` for all shard copies, + otherwise set to any non-negative value less than or equal to the + total number of copies for the shard (number of replicas + 1) + """ + for param in (index, doc_type, id, body): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('PUT', _make_path(index, doc_type, + id, '_create'), params=params, body=body) + + @query_params('op_type', 'parent', 'pipeline', 'refresh', 'routing', + 'timeout', 'timestamp', 'ttl', 'version', 'version_type', + 'wait_for_active_shards') + def index(self, index, doc_type, body, id=None, params=None): + """ + Adds or updates a typed JSON document in a specific index, making it searchable. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html>`_ + + :arg index: The name of the index + :arg doc_type: The type of the document + :arg body: The document + :arg id: Document ID + :arg op_type: Explicit operation type, default 'index', valid choices + are: 'index', 'create' + :arg parent: ID of the parent document + :arg pipeline: The pipeline id to preprocess incoming documents with + :arg refresh: If `true` then refresh the affected shards to make this + operation visible to search, if `wait_for` then wait for a refresh + to make this operation visible to search, if `false` (the default) + then do nothing with refreshes., valid choices are: u'true', + u'false', u'wait_for' + :arg routing: Specific routing value + :arg timeout: Explicit operation timeout + :arg timestamp: Explicit timestamp for the document + :arg ttl: Expiration time for the document + :arg version: Explicit version number for concurrency control + :arg version_type: Specific version type, valid choices are: 'internal', + 'external', 'external_gte', 'force' + :arg wait_for_active_shards: Sets the number of shard copies that must + be active before proceeding with the index operation. Defaults to 1, + meaning the primary shard only. Set to `all` for all shard copies, + otherwise set to any non-negative value less than or equal to the + total number of copies for the shard (number of replicas + 1) + """ + for param in (index, doc_type, body): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('POST' if id in SKIP_IN_PATH else 'PUT', + _make_path(index, doc_type, id), params=params, body=body) + + @query_params('parent', 'preference', 'realtime', 'refresh', 'routing') + def exists(self, index, doc_type, id, params=None): + """ + Returns a boolean indicating whether or not given document exists in Elasticsearch. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html>`_ + + :arg index: The name of the index + :arg doc_type: The type of the document (use `_all` to fetch the first + document matching the ID across all types) + :arg id: The document ID + :arg parent: The ID of the parent document + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg realtime: Specify whether to perform the operation in realtime or + search mode + :arg refresh: Refresh the shard containing the document before + performing the operation + :arg routing: Specific routing value + """ + for param in (index, doc_type, id): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('HEAD', _make_path(index, + doc_type, id), params=params) + + @query_params('_source', '_source_exclude', '_source_include', 'parent', + 'preference', 'realtime', 'refresh', 'routing', 'stored_fields', + 'version', 'version_type') + def get(self, index, id, doc_type='_all', params=None): + """ + Get a typed JSON document from the index based on its id. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html>`_ + + :arg index: The name of the index + :arg doc_type: The type of the document (use `_all` to fetch the first + document matching the ID across all types) + :arg id: The document ID + :arg _source: True or false to return the _source field or not, or a + list of fields to return + :arg _source_exclude: A list of fields to exclude from the returned + _source field + :arg _source_include: A list of fields to extract and return from the + _source field + :arg parent: The ID of the parent document + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg realtime: Specify whether to perform the operation in realtime or + search mode + :arg refresh: Refresh the shard containing the document before + performing the operation + :arg routing: Specific routing value + :arg stored_fields: A comma-separated list of stored fields to return in + the response + :arg version: Explicit version number for concurrency control + :arg version_type: Specific version type, valid choices are: 'internal', + 'external', 'external_gte', 'force' + """ + for param in (index, doc_type, id): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('GET', _make_path(index, + doc_type, id), params=params) + + @query_params('_source', '_source_exclude', '_source_include', 'parent', + 'preference', 'realtime', 'refresh', 'routing', 'version', + 'version_type') + def get_source(self, index, doc_type, id, params=None): + """ + Get the source of a document by it's index, type and id. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html>`_ + + :arg index: The name of the index + :arg doc_type: The type of the document; use `_all` to fetch the first + document matching the ID across all types + :arg id: The document ID + :arg _source: True or false to return the _source field or not, or a + list of fields to return + :arg _source_exclude: A list of fields to exclude from the returned + _source field + :arg _source_include: A list of fields to extract and return from the + _source field + :arg parent: The ID of the parent document + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg realtime: Specify whether to perform the operation in realtime or + search mode + :arg refresh: Refresh the shard containing the document before + performing the operation + :arg routing: Specific routing value + :arg version: Explicit version number for concurrency control + :arg version_type: Specific version type, valid choices are: 'internal', + 'external', 'external_gte', 'force' + """ + for param in (index, doc_type, id): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('GET', _make_path(index, + doc_type, id, '_source'), params=params) + + @query_params('_source', '_source_exclude', '_source_include', 'preference', + 'realtime', 'refresh', 'stored_fields') + def mget(self, body, index=None, doc_type=None, params=None): + """ + Get multiple documents based on an index, type (optional) and ids. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html>`_ + + :arg body: Document identifiers; can be either `docs` (containing full + document information) or `ids` (when index and type is provided in + the URL. 
+ :arg index: The name of the index + :arg doc_type: The type of the document + :arg _source: True or false to return the _source field or not, or a + list of fields to return + :arg _source_exclude: A list of fields to exclude from the returned + _source field + :arg _source_include: A list of fields to extract and return from the + _source field + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg realtime: Specify whether to perform the operation in realtime or + search mode + :arg refresh: Refresh the shard containing the document before + performing the operation + :arg stored_fields: A comma-separated list of stored fields to return in + the response + """ + if body in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'body'.") + return self.transport.perform_request('GET', _make_path(index, + doc_type, '_mget'), params=params, body=body) + + @query_params('_source', '_source_exclude', '_source_include', 'fields', + 'lang', 'parent', 'refresh', 'retry_on_conflict', 'routing', 'timeout', + 'timestamp', 'ttl', 'version', 'version_type', 'wait_for_active_shards') + def update(self, index, doc_type, id, body=None, params=None): + """ + Update a document based on a script or partial data provided. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update.html>`_ + + :arg index: The name of the index + :arg doc_type: The type of the document + :arg id: Document ID + :arg body: The request definition using either `script` or partial `doc` + :arg _source: True or false to return the _source field or not, or a + list of fields to return + :arg _source_exclude: A list of fields to exclude from the returned + _source field + :arg _source_include: A list of fields to extract and return from the + _source field + :arg fields: A comma-separated list of fields to return in the response + :arg lang: The script language (default: groovy) + :arg parent: ID of the parent document. Is is only used for routing and + when for the upsert request + :arg refresh: If `true` then refresh the effected shards to make this + operation visible to search, if `wait_for` then wait for a refresh + to make this operation visible to search, if `false` (the default) + then do nothing with refreshes., valid choices are: 'true', 'false', + 'wait_for' + :arg retry_on_conflict: Specify how many times should the operation be + retried when a conflict occurs (default: 0) + :arg routing: Specific routing value + :arg timeout: Explicit operation timeout + :arg timestamp: Explicit timestamp for the document + :arg ttl: Expiration time for the document + :arg version: Explicit version number for concurrency control + :arg version_type: Specific version type, valid choices are: 'internal', + 'force' + :arg wait_for_active_shards: Sets the number of shard copies that must + be active before proceeding with the update operation. Defaults to + 1, meaning the primary shard only. 
Set to `all` for all shard + copies, otherwise set to any non-negative value less than or equal + to the total number of copies for the shard (number of replicas + 1) + """ + for param in (index, doc_type, id): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('POST', _make_path(index, + doc_type, id, '_update'), params=params, body=body) + + @query_params('_source', '_source_exclude', '_source_include', + 'allow_no_indices', 'analyze_wildcard', 'analyzer', 'default_operator', + 'df', 'docvalue_fields', 'expand_wildcards', 'explain', + 'fielddata_fields', 'from_', 'ignore_unavailable', 'lenient', + 'lowercase_expanded_terms', 'preference', 'q', 'request_cache', + 'routing', 'scroll', 'search_type', 'size', 'sort', 'stats', + 'stored_fields', 'suggest_field', 'suggest_mode', 'suggest_size', + 'suggest_text', 'terminate_after', 'timeout', 'track_scores', 'version') + def search(self, index=None, doc_type=None, body=None, params=None): + """ + Execute a search query and get back search hits that match the query. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html>`_ + + :arg index: A comma-separated list of index names to search; use `_all` + or empty string to perform the operation on all indices + :arg doc_type: A comma-separated list of document types to search; leave + empty to perform the operation on all types + :arg body: The search definition using the Query DSL + :arg _source: True or false to return the _source field or not, or a + list of fields to return + :arg _source_exclude: A list of fields to exclude from the returned + _source field + :arg _source_include: A list of fields to extract and return from the + _source field + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg analyze_wildcard: Specify whether wildcard and prefix queries + should be analyzed (default: false) + :arg analyzer: The analyzer to use for the query string + :arg default_operator: The default operator for query string query (AND + or OR), default 'OR', valid choices are: 'AND', 'OR' + :arg df: The field to use as default where no field prefix is given in + the query string + :arg docvalue_fields: A comma-separated list of fields to return as the + docvalue representation of a field for each hit + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg explain: Specify whether to return detailed information about score + computation as part of a hit + :arg fielddata_fields: A comma-separated list of fields to return as the + docvalue representation of a field for each hit + :arg from\_: Starting offset (default: 0) + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg lenient: Specify whether format-based query failures (such as + providing text to a numeric field) should be ignored + :arg lowercase_expanded_terms: Specify whether query terms should be + lowercased + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg q: Query in the Lucene query string syntax + :arg request_cache: Specify if request cache should be used for this + request or not, defaults to index level setting + :arg routing: A comma-separated list of specific routing values + :arg scroll: Specify how long a consistent view of the index should be + maintained for scrolled search + :arg search_type: Search operation type, valid choices are: + 'query_then_fetch', 'dfs_query_then_fetch' + :arg size: Number of hits to return (default: 10) + :arg sort: A 
comma-separated list of <field>:<direction> pairs + :arg stats: Specific 'tag' of the request for logging and statistical + purposes + :arg stored_fields: A comma-separated list of stored fields to return as + part of a hit + :arg suggest_field: Specify which field to use for suggestions + :arg suggest_mode: Specify suggest mode, default 'missing', valid + choices are: 'missing', 'popular', 'always' + :arg suggest_size: How many suggestions to return in response + :arg suggest_text: The source text for which the suggestions should be + returned + :arg terminate_after: The maximum number of documents to collect for + each shard, upon reaching which the query execution will terminate + early. + :arg timeout: Explicit operation timeout + :arg track_scores: Whether to calculate and return scores even if they + are not used for sorting + :arg version: Specify whether to return document version as part of a + hit + """ + # from is a reserved word so it cannot be used, use from_ instead + if 'from_' in params: + params['from'] = params.pop('from_') + + if doc_type and not index: + index = '_all' + return self.transport.perform_request('GET', _make_path(index, + doc_type, '_search'), params=params, body=body) + + @query_params('_source', '_source_exclude', '_source_include', + 'allow_no_indices', 'analyze_wildcard', 'analyzer', 'conflicts', + 'default_operator', 'df', 'docvalue_fields', 'expand_wildcards', + 'explain', 'fielddata_fields', 'from_', 'ignore_unavailable', 'lenient', + 'lowercase_expanded_terms', 'pipeline', 'preference', 'q', 'refresh', + 'request_cache', 'requests_per_second', 'routing', 'scroll', + 'scroll_size', 'search_timeout', 'search_type', 'size', 'sort', 'stats', + 'stored_fields', 'suggest_field', 'suggest_mode', 'suggest_size', + 'suggest_text', 'terminate_after', 'timeout', 'track_scores', 'version', + 'version_type', 'wait_for_active_shards', 'wait_for_completion') + def update_by_query(self, index, doc_type=None, body=None, params=None): + """ + 
Perform an update on all documents matching a query. + `<https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html>`_ + + :arg index: A comma-separated list of index names to search; use `_all` + or empty string to perform the operation on all indices + :arg doc_type: A comma-separated list of document types to search; leave + empty to perform the operation on all types + :arg body: The search definition using the Query DSL + :arg _source: True or false to return the _source field or not, or a + list of fields to return + :arg _source_exclude: A list of fields to exclude from the returned + _source field + :arg _source_include: A list of fields to extract and return from the + _source field + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg analyze_wildcard: Specify whether wildcard and prefix queries + should be analyzed (default: false) + :arg analyzer: The analyzer to use for the query string + :arg conflicts: What to do when the reindex hits version conflicts?, + default 'abort', valid choices are: 'abort', 'proceed' + :arg default_operator: The default operator for query string query (AND + or OR), default 'OR', valid choices are: 'AND', 'OR' + :arg df: The field to use as default where no field prefix is given in + the query string + :arg docvalue_fields: A comma-separated list of fields to return as the + docvalue representation of a field for each hit + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg explain: Specify whether to return detailed information about score + computation as part of a hit + :arg fielddata_fields: A comma-separated list of fields to return as the + docvalue representation of a field for each hit + :arg from\_: Starting 
offset (default: 0) + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg lenient: Specify whether format-based query failures (such as + providing text to a numeric field) should be ignored + :arg lowercase_expanded_terms: Specify whether query terms should be + lowercased + :arg pipeline: Ingest pipeline to set on index requests made by this + action. (default: none) + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg q: Query in the Lucene query string syntax + :arg refresh: Should the effected indexes be refreshed? + :arg request_cache: Specify if request cache should be used for this + request or not, defaults to index level setting + :arg requests_per_second: The throttle to set on this request in sub- + requests per second. -1 means set no throttle as does "unlimited" + which is the only non-float this accepts., default 0 + :arg routing: A comma-separated list of specific routing values + :arg scroll: Specify how long a consistent view of the index should be + maintained for scrolled search + :arg scroll_size: Size on the scroll request powering the + update_by_query + :arg search_timeout: Explicit timeout for each search request. Defaults + to no timeout. 
+ :arg search_type: Search operation type, valid choices are: + 'query_then_fetch', 'dfs_query_then_fetch' + :arg size: Number of hits to return (default: 10) + :arg sort: A comma-separated list of <field>:<direction> pairs + :arg stats: Specific 'tag' of the request for logging and statistical + purposes + :arg stored_fields: A comma-separated list of stored fields to return as + part of a hit + :arg suggest_field: Specify which field to use for suggestions + :arg suggest_mode: Specify suggest mode, default 'missing', valid + choices are: 'missing', 'popular', 'always' + :arg suggest_size: How many suggestions to return in response + :arg suggest_text: The source text for which the suggestions should be + returned + :arg terminate_after: The maximum number of documents to collect for + each shard, upon reaching which the query execution will terminate + early. + :arg timeout: Time each individual bulk request should wait for shards + that are unavailable., default '1m' + :arg track_scores: Whether to calculate and return scores even if they + are not used for sorting + :arg version: Specify whether to return document version as part of a + hit + :arg version_type: Should the document increment the version number + (internal) on hit or not (reindex) + :arg wait_for_active_shards: Sets the number of shard copies that must + be active before proceeding with the update by query operation. + Defaults to 1, meaning the primary shard only. 
Set to `all` for all + shard copies, otherwise set to any non-negative value less than or + equal to the total number of copies for the shard (number of + replicas + 1) + :arg wait_for_completion: Should the request should block until the + reindex is complete., default False + """ + if index in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'index'.") + return self.transport.perform_request('POST', _make_path(index, + doc_type, '_update_by_query'), params=params, body=body) + + @query_params('refresh', 'requests_per_second', 'timeout', + 'wait_for_active_shards', 'wait_for_completion') + def reindex(self, body, params=None): + """ + Reindex all documents from one index to another. + `<https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html>`_ + + :arg body: The search definition using the Query DSL and the prototype + for the index request. + :arg refresh: Should the effected indexes be refreshed? + :arg requests_per_second: The throttle to set on this request in sub- + requests per second. -1 means set no throttle as does "unlimited" + which is the only non-float this accepts., default 0 + :arg timeout: Time each individual bulk request should wait for shards + that are unavailable., default '1m' + :arg wait_for_active_shards: Sets the number of shard copies that must + be active before proceeding with the reindex operation. Defaults to + 1, meaning the primary shard only. 
Set to `all` for all shard + copies, otherwise set to any non-negative value less than or equal + to the total number of copies for the shard (number of replicas + 1) + :arg wait_for_completion: Should the request should block until the + reindex is complete., default False + """ + if body in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'body'.") + return self.transport.perform_request('POST', '/_reindex', + params=params, body=body) + + @query_params('requests_per_second') + def reindex_rethrottle(self, task_id=None, params=None): + """ + Change the value of ``requests_per_second`` of a running ``reindex`` task. + `<https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html>`_ + + :arg task_id: The task id to rethrottle + :arg requests_per_second: The throttle to set on this request in + floating sub-requests per second. -1 means set no throttle. + """ + return self.transport.perform_request('POST', _make_path('_reindex', + task_id, '_rethrottle'), params=params) + + @query_params('_source', '_source_exclude', '_source_include', + 'allow_no_indices', 'analyze_wildcard', 'analyzer', 'conflicts', + 'default_operator', 'df', 'docvalue_fields', 'expand_wildcards', + 'explain', 'from_', 'ignore_unavailable', 'lenient', + 'lowercase_expanded_terms', 'preference', 'q', 'refresh', + 'request_cache', 'requests_per_second', 'routing', 'scroll', + 'scroll_size', 'search_timeout', 'search_type', 'size', 'sort', 'stats', + 'stored_fields', 'suggest_field', 'suggest_mode', 'suggest_size', + 'suggest_text', 'terminate_after', 'timeout', 'track_scores', 'version', + 'wait_for_active_shards', 'wait_for_completion') + def delete_by_query(self, index, body, doc_type=None, params=None): + """ + Delete all documents matching a query. 
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-delete-by-query.html>`_ + + :arg index: A comma-separated list of index names to search; use `_all` + or empty string to perform the operation on all indices + :arg body: The search definition using the Query DSL + :arg doc_type: A comma-separated list of document types to search; leave + empty to perform the operation on all types + :arg _source: True or false to return the _source field or not, or a + list of fields to return + :arg _source_exclude: A list of fields to exclude from the returned + _source field + :arg _source_include: A list of fields to extract and return from the + _source field + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg analyze_wildcard: Specify whether wildcard and prefix queries + should be analyzed (default: false) + :arg analyzer: The analyzer to use for the query string + :arg conflicts: What to do when the delete-by-query hits version + conflicts?, default 'abort', valid choices are: 'abort', 'proceed' + :arg default_operator: The default operator for query string query (AND + or OR), default 'OR', valid choices are: 'AND', 'OR' + :arg df: The field to use as default where no field prefix is given in + the query string + :arg docvalue_fields: A comma-separated list of fields to return as the + docvalue representation of a field for each hit + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg explain: Specify whether to return detailed information about score + computation as part of a hit + :arg from\_: Starting offset (default: 0) + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg lenient: Specify whether 
format-based query failures (such as + providing text to a numeric field) should be ignored + :arg lowercase_expanded_terms: Specify whether query terms should be + lowercased + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg q: Query in the Lucene query string syntax + :arg refresh: Should the effected indexes be refreshed? + :arg request_cache: Specify if request cache should be used for this + request or not, defaults to index level setting + :arg requests_per_second: The throttle for this request in sub-requests + per second. -1 means set no throttle., default 0 + :arg routing: A comma-separated list of specific routing values + :arg scroll: Specify how long a consistent view of the index should be + maintained for scrolled search + :arg scroll_size: Size on the scroll request powering the + update_by_query + :arg search_timeout: Explicit timeout for each search request. Defaults + to no timeout. + :arg search_type: Search operation type, valid choices are: + 'query_then_fetch', 'dfs_query_then_fetch' + :arg size: Number of hits to return (default: 10) + :arg sort: A comma-separated list of <field>:<direction> pairs + :arg stats: Specific 'tag' of the request for logging and statistical + purposes + :arg stored_fields: A comma-separated list of stored fields to return as + part of a hit + :arg suggest_field: Specify which field to use for suggestions + :arg suggest_mode: Specify suggest mode, default 'missing', valid + choices are: 'missing', 'popular', 'always' + :arg suggest_size: How many suggestions to return in response + :arg suggest_text: The source text for which the suggestions should be + returned + :arg terminate_after: The maximum number of documents to collect for + each shard, upon reaching which the query execution will terminate + early. 
+ :arg timeout: Time each individual bulk request should wait for shards + that are unavailable., default '1m' + :arg track_scores: Whether to calculate and return scores even if they + are not used for sorting + :arg version: Specify whether to return document version as part of a + hit + :arg wait_for_active_shards: Sets the number of shard copies that must + be active before proceeding with the delete by query operation. + Defaults to 1, meaning the primary shard only. Set to `all` for all + shard copies, otherwise set to any non-negative value less than or + equal to the total number of copies for the shard (number of + replicas + 1) + :arg wait_for_completion: Should the request should block until the + delete-by-query is complete., default False + """ + for param in (index, body): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('POST', _make_path(index, + doc_type, '_delete_by_query'), params=params, body=body) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'local', 'preference', 'routing') + def search_shards(self, index=None, doc_type=None, params=None): + """ + The search shards api returns the indices and shards that a search + request would be executed against. This can give useful feedback for working + out issues or planning optimizations with routing and shard preferences. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-shards.html>`_ + + :arg index: A comma-separated list of index names to search; use `_all` + or empty string to perform the operation on all indices + :arg doc_type: A comma-separated list of document types to search; leave + empty to perform the operation on all types + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg routing: Specific routing value + """ + return self.transport.perform_request('GET', _make_path(index, + doc_type, '_search_shards'), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'preference', 'routing', 'scroll', 'search_type') + def search_template(self, index=None, doc_type=None, body=None, params=None): + """ + A query that accepts a query template and a map of key/value pairs to + fill in template parameters. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-template.html>`_ + + :arg index: A comma-separated list of index names to search; use `_all` + or empty string to perform the operation on all indices + :arg doc_type: A comma-separated list of document types to search; leave + empty to perform the operation on all types + :arg body: The search definition template and its params + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg routing: A comma-separated list of specific routing values + :arg scroll: Specify how long a consistent view of the index should be + maintained for scrolled search + :arg search_type: Search operation type, valid choices are: + 'query_then_fetch', 'query_and_fetch', 'dfs_query_then_fetch', + 'dfs_query_and_fetch' + """ + return self.transport.perform_request('GET', _make_path(index, + doc_type, '_search', 'template'), params=params, body=body) + + @query_params('_source', '_source_exclude', '_source_include', + 'analyze_wildcard', 'analyzer', 'default_operator', 'df', 'lenient', + 'lowercase_expanded_terms', 'parent', 'preference', 'q', 'routing', + 'stored_fields') + def explain(self, index, doc_type, id, body=None, params=None): + """ + The explain api computes a score explanation for a query and a specific + document. This can give useful feedback whether a document matches or + didn't match a specific query. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-explain.html>`_ + + :arg index: The name of the index + :arg doc_type: The type of the document + :arg id: The document ID + :arg body: The query definition using the Query DSL + :arg _source: True or false to return the _source field or not, or a + list of fields to return + :arg _source_exclude: A list of fields to exclude from the returned + _source field + :arg _source_include: A list of fields to extract and return from the + _source field + :arg analyze_wildcard: Specify whether wildcards and prefix queries in + the query string query should be analyzed (default: false) + :arg analyzer: The analyzer for the query string query + :arg default_operator: The default operator for query string query (AND + or OR), default 'OR', valid choices are: 'AND', 'OR' + :arg df: The default field for query string query (default: _all) + :arg lenient: Specify whether format-based query failures (such as + providing text to a numeric field) should be ignored + :arg lowercase_expanded_terms: Specify whether query terms should be + lowercased + :arg parent: The ID of the parent document + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg q: Query in the Lucene query string syntax + :arg routing: Specific routing value + :arg stored_fields: A comma-separated list of stored fields to return in + the response + """ + for param in (index, doc_type, id): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('GET', _make_path(index, + doc_type, id, '_explain'), params=params, body=body) + + @query_params('scroll') + def scroll(self, scroll_id=None, body=None, params=None): + """ + Scroll a search request created by specifying the scroll parameter. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html>`_ + + :arg scroll_id: The scroll ID + :arg body: The scroll ID if not passed by URL or query parameter. + :arg scroll: Specify how long a consistent view of the index should be + maintained for scrolled search + """ + if scroll_id in SKIP_IN_PATH and body in SKIP_IN_PATH: + raise ValueError("You need to supply scroll_id or body.") + elif scroll_id and not body: + body = scroll_id + elif scroll_id: + params['scroll_id'] = scroll_id + + return self.transport.perform_request('GET', '/_search/scroll', + params=params, body=body) + + @query_params() + def clear_scroll(self, scroll_id=None, body=None, params=None): + """ + Clear the scroll request created by specifying the scroll parameter to + search. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html>`_ + + :arg scroll_id: A comma-separated list of scroll IDs to clear + :arg body: A comma-separated list of scroll IDs to clear if none was + specified via the scroll_id parameter + """ + if scroll_id in SKIP_IN_PATH and body in SKIP_IN_PATH: + raise ValueError("You need to supply scroll_id or body.") + elif scroll_id and not body: + body = scroll_id + elif scroll_id: + params['scroll_id'] = scroll_id + + return self.transport.perform_request('DELETE', '/_search/scroll', + params=params, body=body) + + @query_params('parent', 'refresh', 'routing', 'timeout', 'version', + 'version_type', 'wait_for_active_shards') + def delete(self, index, doc_type, id, params=None): + """ + Delete a typed JSON document from a specific index based on its id. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-delete.html>`_ + + :arg index: The name of the index + :arg doc_type: The type of the document + :arg id: The document ID + :arg parent: ID of parent document + :arg refresh: If `true` then refresh the effected shards to make this + operation visible to search, if `wait_for` then wait for a refresh + to make this operation visible to search, if `false` (the default) + then do nothing with refreshes., valid choices are: 'true', 'false', + 'wait_for' + :arg routing: Specific routing value + :arg timeout: Explicit operation timeout + :arg version: Explicit version number for concurrency control + :arg version_type: Specific version type, valid choices are: 'internal', + 'external', 'external_gte', 'force' + :arg wait_for_active_shards: Sets the number of shard copies that must + be active before proceeding with the delete operation. Defaults to + 1, meaning the primary shard only. Set to `all` for all shard + copies, otherwise set to any non-negative value less than or equal + to the total number of copies for the shard (number of replicas + 1) + """ + for param in (index, doc_type, id): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('DELETE', _make_path(index, + doc_type, id), params=params) + + @query_params('allow_no_indices', 'analyze_wildcard', 'analyzer', + 'default_operator', 'df', 'expand_wildcards', 'ignore_unavailable', + 'lenient', 'lowercase_expanded_terms', 'min_score', 'preference', 'q', + 'routing') + def count(self, index=None, doc_type=None, body=None, params=None): + """ + Execute a query and get the number of matches for that query. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-count.html>`_ + + :arg index: A comma-separated list of indices to restrict the results + :arg doc_type: A comma-separated list of types to restrict the results + :arg body: A query to restrict the results specified with the Query DSL + (optional) + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg analyze_wildcard: Specify whether wildcard and prefix queries + should be analyzed (default: false) + :arg analyzer: The analyzer to use for the query string + :arg default_operator: The default operator for query string query (AND + or OR), default 'OR', valid choices are: 'AND', 'OR' + :arg df: The field to use as default where no field prefix is given in + the query string + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg lenient: Specify whether format-based query failures (such as + providing text to a numeric field) should be ignored + :arg lowercase_expanded_terms: Specify whether query terms should be + lowercased + :arg min_score: Include only documents with a specific `_score` value in + the result + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg q: Query in the Lucene query string syntax + :arg routing: Specific routing value + """ + if doc_type and not index: + index = '_all' + + return self.transport.perform_request('GET', _make_path(index, + doc_type, '_count'), params=params, body=body) + + @query_params('_source', '_source_exclude', '_source_include', 'fields', + 'pipeline', 'refresh', 'routing', 'timeout', 
'wait_for_active_shards') + def bulk(self, body, index=None, doc_type=None, params=None): + """ + Perform many index/delete operations in a single API call. + + See the :func:`~elasticsearch.helpers.bulk` helper function for a more + friendly API. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html>`_ + + :arg body: The operation definition and data (action-data pairs), + separated by newlines + :arg index: Default index for items which don't provide one + :arg doc_type: Default document type for items which don't provide one + :arg _source: True or false to return the _source field or not, or + default list of fields to return, can be overridden on each sub- + request + :arg _source_exclude: Default list of fields to exclude from the + returned _source field, can be overridden on each sub-request + :arg _source_include: Default list of fields to extract and return from + the _source field, can be overridden on each sub-request + :arg fields: Default comma-separated list of fields to return in the + response for updates, can be overridden on each sub-request + :arg pipeline: The pipeline id to preprocess incoming documents with + :arg refresh: If `true` then refresh the effected shards to make this + operation visible to search, if `wait_for` then wait for a refresh + to make this operation visible to search, if `false` (the default) + then do nothing with refreshes., valid choices are: 'true', 'false', + 'wait_for' + :arg routing: Specific routing value + :arg timeout: Explicit operation timeout + :arg wait_for_active_shards: Sets the number of shard copies that must + be active before proceeding with the bulk operation. Defaults to 1, + meaning the primary shard only. 
Set to `all` for all shard copies, + otherwise set to any non-negative value less than or equal to the + total number of copies for the shard (number of replicas + 1) + """ + if body in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'body'.") + return self.transport.perform_request('POST', _make_path(index, + doc_type, '_bulk'), params=params, body=self._bulk_body(body)) + + @query_params('max_concurrent_searches', 'search_type') + def msearch(self, body, index=None, doc_type=None, params=None): + """ + Execute several search requests within the same API. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-multi-search.html>`_ + + :arg body: The request definitions (metadata-search request definition + pairs), separated by newlines + :arg index: A comma-separated list of index names to use as default + :arg doc_type: A comma-separated list of document types to use as + default + :arg max_concurrent_searches: Controls the maximum number of concurrent + searches the multi search api will execute + :arg search_type: Search operation type, valid choices are: + 'query_then_fetch', 'query_and_fetch', 'dfs_query_then_fetch', + 'dfs_query_and_fetch' + """ + if body in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'body'.") + return self.transport.perform_request('GET', _make_path(index, + doc_type, '_msearch'), params=params, body=self._bulk_body(body)) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'preference', 'routing') + def suggest(self, body, index=None, params=None): + """ + The suggest feature suggests similar looking terms based on a provided + text by using a suggester. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters.html>`_ + + :arg body: The request definition + :arg index: A comma-separated list of index names to restrict the + operation; use `_all` or empty string to perform the operation on + all indices + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg routing: Specific routing value + """ + if body in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'body'.") + return self.transport.perform_request('POST', _make_path(index, + '_suggest'), params=params, body=body) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'percolate_format', 'percolate_index', 'percolate_preference', + 'percolate_routing', 'percolate_type', 'preference', 'routing', + 'version', 'version_type') + def percolate(self, index, doc_type, id=None, body=None, params=None): + """ + The percolator allows to register queries against an index, and then + send percolate requests which include a doc, and getting back the + queries that match on that doc out of the set of registered queries. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-percolate.html>`_ + + :arg index: The index of the document being percolated. + :arg doc_type: The type of the document being percolated. + :arg id: Substitute the document in the request body with a document + that is known by the specified id. 
On top of the id, the index and + type parameter will be used to retrieve the document from within the + cluster. + :arg body: The percolator request definition using the percolate DSL + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg percolate_format: Return an array of matching query IDs instead of + objects, valid choices are: 'ids' + :arg percolate_index: The index to percolate the document into. Defaults + to index. + :arg percolate_preference: Which shard to prefer when executing the + percolate request. + :arg percolate_routing: The routing value to use when percolating the + existing document. + :arg percolate_type: The type to percolate document into. Defaults to + type. 
+ :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg routing: A comma-separated list of specific routing values + :arg version: Explicit version number for concurrency control + :arg version_type: Specific version type, valid choices are: 'internal', + 'external', 'external_gte', 'force' + """ + for param in (index, doc_type): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('GET', _make_path(index, + doc_type, id, '_percolate'), params=params, body=body) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable') + def mpercolate(self, body, index=None, doc_type=None, params=None): + """ + The percolator allows to register queries against an index, and then + send percolate requests which include a doc, and getting back the + queries that match on that doc out of the set of registered queries. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-percolate.html>`_ + + :arg body: The percolate request definitions (header & body pair), + separated by newlines + :arg index: The index of the document being count percolated to use as + default + :arg doc_type: The type of the document being percolated to use as + default. + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + """ + if body in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'body'.") + return self.transport.perform_request('GET', _make_path(index, + doc_type, '_mpercolate'), params=params, body=self._bulk_body(body)) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'percolate_index', 'percolate_type', 'preference', 'routing', 'version', + 'version_type') + def count_percolate(self, index, doc_type, id=None, body=None, params=None): + """ + The percolator allows to register queries against an index, and then + send percolate requests which include a doc, and getting back the + queries that match on that doc out of the set of registered queries. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-percolate.html>`_ + + :arg index: The index of the document being count percolated. + :arg doc_type: The type of the document being count percolated. + :arg id: Substitute the document in the request body with a document + that is known by the specified id. On top of the id, the index and + type parameter will be used to retrieve the document from within the + cluster. + :arg body: The count percolator request definition using the percolate + DSL + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg percolate_index: The index to count percolate the document into. + Defaults to index. + :arg percolate_type: The type to count percolate document into. Defaults + to type. + :arg preference: Specify the node or shard the operation should be + performed on (default: random) + :arg routing: A comma-separated list of specific routing values + :arg version: Explicit version number for concurrency control + :arg version_type: Specific version type, valid choices are: 'internal', + 'external', 'external_gte', 'force' + """ + for param in (index, doc_type): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('GET', _make_path(index, + doc_type, id, '_percolate', 'count'), params=params, body=body) + + @query_params('field_statistics', 'fields', 'offsets', 'parent', 'payloads', + 'positions', 'preference', 'realtime', 'routing', 'term_statistics', + 'version', 'version_type') + def termvectors(self, index, doc_type, id=None, body=None, params=None): + """ + Returns information and statistics on terms in the fields of a + particular document. The document could be stored in the index or + artificially provided by the user (Added in 1.4). Note that for + documents stored in the index, this is a near realtime API as the term + vectors are not available until the next refresh. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html>`_ + + :arg index: The index in which the document resides. + :arg doc_type: The type of the document. 
+ :arg id: The id of the document, when not specified a doc param should + be supplied. + :arg body: Define parameters and or supply a document to get termvectors + for. See documentation. + :arg field_statistics: Specifies if document count, sum of document + frequencies and sum of total term frequencies should be returned., + default True + :arg fields: A comma-separated list of fields to return. + :arg offsets: Specifies if term offsets should be returned., default + True + :arg parent: Parent id of documents. + :arg payloads: Specifies if term payloads should be returned., default + True + :arg positions: Specifies if term positions should be returned., default + True + :arg preference: Specify the node or shard the operation should be + performed on (default: random). + :arg realtime: Specifies if request is real-time as opposed to near- + real-time (default: true). + :arg routing: Specific routing value. + :arg term_statistics: Specifies if total term frequency and document + frequency should be returned., default False + :arg version: Explicit version number for concurrency control + :arg version_type: Specific version type, valid choices are: 'internal', + 'external', 'external_gte', 'force' + """ + for param in (index, doc_type): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('GET', _make_path(index, + doc_type, id, '_termvectors'), params=params, body=body) + + @query_params('field_statistics', 'fields', 'ids', 'offsets', 'parent', + 'payloads', 'positions', 'preference', 'realtime', 'routing', + 'term_statistics', 'version', 'version_type') + def mtermvectors(self, index=None, doc_type=None, body=None, params=None): + """ + Multi termvectors API allows to get multiple termvectors based on an + index, type and id. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-termvectors.html>`_ + + :arg index: The index in which the document resides. 
+ :arg doc_type: The type of the document. + :arg body: Define ids, documents, parameters or a list of parameters per + document here. You must at least provide a list of document ids. See + documentation. + :arg field_statistics: Specifies if document count, sum of document + frequencies and sum of total term frequencies should be returned. + Applies to all returned documents unless otherwise specified in body + "params" or "docs"., default True + :arg fields: A comma-separated list of fields to return. Applies to all + returned documents unless otherwise specified in body "params" or + "docs". + :arg ids: A comma-separated list of documents ids. You must define ids + as parameter or set "ids" or "docs" in the request body + :arg offsets: Specifies if term offsets should be returned. Applies to + all returned documents unless otherwise specified in body "params" + or "docs"., default True + :arg parent: Parent id of documents. Applies to all returned documents + unless otherwise specified in body "params" or "docs". + :arg payloads: Specifies if term payloads should be returned. Applies to + all returned documents unless otherwise specified in body "params" + or "docs"., default True + :arg positions: Specifies if term positions should be returned. Applies + to all returned documents unless otherwise specified in body + "params" or "docs"., default True + :arg preference: Specify the node or shard the operation should be + performed on (default: random) .Applies to all returned documents + unless otherwise specified in body "params" or "docs". + :arg realtime: Specifies if requests are real-time as opposed to near- + real-time (default: true). + :arg routing: Specific routing value. Applies to all returned documents + unless otherwise specified in body "params" or "docs". + :arg term_statistics: Specifies if total term frequency and document + frequency should be returned. 
Applies to all returned documents + unless otherwise specified in body "params" or "docs"., default + False + :arg version: Explicit version number for concurrency control + :arg version_type: Specific version type, valid choices are: 'internal', + 'external', 'external_gte', 'force' + """ + return self.transport.perform_request('GET', _make_path(index, + doc_type, '_mtermvectors'), params=params, body=body) + + @query_params() + def put_script(self, lang, id, body, params=None): + """ + Create a script in given language with specified ID. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting.html>`_ + + :arg lang: Script language + :arg id: Script ID + :arg body: The document + """ + for param in (lang, id, body): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('PUT', _make_path('_scripts', + lang, id), params=params, body=body) + + @query_params() + def get_script(self, lang, id, params=None): + """ + Retrieve a script from the API. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting.html>`_ + + :arg lang: Script language + :arg id: Script ID + """ + for param in (lang, id): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('GET', _make_path('_scripts', + lang, id), params=params) + + @query_params() + def delete_script(self, lang, id, params=None): + """ + Remove a stored script from elasticsearch. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting.html>`_ + + :arg lang: Script language + :arg id: Script ID + """ + for param in (lang, id): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('DELETE', + _make_path('_scripts', lang, id), params=params) + + @query_params() + def put_template(self, id, body, params=None): + """ + Create a search template. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-template.html>`_ + + :arg id: Template ID + :arg body: The document + """ + for param in (id, body): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('PUT', _make_path('_search', + 'template', id), params=params, body=body) + + @query_params() + def get_template(self, id, params=None): + """ + Retrieve a search template. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-template.html>`_ + + :arg id: Template ID + """ + if id in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'id'.") + return self.transport.perform_request('GET', _make_path('_search', + 'template', id), params=params) + + @query_params() + def delete_template(self, id, params=None): + """ + Delete a search template. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-template.html>`_ + + :arg id: Template ID + """ + if id in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'id'.") + return self.transport.perform_request('DELETE', _make_path('_search', + 'template', id), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'fields', + 'ignore_unavailable', 'level') + def field_stats(self, index=None, body=None, params=None): + """ + The field stats api allows one to find statistical properties of a + field without executing a search, but looking up measurements that are + natively available in the Lucene index. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-field-stats.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg body: Field json objects containing the name and optionally a range + to filter out indices result, that have results outside the defined + bounds + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg fields: A comma-separated list of fields for to get field + statistics for (min value, max value, and more) + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg level: Defines if field stats should be returned on a per index + level or on a cluster wide level, default 'cluster', valid choices + are: 'indices', 'cluster' + """ + return self.transport.perform_request('GET', _make_path(index, + '_field_stats'), params=params, body=body) + + @query_params() + def render_search_template(self, id=None, body=None, params=None): + """ + `<http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-template.html>`_ + + :arg id: The id of the stored search template + :arg body: The search definition template and its params + """ + return self.transport.perform_request('GET', _make_path('_render', + 'template', id), params=params, body=body) + + @query_params('search_type') + def msearch_template(self, body, index=None, doc_type=None, params=None): + """ + The /_search/template endpoint allows to use the mustache language to + pre render search requests, before they are executed and fill existing + templates with template parameters. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-template.html>`_ + + :arg body: The request definitions (metadata-search request definition + pairs), separated by newlines + :arg index: A comma-separated list of index names to use as default + :arg doc_type: A comma-separated list of document types to use as + default + :arg search_type: Search operation type, valid choices are: + 'query_then_fetch', 'query_and_fetch', 'dfs_query_then_fetch', + 'dfs_query_and_fetch' + """ + if body in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'body'.") + return self.transport.perform_request('GET', _make_path(index, doc_type, + '_msearch', 'template'), params=params, body=self._bulk_body(body)) + diff --git a/scripts/external_libs/elasticsearch/elasticsearch/client/cat.py b/scripts/external_libs/elasticsearch/elasticsearch/client/cat.py new file mode 100644 index 00000000..02704415 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/client/cat.py @@ -0,0 +1,359 @@ +from .utils import NamespacedClient, query_params, _make_path, SKIP_IN_PATH + +class CatClient(NamespacedClient): + @query_params('h', 'help', 'local', 'master_timeout', 'v') + def aliases(self, name=None, params=None): + """ + + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-alias.html>`_ + + :arg name: A comma-separated list of alias names to return + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. 
Display column headers, default False + """ + return self.transport.perform_request('GET', _make_path('_cat', + 'aliases', name), params=params) + + @query_params('bytes', 'h', 'help', 'local', 'master_timeout', 'v') + def allocation(self, node_id=None, params=None): + """ + Allocation provides a snapshot of how shards have located around the + cluster and the state of disk usage. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-allocation.html>`_ + + :arg node_id: A comma-separated list of node IDs or names to limit the + returned information + :arg bytes: The unit in which to display byte values, valid choices are: + 'b', 'k', 'kb', 'm', 'mb', 'g', 'gb', 't', 'tb', 'p', 'pb' + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', _make_path('_cat', + 'allocation', node_id), params=params) + + @query_params('h', 'help', 'local', 'master_timeout', 'v') + def count(self, index=None, params=None): + """ + Count provides quick access to the document count of the entire cluster, + or individual indices. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-count.html>`_ + + :arg index: A comma-separated list of index names to limit the returned + information + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. 
Display column headers, default False + """ + return self.transport.perform_request('GET', _make_path('_cat', + 'count', index), params=params) + + @query_params('h', 'help', 'local', 'master_timeout', 'ts', 'v') + def health(self, params=None): + """ + health is a terse, one-line representation of the same information from + :meth:`~elasticsearch.client.cluster.ClusterClient.health` API + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-health.html>`_ + + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg ts: Set to false to disable timestamping, default True + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', '/_cat/health', + params=params) + + @query_params('help') + def help(self, params=None): + """ + A simple help for the cat api. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat.html>`_ + + :arg help: Return help information, default False + """ + return self.transport.perform_request('GET', '/_cat', params=params) + + @query_params('bytes', 'format', 'h', 'health', 'help', 'local', + 'master_timeout', 'pri', 'v') + def indices(self, index=None, params=None): + """ + The indices command provides a cross-section of each index. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-indices.html>`_ + + :arg index: A comma-separated list of index names to limit the returned + information + :arg bytes: The unit in which to display byte values, valid choices are: + 'b', 'k', 'm', 'g' + :arg h: Comma-separated list of column names to display + :arg health: A health status ("green", "yellow", or "red" to filter only + indices matching the specified health status, default None, valid + choices are: 'green', 'yellow', 'red' + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg pri: Set to true to return stats only for primary shards, default + False + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', _make_path('_cat', + 'indices', index), params=params) + + @query_params('h', 'help', 'local', 'master_timeout', 'v') + def master(self, params=None): + """ + Displays the master's node ID, bound IP address, and node name. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-master.html>`_ + + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', '/_cat/master', + params=params) + + @query_params('h', 'help', 'local', 'master_timeout', 'v') + def nodes(self, params=None): + """ + The nodes command shows the cluster topology. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-nodes.html>`_ + + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', '/_cat/nodes', + params=params) + + @query_params('bytes', 'h', 'help', 'master_timeout', 'v') + def recovery(self, index=None, params=None): + """ + recovery is a view of shard replication. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-recovery.html>`_ + + :arg index: A comma-separated list of index names to limit the returned + information + :arg bytes: The unit in which to display byte values, valid choices are: + 'b', 'k', 'kb', 'm', 'mb', 'g', 'gb', 't', 'tb', 'p', 'pb' + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', _make_path('_cat', + 'recovery', index), params=params) + + @query_params('h', 'help', 'local', 'master_timeout', 'v') + def shards(self, index=None, params=None): + """ + The shards command is the detailed view of what nodes contain which shards. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-shards.html>`_ + + :arg index: A comma-separated list of index names to limit the returned + information + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', _make_path('_cat', + 'shards', index), params=params) + + @query_params('h', 'help', 'v') + def segments(self, index=None, params=None): + """ + The segments command is the detailed view of Lucene segments per index. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-segments.html>`_ + + :arg index: A comma-separated list of index names to limit the returned + information + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', _make_path('_cat', + 'segments', index), params=params) + + @query_params('h', 'help', 'local', 'master_timeout', 'v') + def pending_tasks(self, params=None): + """ + pending_tasks provides the same information as the + :meth:`~elasticsearch.client.cluster.ClusterClient.pending_tasks` API + in a convenient tabular format. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-pending-tasks.html>`_ + + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. 
Display column headers, default False + """ + return self.transport.perform_request('GET', '/_cat/pending_tasks', + params=params) + + @query_params('h', 'help', 'local', 'master_timeout', 'size', + 'thread_pool_patterns', 'v') + def thread_pool(self, thread_pools=None, params=None): + """ + Get information about thread pools. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-thread-pool.html>`_ + + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg size: The multiplier in which to display values, valid choices are: + '', 'k', 'm', 'g', 't', 'p' + :arg thread_pool_patterns: A comma-separated list of regular-expressions + to filter the thread pools in the output + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', _make_path('_cat', + 'thread_pool', thread_pools), params=params) + + @query_params('bytes', 'h', 'help', 'local', 'master_timeout', 'v') + def fielddata(self, fields=None, params=None): + """ + Shows information about currently loaded fielddata on a per-node basis. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-fielddata.html>`_ + + :arg fields: A comma-separated list of fields to return the fielddata + size + :arg bytes: The unit in which to display byte values, valid choices are: + 'b', 'k', 'kb', 'm', 'mb', 'g', 'gb', 't', 'tb', 'p', 'pb' + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. 
Display column headers, default False + """ + return self.transport.perform_request('GET', _make_path('_cat', + 'fielddata', fields), params=params) + + @query_params('h', 'help', 'local', 'master_timeout', 'v') + def plugins(self, params=None): + """ + + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-plugins.html>`_ + + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', '/_cat/plugins', + params=params) + + @query_params('h', 'help', 'local', 'master_timeout', 'v') + def nodeattrs(self, params=None): + """ + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-nodeattrs.html>`_ + + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', '/_cat/nodeattrs', + params=params) + + @query_params('h', 'help', 'local', 'master_timeout', 'v') + def repositories(self, params=None): + """ + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-repositories.html>`_ + + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg local: Return local information, do not retrieve the state from + master node, default False + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. 
Display column headers, default False + """ + return self.transport.perform_request('GET', '/_cat/repositories', + params=params) + + @query_params('h', 'help', 'ignore_unavailable', 'master_timeout', 'v') + def snapshots(self, repository=None, params=None): + """ + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-snapshots.html>`_ + + :arg repository: Name of repository from which to fetch the snapshot + information + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg ignore_unavailable: Set to true to ignore unavailable snapshots, + default False + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg v: Verbose mode. Display column headers, default False + """ + return self.transport.perform_request('GET', _make_path('_cat', + 'snapshots', repository), params=params) + + @query_params('actions', 'detailed', 'format', 'h', 'help', 'node_id', + 'parent_node', 'parent_task', 'v') + def tasks(self, params=None): + """ + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html>`_ + + :arg actions: A comma-separated list of actions that should be returned. + Leave empty to return all. + :arg detailed: Return detailed task information (default: false) + :arg format: a short version of the Accept header, e.g. json, yaml + :arg h: Comma-separated list of column names to display + :arg help: Return help information, default False + :arg node_id: A comma-separated list of node IDs or names to limit the + returned information; use `_local` to return information from the + node you're connecting to, leave empty to get information from all + nodes + :arg parent_node: Return tasks with specified parent node. + :arg parent_task: Return tasks with specified parent task id. Set to -1 + to return all. + :arg v: Verbose mode. 
Display column headers, default False
+ """
+ return self.transport.perform_request('GET', '/_cat/tasks',
+ params=params)
+
diff --git a/scripts/external_libs/elasticsearch/elasticsearch/client/cluster.py b/scripts/external_libs/elasticsearch/elasticsearch/client/cluster.py
new file mode 100644
index 00000000..a1f10bdc
--- /dev/null
+++ b/scripts/external_libs/elasticsearch/elasticsearch/client/cluster.py
@@ -0,0 +1,172 @@
+from .utils import NamespacedClient, query_params, _make_path
+
+class ClusterClient(NamespacedClient):
+ @query_params('level', 'local', 'master_timeout', 'timeout',
+ 'wait_for_active_shards', 'wait_for_events',
+ 'wait_for_no_relocating_shards', 'wait_for_nodes', 'wait_for_status')
+ def health(self, index=None, params=None):
+ """
+ Get a very simple status on the health of the cluster.
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html>`_
+
+ :arg index: Limit the information returned to a specific index
+ :arg level: Specify the level of detail for returned information,
+ default 'cluster', valid choices are: 'cluster', 'indices', 'shards'
+ :arg local: Return local information, do not retrieve the state from
+ master node (default: false)
+ :arg master_timeout: Explicit operation timeout for connection to master
+ node
+ :arg timeout: Explicit operation timeout
+ :arg wait_for_active_shards: Wait until the specified number of shards
+ is active
+ :arg wait_for_events: Wait until all currently queued events with the
+ given priority are processed, valid choices are: 'immediate',
+ 'urgent', 'high', 'normal', 'low', 'languid'
+ :arg wait_for_no_relocating_shards: Whether to wait until there are no
+ relocating shards in the cluster
+ :arg wait_for_nodes: Wait until the specified number of nodes is
+ available
+ :arg wait_for_status: Wait until cluster is in a specific state, default
+ None, valid choices are: 'green', 'yellow', 'red'
+ """
+ return self.transport.perform_request('GET', 
_make_path('_cluster', + 'health', index), params=params) + + @query_params('local', 'master_timeout') + def pending_tasks(self, params=None): + """ + The pending cluster tasks API returns a list of any cluster-level + changes (e.g. create index, update mapping, allocate or fail shard) + which have not yet been executed. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-pending.html>`_ + + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Specify timeout for connection to master + """ + return self.transport.perform_request('GET', + '/_cluster/pending_tasks', params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'flat_settings', + 'ignore_unavailable', 'local', 'master_timeout') + def state(self, metric=None, index=None, params=None): + """ + Get a comprehensive state information of the whole cluster. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-state.html>`_ + + :arg metric: Limit the information returned to the specified metrics + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg flat_settings: Return settings in flat format (default: false) + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Specify timeout for connection to master + """ + if index and not metric: + metric = '_all' + return self.transport.perform_request('GET', _make_path('_cluster', + 'state', metric, index), params=params) + + @query_params('flat_settings', 'human', 'timeout') + def stats(self, node_id=None, params=None): + """ + The Cluster Stats API allows to retrieve statistics from a cluster wide + perspective. The API returns basic index metrics and information about + the current nodes that form the cluster. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-stats.html>`_ + + :arg node_id: A comma-separated list of node IDs or names to limit the + returned information; use `_local` to return information from the + node you're connecting to, leave empty to get information from all + nodes + :arg flat_settings: Return settings in flat format (default: false) + :arg human: Whether to return time and byte values in human-readable + format., default False + :arg timeout: Explicit operation timeout + """ + url = '/_cluster/stats' + if node_id: + url = _make_path('_cluster/stats/nodes', node_id) + return self.transport.perform_request('GET', url, params=params) + + @query_params('dry_run', 'explain', 'master_timeout', 'metric', + 'retry_failed', 'timeout') + def reroute(self, body=None, params=None): + """ + Explicitly execute a cluster reroute allocation command including specific commands. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-reroute.html>`_ + + :arg body: The definition of `commands` to perform (`move`, `cancel`, + `allocate`) + :arg dry_run: Simulate the operation only and return the resulting state + :arg explain: Return an explanation of why the commands can or cannot be + executed + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg metric: Limit the information returned to the specified metrics. + Defaults to all but metadata, valid choices are: '_all', 'blocks', + 'metadata', 'nodes', 'routing_table', 'master_node', 'version' + :arg retry_failed: Retries allocation of shards that are blocked due to + too many subsequent allocation failures + :arg timeout: Explicit operation timeout + """ + return self.transport.perform_request('POST', '/_cluster/reroute', + params=params, body=body) + + @query_params('flat_settings', 'include_defaults', 'master_timeout', + 'timeout') + def get_settings(self, params=None): + """ + Get cluster settings. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-update-settings.html>`_ + + :arg flat_settings: Return settings in flat format (default: false) + :arg include_defaults: Whether to return all default clusters setting., + default False + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg timeout: Explicit operation timeout + """ + return self.transport.perform_request('GET', '/_cluster/settings', + params=params) + + @query_params('flat_settings', 'master_timeout', 'timeout') + def put_settings(self, body=None, params=None): + """ + Update cluster wide specific settings. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-update-settings.html>`_ + + :arg body: The settings to be updated. Can be either `transient` or + `persistent` (survives cluster restart). 
+ :arg flat_settings: Return settings in flat format (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg timeout: Explicit operation timeout + """ + return self.transport.perform_request('PUT', '/_cluster/settings', + params=params, body=body) + + @query_params('include_disk_info', 'include_yes_decisions') + def allocation_explain(self, body=None, params=None): + """ + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-allocation-explain.html>`_ + + :arg body: The index, shard, and primary flag to explain. Empty means + 'explain the first unassigned shard' + :arg include_disk_info: Return information about disk usage and shard + sizes (default: false) + :arg include_yes_decisions: Return 'YES' decisions in explanation + (default: false) + """ + return self.transport.perform_request('GET', + '/_cluster/allocation/explain', params=params, body=body) + + diff --git a/scripts/external_libs/elasticsearch/elasticsearch/client/indices.py b/scripts/external_libs/elasticsearch/elasticsearch/client/indices.py new file mode 100644 index 00000000..c820ec5d --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/client/indices.py @@ -0,0 +1,896 @@ +from .utils import NamespacedClient, query_params, _make_path, SKIP_IN_PATH + +class IndicesClient(NamespacedClient): + @query_params('analyzer', 'attributes', 'char_filter', 'explain', 'field', + 'filter', 'format', 'prefer_local', 'text', 'tokenizer') + def analyze(self, index=None, body=None, params=None): + """ + Perform the analysis process on a text and return the tokens breakdown of the text. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-analyze.html>`_ + + :arg index: The name of the index to scope the operation + :arg body: The text on which the analysis should be performed + :arg analyzer: The name of the analyzer to use + :arg attributes: A comma-separated list of token attributes to output, + this parameter works only with `explain=true` + :arg char_filter: A comma-separated list of character filters to use for + the analysis + :arg explain: With `true`, outputs more advanced details. (default: + false) + :arg field: Use the analyzer configured for this field (instead of + passing the analyzer name) + :arg filter: A comma-separated list of filters to use for the analysis + :arg format: Format of the output, default 'detailed', valid choices + are: 'detailed', 'text' + :arg prefer_local: With `true`, specify that a local shard should be + used if available, with `false`, use a random shard (default: true) + :arg text: The text on which the analysis should be performed (when + request body is not used) + :arg tokenizer: The name of the tokenizer to use for the analysis + """ + return self.transport.perform_request('GET', _make_path(index, + '_analyze'), params=params, body=body) + + @query_params('allow_no_indices', 'expand_wildcards', 'force', + 'ignore_unavailable', 'operation_threading') + def refresh(self, index=None, params=None): + """ + Explicitly refresh one or more index, making all operations performed + since the last refresh available for search. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-refresh.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg force: Force a refresh even if not required, default False + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg operation_threading: TODO: ? + """ + return self.transport.perform_request('POST', _make_path(index, + '_refresh'), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'force', + 'ignore_unavailable', 'wait_if_ongoing') + def flush(self, index=None, params=None): + """ + Explicitly flush one or more indices. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-flush.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string for all indices + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg force: Whether a flush should be forced even if it is not + necessarily needed ie. if no changes will be committed to the index. + This is useful if transaction log IDs should be incremented even if + no uncommitted changes are present. (This setting can be considered + as internal) + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg wait_if_ongoing: If set to true the flush operation will block + until the flush can be executed if another flush operation is + already executing. The default is true. 
If set to false the flush + will be skipped iff if another flush operation is already running. + """ + return self.transport.perform_request('POST', _make_path(index, + '_flush'), params=params) + + @query_params('master_timeout', 'timeout', 'update_all_types', + 'wait_for_active_shards') + def create(self, index, body=None, params=None): + """ + Create an index in Elasticsearch. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html>`_ + + :arg index: The name of the index + :arg body: The configuration for the index (`settings` and `mappings`) + :arg master_timeout: Specify timeout for connection to master + :arg timeout: Explicit operation timeout + :arg update_all_types: Whether to update the mapping for all fields with + the same name across all types or not + :arg wait_for_active_shards: Set the number of active shards to wait for + before the operation returns. + """ + if index in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'index'.") + return self.transport.perform_request('PUT', _make_path(index), + params=params, body=body) + + @query_params('allow_no_indices', 'expand_wildcards', 'flat_settings', + 'human', 'ignore_unavailable', 'include_defaults', 'local') + def get(self, index, feature=None, params=None): + """ + The get index API allows to retrieve information about one or more indexes. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-get-index.html>`_ + + :arg index: A comma-separated list of index names + :arg feature: A comma-separated list of features + :arg allow_no_indices: Ignore if a wildcard expression resolves to no + concrete indices (default: false) + :arg expand_wildcards: Whether wildcard expressions should get expanded + to open or closed indices (default: open), default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg flat_settings: Return settings in flat format (default: false) + :arg human: Whether to return version and creation date values in human- + readable format., default False + :arg ignore_unavailable: Ignore unavailable indexes (default: false) + :arg include_defaults: Whether to return all default setting for each of + the indices., default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + """ + if index in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'index'.") + return self.transport.perform_request('GET', _make_path(index, + feature), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'master_timeout', 'timeout') + def open(self, index, params=None): + """ + Open a closed index to make it available for search. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-open-close.html>`_ + + :arg index: The name of the index + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'closed', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg master_timeout: Specify timeout for connection to master + :arg timeout: Explicit operation timeout + """ + if index in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'index'.") + return self.transport.perform_request('POST', _make_path(index, + '_open'), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'master_timeout', 'timeout') + def close(self, index, params=None): + """ + Close an index to remove it's overhead from the cluster. Closed index + is blocked for read/write operations. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-open-close.html>`_ + + :arg index: The name of the index + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg master_timeout: Specify timeout for connection to master + :arg timeout: Explicit operation timeout + """ + if index in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'index'.") + return self.transport.perform_request('POST', _make_path(index, + '_close'), params=params) + + @query_params('master_timeout', 'timeout') + def delete(self, index, params=None): + """ + Delete an index in Elasticsearch + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-delete-index.html>`_ + + :arg index: A comma-separated list of indices to delete; use `_all` or + `*` string to delete all indices + :arg master_timeout: Specify timeout for connection to master + :arg timeout: Explicit operation timeout + """ + if index in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'index'.") + return self.transport.perform_request('DELETE', _make_path(index), + params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'local') + def exists(self, index, params=None): + """ + Return a boolean indicating whether given index exists. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-exists.html>`_ + + :arg index: A comma-separated list of indices to check + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg local: Return local information, do not retrieve the state from + master node (default: false) + """ + if index in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'index'.") + return self.transport.perform_request('HEAD', _make_path(index), + params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'local') + def exists_type(self, index, doc_type, params=None): + """ + Check if a type/types exists in an index/indices. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-types-exists.html>`_ + + :arg index: A comma-separated list of index names; use `_all` to check + the types across all indices + :arg doc_type: A comma-separated list of document types to check + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg local: Return local information, do not retrieve the state from + master node (default: false) + """ + for param in (index, doc_type): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('HEAD', _make_path(index, doc_type), + params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'master_timeout', 'timeout', 'update_all_types') + def put_mapping(self, doc_type, body, index=None, params=None): + """ + Register specific mapping definition for a specific type. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-put-mapping.html>`_ + + :arg doc_type: The name of the document type + :arg body: The mapping definition + :arg index: A comma-separated list of index names the mapping should be + added to (supports wildcards); use `_all` or omit to add the mapping + on all indices. + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg master_timeout: Specify timeout for connection to master + :arg timeout: Explicit operation timeout + :arg update_all_types: Whether to update the mapping for all fields with + the same name across all types or not + """ + for param in (doc_type, body): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('PUT', _make_path(index, + '_mapping', doc_type), params=params, body=body) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'local') + def get_mapping(self, index=None, doc_type=None, params=None): + """ + Retrieve mapping definition of index or index/type. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-get-mapping.html>`_ + + :arg index: A comma-separated list of index names + :arg doc_type: A comma-separated list of document types + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg local: Return local information, do not retrieve the state from + master node (default: false) + """ + return self.transport.perform_request('GET', _make_path(index, + '_mapping', doc_type), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'include_defaults', 'local') + def get_field_mapping(self, fields, index=None, doc_type=None, params=None): + """ + Retrieve mapping definition of a specific field. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-get-field-mapping.html>`_ + + :arg fields: A comma-separated list of fields + :arg index: A comma-separated list of index names + :arg doc_type: A comma-separated list of document types + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg include_defaults: Whether the default mapping values should be + returned as well + :arg local: Return local information, do not retrieve the state from + master node (default: false) + """ + if fields in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'fields'.") + return self.transport.perform_request('GET', _make_path(index, + '_mapping', doc_type, 'field', fields), params=params) + + @query_params('master_timeout', 'timeout') + def put_alias(self, index, name, body=None, params=None): + """ + Create an alias for a specific index/indices. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-aliases.html>`_ + + :arg index: A comma-separated list of index names the alias should point + to (supports wildcards); use `_all` to perform the operation on all + indices. + :arg name: The name of the alias to be created or updated + :arg body: The settings for the alias, such as `routing` or `filter` + :arg master_timeout: Specify timeout for connection to master + :arg timeout: Explicit timeout for the operation + """ + for param in (index, name): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('PUT', _make_path(index, + '_alias', name), params=params, body=body) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'local') + def exists_alias(self, index=None, name=None, params=None): + """ + Return a boolean indicating whether given alias exists. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-aliases.html>`_ + + :arg index: A comma-separated list of index names to filter aliases + :arg name: A comma-separated list of alias names to return + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default ['open', 'closed'], + valid choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg local: Return local information, do not retrieve the state from + master node (default: false) + """ + return self.transport.perform_request('HEAD', _make_path(index, '_alias', + name), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'local') + def get_alias(self, index=None, name=None, params=None): + """ + Retrieve a specified alias. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-aliases.html>`_ + + :arg index: A comma-separated list of index names to filter aliases + :arg name: A comma-separated list of alias names to return + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'all', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg local: Return local information, do not retrieve the state from + master node (default: false) + """ + return self.transport.perform_request('GET', _make_path(index, + '_alias', name), params=params) + + @query_params('master_timeout', 'timeout') + def update_aliases(self, body, params=None): + """ + Update specified aliases. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-aliases.html>`_ + + :arg body: The definition of `actions` to perform + :arg master_timeout: Specify timeout for connection to master + :arg timeout: Request timeout + """ + if body in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'body'.") + return self.transport.perform_request('POST', '/_aliases', + params=params, body=body) + + @query_params('master_timeout', 'timeout') + def delete_alias(self, index, name, params=None): + """ + Delete specific alias. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-aliases.html>`_ + + :arg index: A comma-separated list of index names (supports wildcards); + use `_all` for all indices + :arg name: A comma-separated list of aliases to delete (supports + wildcards); use `_all` to delete all aliases for the specified + indices. 
+ :arg master_timeout: Specify timeout for connection to master + :arg timeout: Explicit timeout for the operation + """ + for param in (index, name): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('DELETE', _make_path(index, + '_alias', name), params=params) + + @query_params('create', 'flat_settings', 'master_timeout', 'order', + 'timeout') + def put_template(self, name, body, params=None): + """ + Create an index template that will automatically be applied to new + indices created. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html>`_ + + :arg name: The name of the template + :arg body: The template definition + :arg create: Whether the index template should only be added if new or + can also replace an existing one, default False + :arg flat_settings: Return settings in flat format (default: false) + :arg master_timeout: Specify timeout for connection to master + :arg order: The order for this template when merging multiple matching + ones (higher numbers are merged later, overriding the lower numbers) + :arg timeout: Explicit operation timeout + """ + for param in (name, body): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('PUT', _make_path('_template', + name), params=params, body=body) + + @query_params('local', 'master_timeout') + def exists_template(self, name, params=None): + """ + Return a boolean indicating whether given template exists. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html>`_ + + :arg name: The name of the template + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'name'.") + return self.transport.perform_request('HEAD', _make_path('_template', + name), params=params) + + @query_params('flat_settings', 'local', 'master_timeout') + def get_template(self, name=None, params=None): + """ + Retrieve an index template by its name. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html>`_ + + :arg name: The name of the template + :arg flat_settings: Return settings in flat format (default: false) + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + """ + return self.transport.perform_request('GET', _make_path('_template', + name), params=params) + + @query_params('master_timeout', 'timeout') + def delete_template(self, name, params=None): + """ + Delete an index template by its name. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html>`_ + + :arg name: The name of the template + :arg master_timeout: Specify timeout for connection to master + :arg timeout: Explicit operation timeout + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'name'.") + return self.transport.perform_request('DELETE', + _make_path('_template', name), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'flat_settings', + 'human', 'ignore_unavailable', 'include_defaults', 'local') + def get_settings(self, index=None, name=None, params=None): + """ + Retrieve settings for one or more (or all) indices. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-get-settings.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg name: The name of the settings that should be included + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default ['open', 'closed'], + valid choices are: 'open', 'closed', 'none', 'all' + :arg flat_settings: Return settings in flat format (default: false) + :arg human: Whether to return version and creation date values in human- + readable format., default False + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg include_defaults: Whether to return all default setting for each of + the indices., default False + :arg local: Return local information, do not retrieve the state from + master node (default: false) + """ + return self.transport.perform_request('GET', _make_path(index, + '_settings', name), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'flat_settings', + 'ignore_unavailable', 'master_timeout', 'preserve_existing') + def put_settings(self, body, index=None, params=None): + """ + Change specific index level settings in real time. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-update-settings.html>`_ + + :arg body: The index settings to be updated + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg flat_settings: Return settings in flat format (default: false) + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg master_timeout: Specify timeout for connection to master + :arg preserve_existing: Whether to update existing settings. If set to + `true` existing settings on an index remain unchanged, the default + is `false` + """ + if body in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'body'.") + return self.transport.perform_request('PUT', _make_path(index, + '_settings'), params=params, body=body) + + @query_params('completion_fields', 'fielddata_fields', 'fields', 'groups', + 'human', 'level', 'types') + def stats(self, index=None, metric=None, params=None): + """ + Retrieve statistics on different operations happening on an index. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-stats.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg metric: Limit the information returned the specific metrics. 
+ :arg completion_fields: A comma-separated list of fields for `fielddata` + and `suggest` index metric (supports wildcards) + :arg fielddata_fields: A comma-separated list of fields for `fielddata` + index metric (supports wildcards) + :arg fields: A comma-separated list of fields for `fielddata` and + `completion` index metric (supports wildcards) + :arg groups: A comma-separated list of search groups for `search` index + metric + :arg human: Whether to return time and byte values in human-readable + format., default False + :arg level: Return stats aggregated at cluster, index or shard level, + default 'indices', valid choices are: 'cluster', 'indices', 'shards' + :arg types: A comma-separated list of document types for the `indexing` + index metric + """ + return self.transport.perform_request('GET', _make_path(index, + '_stats', metric), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'human', + 'ignore_unavailable', 'operation_threading', 'verbose') + def segments(self, index=None, params=None): + """ + Provide low level segments information that a Lucene index (shard level) is built with. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-segments.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg human: Whether to return time and byte values in human-readable + format., default False + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg operation_threading: TODO: ? 
+ :arg verbose: Includes detailed memory usage by Lucene., default False + """ + return self.transport.perform_request('GET', _make_path(index, + '_segments'), params=params) + + @query_params('allow_no_indices', 'analyze_wildcard', 'analyzer', + 'default_operator', 'df', 'expand_wildcards', 'explain', + 'ignore_unavailable', 'lenient', 'lowercase_expanded_terms', + 'operation_threading', 'q', 'rewrite') + def validate_query(self, index=None, doc_type=None, body=None, params=None): + """ + Validate a potentially expensive query without executing it. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-validate.html>`_ + + :arg index: A comma-separated list of index names to restrict the + operation; use `_all` or empty string to perform the operation on + all indices + :arg doc_type: A comma-separated list of document types to restrict the + operation; leave empty to perform the operation on all types + :arg body: The query definition specified with the Query DSL + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg analyze_wildcard: Specify whether wildcard and prefix queries + should be analyzed (default: false) + :arg analyzer: The analyzer to use for the query string + :arg default_operator: The default operator for query string query (AND + or OR), default 'OR', valid choices are: 'AND', 'OR' + :arg df: The field to use as default where no field prefix is given in + the query string + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg explain: Return detailed information about the error + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg lenient: Specify whether format-based query failures (such as + providing text to a numeric field) should be ignored + :arg lowercase_expanded_terms: Specify whether query terms should be + lowercased + :arg operation_threading: TODO: ? + :arg q: Query in the Lucene query string syntax + :arg rewrite: Provide a more detailed explanation showing the actual + Lucene query that will be executed. + """ + return self.transport.perform_request('GET', _make_path(index, + doc_type, '_validate', 'query'), params=params, body=body) + + @query_params('allow_no_indices', 'expand_wildcards', 'field_data', + 'fielddata', 'fields', 'ignore_unavailable', 'query', 'recycler', + 'request') + def clear_cache(self, index=None, params=None): + """ + Clear either all caches or specific cached associated with one ore more indices. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-clearcache.html>`_ + + :arg index: A comma-separated list of index name to limit the operation + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg field_data: Clear field data + :arg fielddata: Clear field data + :arg fields: A comma-separated list of fields to clear when using the + `field_data` parameter (default: all) + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg query: Clear query caches + :arg recycler: Clear the recycler cache + :arg request: Clear request cache + """ + return self.transport.perform_request('POST', _make_path(index, + '_cache', 'clear'), params=params) + + @query_params('active_only', 'detailed', 'human') + def recovery(self, index=None, params=None): + """ + The indices recovery API provides insight into on-going shard + recoveries. Recovery status may be reported for specific indices, or + cluster-wide. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-recovery.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg active_only: Display only those recoveries that are currently on- + going, default False + :arg detailed: Whether to display detailed information about shard + recovery, default False + :arg human: Whether to return time and byte values in human-readable + format., default False + """ + return self.transport.perform_request('GET', _make_path(index, + '_recovery'), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'only_ancient_segments', 'wait_for_completion') + def upgrade(self, index=None, params=None): + """ + Upgrade one or more indices to the latest format through an API. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-upgrade.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg only_ancient_segments: If true, only ancient (an older Lucene major + release) segments will be upgraded + :arg wait_for_completion: Specify whether the request should block until + the all segments are upgraded (default: false) + """ + return self.transport.perform_request('POST', _make_path(index, + '_upgrade'), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'human', + 'ignore_unavailable') + def get_upgrade(self, index=None, params=None): + """ + Monitor how much of one or more index is upgraded. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-upgrade.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. 
(This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg human: Whether to return time and byte values in human-readable + format., default False + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + """ + return self.transport.perform_request('GET', _make_path(index, + '_upgrade'), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable') + def flush_synced(self, index=None, params=None): + """ + Perform a normal flush, then add a generated unique marker (sync_id) to all shards. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-synced-flush.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string for all indices + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + """ + return self.transport.perform_request('POST', _make_path(index, + '_flush', 'synced'), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', + 'operation_threading', 'status') + def shard_stores(self, index=None, params=None): + """ + Provides store information for shard copies of indices. 
Store + information reports on which nodes shard copies exist, the shard copy + version, indicating how recent they are, and any exceptions encountered + while opening the shard index or from earlier engine failure. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-shards-stores.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg operation_threading: TODO: ? + :arg status: A comma-separated list of statuses used to filter on shards + to get store information for, valid choices are: 'green', 'yellow', + 'red', 'all' + """ + return self.transport.perform_request('GET', _make_path(index, + '_shard_stores'), params=params) + + @query_params('allow_no_indices', 'expand_wildcards', 'flush', + 'ignore_unavailable', 'max_num_segments', 'only_expunge_deletes', + 'operation_threading', 'wait_for_merge') + def forcemerge(self, index=None, params=None): + """ + The force merge API allows to force merging of one or more indices + through an API. The merge relates to the number of segments a Lucene + index holds within each shard. The force merge operation allows to + reduce the number of segments by merging them. + + This call will block until the merge is complete. If the http + connection is lost, the request will continue in the background, and + any new requests will block until the previous force merge is complete. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-forcemerge.html>`_ + + :arg index: A comma-separated list of index names; use `_all` or empty + string to perform the operation on all indices + :arg allow_no_indices: Whether to ignore if a wildcard indices + expression resolves into no concrete indices. (This includes `_all` + string or when no indices have been specified) + :arg expand_wildcards: Whether to expand wildcard expression to concrete + indices that are open, closed or both., default 'open', valid + choices are: 'open', 'closed', 'none', 'all' + :arg flush: Specify whether the index should be flushed after performing + the operation (default: true) + :arg ignore_unavailable: Whether specified concrete indices should be + ignored when unavailable (missing or closed) + :arg max_num_segments: The number of segments the index should be merged + into (default: dynamic) + :arg only_expunge_deletes: Specify whether the operation should only + expunge deleted documents + :arg operation_threading: TODO: ? + :arg wait_for_merge: Specify whether the request should block until the + merge process is finished (default: true) + """ + return self.transport.perform_request('POST', _make_path(index, + '_forcemerge'), params=params) + + @query_params('master_timeout', 'timeout', 'wait_for_active_shards') + def shrink(self, index, target, body=None, params=None): + """ + The shrink index API allows you to shrink an existing index into a new + index with fewer primary shards. The number of primary shards in the + target index must be a factor of the shards in the source index. For + example an index with 8 primary shards can be shrunk into 4, 2 or 1 + primary shards or an index with 15 primary shards can be shrunk into 5, + 3 or 1. If the number of shards in the index is a prime number it can + only be shrunk into a single primary shard. Before shrinking, a + (primary or replica) copy of every shard in the index must be present + on the same node. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-shrink-index.html>`_ + + :arg index: The name of the source index to shrink + :arg target: The name of the target index to shrink into + :arg body: The configuration for the target index (`settings` and + `aliases`) + :arg master_timeout: Specify timeout for connection to master + :arg timeout: Explicit operation timeout + :arg wait_for_active_shards: Set the number of active shards to wait for + on the shrunken index before the operation returns. + """ + for param in (index, target): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('PUT', _make_path(index, + '_shrink', target), params=params, body=body) + + @query_params('master_timeout', 'timeout', 'wait_for_active_shards') + def rollover(self, alias, new_index=None, body=None, params=None): + """ + The rollover index API rolls an alias over to a new index when the + existing index is considered to be too large or too old. + + The API accepts a single alias name and a list of conditions. The alias + must point to a single index only. If the index satisfies the specified + conditions then a new index is created and the alias is switched to + point to the new alias. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-rollover-index.html>`_ + + :arg alias: The name of the alias to rollover + :arg new_index: The name of the rollover index + :arg body: The conditions that needs to be met for executing rollover + :arg master_timeout: Specify timeout for connection to master + :arg timeout: Explicit operation timeout + :arg wait_for_active_shards: Set the number of active shards to wait for + on the newly created rollover index before the operation returns. 
+ """ + if alias in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'alias'.") + return self.transport.perform_request('POST', _make_path(alias, + '_rollover', new_index), params=params, body=body) diff --git a/scripts/external_libs/elasticsearch/elasticsearch/client/ingest.py b/scripts/external_libs/elasticsearch/elasticsearch/client/ingest.py new file mode 100644 index 00000000..4a2def47 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/client/ingest.py @@ -0,0 +1,61 @@ +from .utils import NamespacedClient, query_params, _make_path, SKIP_IN_PATH + +class IngestClient(NamespacedClient): + @query_params('master_timeout') + def get_pipeline(self, id=None, params=None): + """ + `<https://www.elastic.co/guide/en/elasticsearch/plugins/current/ingest.html>`_ + + :arg id: Comma separated list of pipeline ids. Wildcards supported + :arg master_timeout: Explicit operation timeout for connection to master + node + """ + return self.transport.perform_request('GET', _make_path('_ingest', + 'pipeline', id), params=params) + + @query_params('master_timeout', 'timeout') + def put_pipeline(self, id, body, params=None): + """ + `<https://www.elastic.co/guide/en/elasticsearch/plugins/current/ingest.html>`_ + + :arg id: Pipeline ID + :arg body: The ingest definition + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg timeout: Explicit operation timeout + """ + for param in (id, body): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('PUT', _make_path('_ingest', + 'pipeline', id), params=params, body=body) + + @query_params('master_timeout', 'timeout') + def delete_pipeline(self, id, params=None): + """ + `<https://www.elastic.co/guide/en/elasticsearch/plugins/current/ingest.html>`_ + + :arg id: Pipeline ID + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg timeout: Explicit 
operation timeout + """ + if id in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'id'.") + return self.transport.perform_request('DELETE', _make_path('_ingest', + 'pipeline', id), params=params) + + @query_params('verbose') + def simulate(self, body, id=None, params=None): + """ + `<https://www.elastic.co/guide/en/elasticsearch/plugins/current/ingest.html>`_ + + :arg body: The simulate definition + :arg id: Pipeline ID + :arg verbose: Verbose mode. Display data output for each processor in + executed pipeline, default False + """ + if body in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'body'.") + return self.transport.perform_request('GET', _make_path('_ingest', + 'pipeline', id, '_simulate'), params=params, body=body) diff --git a/scripts/external_libs/elasticsearch/elasticsearch/client/nodes.py b/scripts/external_libs/elasticsearch/elasticsearch/client/nodes.py new file mode 100644 index 00000000..6adc61d1 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/client/nodes.py @@ -0,0 +1,87 @@ +from .utils import NamespacedClient, query_params, _make_path + +class NodesClient(NamespacedClient): + @query_params('flat_settings', 'human', 'timeout') + def info(self, node_id=None, metric=None, params=None): + """ + The cluster nodes info API allows to retrieve one or more (or all) of + the cluster nodes information. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-nodes-info.html>`_ + + :arg node_id: A comma-separated list of node IDs or names to limit the + returned information; use `_local` to return information from the + node you're connecting to, leave empty to get information from all + nodes + :arg metric: A comma-separated list of metrics you wish returned. Leave + empty to return all. 
+ :arg flat_settings: Return settings in flat format (default: false) + :arg human: Whether to return time and byte values in human-readable + format., default False + :arg timeout: Explicit operation timeout + """ + return self.transport.perform_request('GET', _make_path('_nodes', + node_id, metric), params=params) + + @query_params('completion_fields', 'fielddata_fields', 'fields', 'groups', + 'human', 'level', 'timeout', 'types') + def stats(self, node_id=None, metric=None, index_metric=None, params=None): + """ + The cluster nodes stats API allows to retrieve one or more (or all) of + the cluster nodes statistics. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-nodes-stats.html>`_ + + :arg node_id: A comma-separated list of node IDs or names to limit the + returned information; use `_local` to return information from the + node you're connecting to, leave empty to get information from all + nodes + :arg metric: Limit the information returned to the specified metrics + :arg index_metric: Limit the information returned for `indices` metric + to the specific index metrics. Isn't used if `indices` (or `all`) + metric isn't specified. 
+ :arg completion_fields: A comma-separated list of fields for `fielddata` + and `suggest` index metric (supports wildcards) + :arg fielddata_fields: A comma-separated list of fields for `fielddata` + index metric (supports wildcards) + :arg fields: A comma-separated list of fields for `fielddata` and + `completion` index metric (supports wildcards) + :arg groups: A comma-separated list of search groups for `search` index + metric + :arg human: Whether to return time and byte values in human-readable + format., default False + :arg level: Return indices stats aggregated at node, index or shard + level, default 'node', valid choices are: 'node', 'indices', + 'shards' + :arg timeout: Explicit operation timeout + :arg types: A comma-separated list of document types for the `indexing` + index metric + """ + return self.transport.perform_request('GET', _make_path('_nodes', + node_id, 'stats', metric, index_metric), params=params) + + @query_params('doc_type', 'ignore_idle_threads', 'interval', 'snapshots', + 'threads', 'timeout') + def hot_threads(self, node_id=None, params=None): + """ + An API allowing to get the current hot threads on each node in the cluster. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-nodes-hot-threads.html>`_ + + :arg node_id: A comma-separated list of node IDs or names to limit the + returned information; use `_local` to return information from the + node you're connecting to, leave empty to get information from all + nodes + :arg doc_type: The type to sample (default: cpu), valid choices are: + 'cpu', 'wait', 'block' + :arg ignore_idle_threads: Don't show threads that are in known-idle + places, such as waiting on a socket select or pulling from an empty + task queue (default: true) + :arg interval: The interval for the second sampling of threads + :arg snapshots: Number of samples of thread stacktrace (default: 10) + :arg threads: Specify the number of threads to provide information for + (default: 3) + :arg timeout: Explicit operation timeout + """ + # avoid python reserved words + if params and 'type_' in params: + params['type'] = params.pop('type_') + return self.transport.perform_request('GET', _make_path('_cluster', + 'nodes', node_id, 'hotthreads'), params=params) diff --git a/scripts/external_libs/elasticsearch/elasticsearch/client/snapshot.py b/scripts/external_libs/elasticsearch/elasticsearch/client/snapshot.py new file mode 100644 index 00000000..87ed6572 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/client/snapshot.py @@ -0,0 +1,163 @@ +from .utils import NamespacedClient, query_params, _make_path, SKIP_IN_PATH + +class SnapshotClient(NamespacedClient): + @query_params('master_timeout', 'wait_for_completion') + def create(self, repository, snapshot, body=None, params=None): + """ + Create a snapshot in repository + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html>`_ + + :arg repository: A repository name + :arg snapshot: A snapshot name + :arg body: The snapshot definition + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg wait_for_completion: Should this 
request wait until the operation + has completed before returning, default False + """ + for param in (repository, snapshot): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('PUT', _make_path('_snapshot', + repository, snapshot), params=params, body=body) + + @query_params('master_timeout') + def delete(self, repository, snapshot, params=None): + """ + Deletes a snapshot from a repository. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html>`_ + + :arg repository: A repository name + :arg snapshot: A snapshot name + :arg master_timeout: Explicit operation timeout for connection to master + node + """ + for param in (repository, snapshot): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('DELETE', + _make_path('_snapshot', repository, snapshot), params=params) + + @query_params('ignore_unavailable', 'master_timeout') + def get(self, repository, snapshot, params=None): + """ + Retrieve information about a snapshot. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html>`_ + + :arg repository: A repository name + :arg snapshot: A comma-separated list of snapshot names + :arg ignore_unavailable: Whether to ignore unavailable snapshots, + defaults to false which means a SnapshotMissingException is thrown + :arg master_timeout: Explicit operation timeout for connection to master + node + """ + for param in (repository, snapshot): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('GET', _make_path('_snapshot', + repository, snapshot), params=params) + + @query_params('master_timeout', 'timeout') + def delete_repository(self, repository, params=None): + """ + Removes a shared file system repository. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html>`_ + + :arg repository: A comma-separated list of repository names + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg timeout: Explicit operation timeout + """ + if repository in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'repository'.") + return self.transport.perform_request('DELETE', + _make_path('_snapshot', repository), params=params) + + @query_params('local', 'master_timeout') + def get_repository(self, repository=None, params=None): + """ + Return information about registered repositories. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html>`_ + + :arg repository: A comma-separated list of repository names + :arg local: Return local information, do not retrieve the state from + master node (default: false) + :arg master_timeout: Explicit operation timeout for connection to master + node + """ + return self.transport.perform_request('GET', _make_path('_snapshot', + repository), params=params) + + @query_params('master_timeout', 'timeout', 'verify') + def create_repository(self, repository, body, params=None): + """ + Registers a shared file system repository. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html>`_ + + :arg repository: A repository name + :arg body: The repository definition + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg timeout: Explicit operation timeout + :arg verify: Whether to verify the repository after creation + """ + for param in (repository, body): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('PUT', _make_path('_snapshot', + repository), params=params, body=body) + + @query_params('master_timeout', 'wait_for_completion') + def restore(self, repository, snapshot, body=None, params=None): + """ + Restore a snapshot. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html>`_ + + :arg repository: A repository name + :arg snapshot: A snapshot name + :arg body: Details of what to restore + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg wait_for_completion: Should this request wait until the operation + has completed before returning, default False + """ + for param in (repository, snapshot): + if param in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument.") + return self.transport.perform_request('POST', _make_path('_snapshot', + repository, snapshot, '_restore'), params=params, body=body) + + @query_params('ignore_unavailable', 'master_timeout') + def status(self, repository=None, snapshot=None, params=None): + """ + Return information about all currently running snapshots. By specifying + a repository name, it's possible to limit the results to a particular + repository. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html>`_ + + :arg repository: A repository name + :arg snapshot: A comma-separated list of snapshot names + :arg ignore_unavailable: Whether to ignore unavailable snapshots, + defaults to false which means a SnapshotMissingException is thrown + :arg master_timeout: Explicit operation timeout for connection to master + node + """ + return self.transport.perform_request('GET', _make_path('_snapshot', + repository, snapshot, '_status'), params=params) + + @query_params('master_timeout', 'timeout') + def verify_repository(self, repository, params=None): + """ + Returns a list of nodes where repository was successfully verified or + an error message if verification process failed. + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html>`_ + + :arg repository: A repository name + :arg master_timeout: Explicit operation timeout for connection to master + node + :arg timeout: Explicit operation timeout + """ + if repository in SKIP_IN_PATH: + raise ValueError("Empty value passed for a required argument 'repository'.") + return self.transport.perform_request('POST', _make_path('_snapshot', + repository, '_verify'), params=params) diff --git a/scripts/external_libs/elasticsearch/elasticsearch/client/tasks.py b/scripts/external_libs/elasticsearch/elasticsearch/client/tasks.py new file mode 100644 index 00000000..a407f051 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/client/tasks.py @@ -0,0 +1,61 @@ +from .utils import NamespacedClient, query_params, _make_path, SKIP_IN_PATH + +class TasksClient(NamespacedClient): + @query_params('actions', 'detailed', 'group_by', 'node_id', 'parent_node', + 'parent_task', 'wait_for_completion') + def list(self, task_id=None, params=None): + """ + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/tasks-list.html>`_ + + :arg task_id: Return the task with specified id (node_id:task_number) + 
:arg actions: A comma-separated list of actions that should be returned. + Leave empty to return all. + :arg detailed: Return detailed task information (default: false) + :arg group_by: Group tasks by nodes or parent/child relationships, + default 'nodes', valid choices are: 'nodes', 'parents' + :arg node_id: A comma-separated list of node IDs or names to limit the + returned information; use `_local` to return information from the + node you're connecting to, leave empty to get information from all + nodes + :arg parent_node: Return tasks with specified parent node. + :arg parent_task: Return tasks with specified parent task id + (node_id:task_number). Set to -1 to return all. + :arg wait_for_completion: Wait for the matching tasks to complete + (default: false) + """ + return self.transport.perform_request('GET', _make_path('_tasks', + task_id), params=params) + + @query_params('actions', 'node_id', 'parent_node', 'parent_task') + def cancel(self, task_id=None, params=None): + """ + + `<http://www.elastic.co/guide/en/elasticsearch/reference/current/tasks-cancel.html>`_ + + :arg task_id: Cancel the task with specified task id + (node_id:task_number) + :arg actions: A comma-separated list of actions that should be + cancelled. Leave empty to cancel all. + :arg node_id: A comma-separated list of node IDs or names to limit the + returned information; use `_local` to return information from the + node you're connecting to, leave empty to get information from all + nodes + :arg parent_node: Cancel tasks with specified parent node. + :arg parent_task: Cancel tasks with specified parent task id + (node_id:task_number). Set to -1 to cancel all. + """ + return self.transport.perform_request('POST', _make_path('_tasks', + task_id, '_cancel'), params=params) + + @query_params('wait_for_completion') + def get(self, task_id=None, params=None): + """ + Retrieve information for a particular task. 
+ `<http://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html>`_ + + :arg task_id: Return the task with specified id (node_id:task_number) + :arg wait_for_completion: Wait for the matching tasks to complete + (default: false) + """ + return self.transport.perform_request('GET', _make_path('_tasks', + task_id), params=params) diff --git a/scripts/external_libs/elasticsearch/elasticsearch/client/utils.py b/scripts/external_libs/elasticsearch/elasticsearch/client/utils.py new file mode 100644 index 00000000..2327d823 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/client/utils.py @@ -0,0 +1,89 @@ +from __future__ import unicode_literals + +import weakref +from datetime import date, datetime +from functools import wraps +from ..compat import string_types, quote_plus + +# parts of URL to be omitted +SKIP_IN_PATH = (None, '', b'', [], ()) + +def _escape(value): + """ + Escape a single value of a URL string or a query parameter. If it is a list + or tuple, turn it into a comma-separated string first. + """ + + # make sequences into comma-separated stings + if isinstance(value, (list, tuple)): + value = ','.join(value) + + # dates and datetimes into isoformat + elif isinstance(value, (date, datetime)): + value = value.isoformat() + + # make bools into true/false strings + elif isinstance(value, bool): + value = str(value).lower() + + # encode strings to utf-8 + if isinstance(value, string_types): + try: + return value.encode('utf-8') + except UnicodeDecodeError: + # Python 2 and str, no need to re-encode + pass + + return str(value) + +def _make_path(*parts): + """ + Create a URL string from parts, omit all `None` values and empty strings. + Convert lists nad tuples to comma separated values. + """ + #TODO: maybe only allow some parts to be lists/tuples ? 
+ return '/' + '/'.join( + # preserve ',' and '*' in url for nicer URLs in logs + quote_plus(_escape(p), b',*') for p in parts if p not in SKIP_IN_PATH) + +# parameters that apply to all methods +GLOBAL_PARAMS = ('pretty', 'format', 'filter_path') + +def query_params(*es_query_params): + """ + Decorator that pops all accepted parameters from method's kwargs and puts + them in the params argument. + """ + def _wrapper(func): + @wraps(func) + def _wrapped(*args, **kwargs): + params = kwargs.pop('params', {}) + for p in es_query_params + GLOBAL_PARAMS: + if p in kwargs: + v = kwargs.pop(p) + if v is not None: + params[p] = _escape(v) + + # don't treat ignore and request_timeout as other params to avoid escaping + for p in ('ignore', 'request_timeout'): + if p in kwargs: + params[p] = kwargs.pop(p) + return func(*args, params=params, **kwargs) + return _wrapped + return _wrapper + + +class NamespacedClient(object): + def __init__(self, client): + self.client = client + + @property + def transport(self): + return self.client.transport + +class AddonClient(NamespacedClient): + @classmethod + def infect_client(cls, client): + addon = cls(weakref.proxy(client)) + setattr(client, cls.namespace, addon) + return client diff --git a/scripts/external_libs/elasticsearch/elasticsearch/compat.py b/scripts/external_libs/elasticsearch/elasticsearch/compat.py new file mode 100644 index 00000000..deee3c52 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/compat.py @@ -0,0 +1,13 @@ +import sys + +PY2 = sys.version_info[0] == 2 + +if PY2: + string_types = basestring, + from urllib import quote_plus, urlencode + from urlparse import urlparse + from itertools import imap as map +else: + string_types = str, bytes + from urllib.parse import quote_plus, urlencode, urlparse + map = map diff --git a/scripts/external_libs/elasticsearch/elasticsearch/connection/__init__.py b/scripts/external_libs/elasticsearch/elasticsearch/connection/__init__.py new file mode 100644 index 
00000000..c2f484a1 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/connection/__init__.py @@ -0,0 +1,3 @@ +from .base import Connection +from .http_requests import RequestsHttpConnection +from .http_urllib3 import Urllib3HttpConnection diff --git a/scripts/external_libs/elasticsearch/elasticsearch/connection/base.py b/scripts/external_libs/elasticsearch/elasticsearch/connection/base.py new file mode 100644 index 00000000..c0206b2e --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/connection/base.py @@ -0,0 +1,124 @@ +import logging +try: + import simplejson as json +except ImportError: + import json + +from ..exceptions import TransportError, HTTP_EXCEPTIONS + +logger = logging.getLogger('elasticsearch') + +# create the elasticsearch.trace logger, but only set propagate to False if the +# logger hasn't already been configured +_tracer_already_configured = 'elasticsearch.trace' in logging.Logger.manager.loggerDict +tracer = logging.getLogger('elasticsearch.trace') +if not _tracer_already_configured: + tracer.propagate = False + + +class Connection(object): + """ + Class responsible for maintaining a connection to an Elasticsearch node. It + holds persistent connection pool to it and it's main interface + (`perform_request`) is thread-safe. + + Also responsible for logging. 
+ """ + transport_schema = 'http' + + def __init__(self, host='localhost', port=9200, use_ssl=False, url_prefix='', timeout=10, **kwargs): + """ + :arg host: hostname of the node (default: localhost) + :arg port: port to use (integer, default: 9200) + :arg url_prefix: optional url prefix for elasticsearch + :arg timeout: default timeout in seconds (float, default: 10) + """ + scheme = self.transport_schema + if use_ssl: + scheme += 's' + self.host = '%s://%s:%s' % (scheme, host, port) + if url_prefix: + url_prefix = '/' + url_prefix.strip('/') + self.url_prefix = url_prefix + self.timeout = timeout + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, self.host) + + def _pretty_json(self, data): + # pretty JSON in tracer curl logs + try: + return json.dumps(json.loads(data), sort_keys=True, indent=2, separators=(',', ': ')).replace("'", r'\u0027') + except (ValueError, TypeError): + # non-json data or a bulk request + return data + + def _log_trace(self, method, path, body, status_code, response, duration): + if not tracer.isEnabledFor(logging.INFO) or not tracer.handlers: + return + + # include pretty in trace curls + path = path.replace('?', '?pretty&', 1) if '?' in path else path + '?pretty' + if self.url_prefix: + path = path.replace(self.url_prefix, '', 1) + tracer.info("curl -X%s 'http://localhost:9200%s' -d '%s'", method, path, self._pretty_json(body) if body else '') + + if tracer.isEnabledFor(logging.DEBUG): + tracer.debug('#[%s] (%.3fs)\n#%s', status_code, duration, self._pretty_json(response).replace('\n', '\n#') if response else '') + + def log_request_success(self, method, full_url, path, body, status_code, response, duration): + """ Log a successful API call. 
""" + # TODO: optionally pass in params instead of full_url and do urlencode only when needed + + # body has already been serialized to utf-8, deserialize it for logging + # TODO: find a better way to avoid (de)encoding the body back and forth + if body: + body = body.decode('utf-8') + + logger.info( + '%s %s [status:%s request:%.3fs]', method, full_url, + status_code, duration + ) + logger.debug('> %s', body) + logger.debug('< %s', response) + + self._log_trace(method, path, body, status_code, response, duration) + + def log_request_fail(self, method, full_url, path, body, duration, status_code=None, response=None, exception=None): + """ Log an unsuccessful API call. """ + # do not log 404s on HEAD requests + if method == 'HEAD' and status_code == 404: + return + logger.warning( + '%s %s [status:%s request:%.3fs]', method, full_url, + status_code or 'N/A', duration, exc_info=exception is not None + ) + + # body has already been serialized to utf-8, deserialize it for logging + # TODO: find a better way to avoid (de)encoding the body back and forth + if body: + body = body.decode('utf-8') + + logger.debug('> %s', body) + + self._log_trace(method, path, body, status_code, response, duration) + + if response is not None: + logger.debug('< %s', response) + + def _raise_error(self, status_code, raw_data): + """ Locate appropriate exception and raise it. 
""" + error_message = raw_data + additional_info = None + try: + if raw_data: + additional_info = json.loads(raw_data) + error_message = additional_info.get('error', error_message) + if isinstance(error_message, dict) and 'type' in error_message: + error_message = error_message['type'] + except (ValueError, TypeError) as err: + logger.warning('Undecodable raw error response from server: %s', err) + + raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info) + + diff --git a/scripts/external_libs/elasticsearch/elasticsearch/connection/http_requests.py b/scripts/external_libs/elasticsearch/elasticsearch/connection/http_requests.py new file mode 100644 index 00000000..6c9a924b --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/connection/http_requests.py @@ -0,0 +1,96 @@ +import time +import warnings +try: + import requests + REQUESTS_AVAILABLE = True +except ImportError: + REQUESTS_AVAILABLE = False + +from .base import Connection +from ..exceptions import ConnectionError, ImproperlyConfigured, ConnectionTimeout, SSLError +from ..compat import urlencode, string_types + +class RequestsHttpConnection(Connection): + """ + Connection using the `requests` library. + + :arg http_auth: optional http auth information as either ':' separated + string or a tuple. Any value will be passed into requests as `auth`. + :arg use_ssl: use ssl for the connection if `True` + :arg verify_certs: whether to verify SSL certificates + :arg ca_certs: optional path to CA bundle. By default standard requests' + bundle will be used. 
+ :arg client_cert: path to the file containing the private key and the + certificate, or cert only if using client_key + :arg client_key: path to the file containing the private key if using + separate cert and key files (client_cert will contain only the cert) + :arg headers: any custom http headers to be add to requests + """ + def __init__(self, host='localhost', port=9200, http_auth=None, + use_ssl=False, verify_certs=True, ca_certs=None, client_cert=None, + client_key=None, headers=None, **kwargs): + if not REQUESTS_AVAILABLE: + raise ImproperlyConfigured("Please install requests to use RequestsHttpConnection.") + + super(RequestsHttpConnection, self).__init__(host=host, port=port, **kwargs) + self.session = requests.Session() + self.session.headers = headers + if http_auth is not None: + if isinstance(http_auth, (tuple, list)): + http_auth = tuple(http_auth) + elif isinstance(http_auth, string_types): + http_auth = tuple(http_auth.split(':', 1)) + self.session.auth = http_auth + self.base_url = 'http%s://%s:%d%s' % ( + 's' if use_ssl else '', + host, port, self.url_prefix + ) + self.session.verify = verify_certs + if not client_key: + self.session.cert = client_cert + elif client_cert: + # cert is a tuple of (certfile, keyfile) + self.session.cert = (client_cert, client_key) + if ca_certs: + if not verify_certs: + raise ImproperlyConfigured("You cannot pass CA certificates when verify SSL is off.") + self.session.verify = ca_certs + + if use_ssl and not verify_certs: + warnings.warn( + 'Connecting to %s using SSL with verify_certs=False is insecure.' 
% self.base_url) + + def perform_request(self, method, url, params=None, body=None, timeout=None, ignore=()): + url = self.base_url + url + if params: + url = '%s?%s' % (url, urlencode(params or {})) + + start = time.time() + try: + response = self.session.request(method, url, data=body, timeout=timeout or self.timeout) + duration = time.time() - start + raw_data = response.text + except requests.exceptions.SSLError as e: + self.log_request_fail(method, url, response.request.path_url, body, time.time() - start, exception=e) + raise SSLError('N/A', str(e), e) + except requests.Timeout as e: + self.log_request_fail(method, url, response.request.path_url, body, time.time() - start, exception=e) + raise ConnectionTimeout('TIMEOUT', str(e), e) + except requests.ConnectionError as e: + self.log_request_fail(method, url, response.request.path_url, body, time.time() - start, exception=e) + raise ConnectionError('N/A', str(e), e) + + # raise errors based on http status codes, let the client handle those if needed + if not (200 <= response.status_code < 300) and response.status_code not in ignore: + self.log_request_fail(method, url, response.request.path_url, body, duration, response.status_code, raw_data) + self._raise_error(response.status_code, raw_data) + + self.log_request_success(method, url, response.request.path_url, body, response.status_code, raw_data, duration) + + return response.status_code, response.headers, raw_data + + def close(self): + """ + Explicitly closes connections + """ + self.session.close() diff --git a/scripts/external_libs/elasticsearch/elasticsearch/connection/http_urllib3.py b/scripts/external_libs/elasticsearch/elasticsearch/connection/http_urllib3.py new file mode 100644 index 00000000..066466cd --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/connection/http_urllib3.py @@ -0,0 +1,135 @@ +import time +import urllib3 +from urllib3.exceptions import ReadTimeoutError, SSLError as UrllibSSLError +import warnings + +CA_CERTS 
= None + +try: + import certifi + CA_CERTS = certifi.where() +except ImportError: + pass + +from .base import Connection +from ..exceptions import ConnectionError, ImproperlyConfigured, ConnectionTimeout, SSLError +from ..compat import urlencode + +class Urllib3HttpConnection(Connection): + """ + Default connection class using the `urllib3` library and the http protocol. + + :arg host: hostname of the node (default: localhost) + :arg port: port to use (integer, default: 9200) + :arg url_prefix: optional url prefix for elasticsearch + :arg timeout: default timeout in seconds (float, default: 10) + :arg http_auth: optional http auth information as either ':' separated + string or a tuple + :arg use_ssl: use ssl for the connection if `True` + :arg verify_certs: whether to verify SSL certificates + :arg ca_certs: optional path to CA bundle. See + https://urllib3.readthedocs.io/en/latest/security.html#using-certifi-with-urllib3 + for instructions how to get default set + :arg client_cert: path to the file containing the private key and the + certificate, or cert only if using client_key + :arg client_key: path to the file containing the private key if using + separate cert and key files (client_cert will contain only the cert) + :arg ssl_version: version of the SSL protocol to use. Choices are: + SSLv23 (default) SSLv2 SSLv3 TLSv1 (see ``PROTOCOL_*`` constants in the + ``ssl`` module for exact options for your environment). + :arg ssl_assert_hostname: use hostname verification if not `False` + :arg ssl_assert_fingerprint: verify the supplied certificate fingerprint if not `None` + :arg maxsize: the number of connections which will be kept open to this + host. See https://urllib3.readthedocs.io/en/1.4/pools.html#api for more + information. 
+ :arg headers: any custom http headers to be add to requests + """ + def __init__(self, host='localhost', port=9200, http_auth=None, + use_ssl=False, verify_certs=True, ca_certs=None, client_cert=None, + client_key=None, ssl_version=None, ssl_assert_hostname=None, + ssl_assert_fingerprint=None, maxsize=10, headers=None, **kwargs): + + super(Urllib3HttpConnection, self).__init__(host=host, port=port, use_ssl=use_ssl, **kwargs) + self.headers = headers.copy() if headers else {} + self.headers.update(urllib3.make_headers(keep_alive=True)) + if http_auth is not None: + if isinstance(http_auth, (tuple, list)): + http_auth = ':'.join(http_auth) + self.headers.update(urllib3.make_headers(basic_auth=http_auth)) + + ca_certs = CA_CERTS if ca_certs is None else ca_certs + pool_class = urllib3.HTTPConnectionPool + kw = {} + if use_ssl: + pool_class = urllib3.HTTPSConnectionPool + kw.update({ + 'ssl_version': ssl_version, + 'assert_hostname': ssl_assert_hostname, + 'assert_fingerprint': ssl_assert_fingerprint, + }) + + if verify_certs: + if not ca_certs: + raise ImproperlyConfigured("Root certificates are missing for certificate " + "validation. Either pass them in using the ca_certs parameter or " + "install certifi to use it automatically.") + + kw.update({ + 'cert_reqs': 'CERT_REQUIRED', + 'ca_certs': ca_certs, + 'cert_file': client_cert, + 'key_file': client_key, + }) + else: + warnings.warn( + 'Connecting to %s using SSL with verify_certs=False is insecure.' % host) + + self.pool = pool_class(host, port=port, timeout=self.timeout, maxsize=maxsize, **kw) + + def perform_request(self, method, url, params=None, body=None, timeout=None, ignore=()): + url = self.url_prefix + url + if params: + url = '%s?%s' % (url, urlencode(params)) + full_url = self.host + url + + start = time.time() + try: + kw = {} + if timeout: + kw['timeout'] = timeout + + # in python2 we need to make sure the url and method are not + # unicode. 
Otherwise the body will be decoded into unicode too and + # that will fail (#133, #201). + if not isinstance(url, str): + url = url.encode('utf-8') + if not isinstance(method, str): + method = method.encode('utf-8') + + response = self.pool.urlopen(method, url, body, retries=False, headers=self.headers, **kw) + duration = time.time() - start + raw_data = response.data.decode('utf-8') + except UrllibSSLError as e: + self.log_request_fail(method, full_url, url, body, time.time() - start, exception=e) + raise SSLError('N/A', str(e), e) + except ReadTimeoutError as e: + self.log_request_fail(method, full_url, url, body, time.time() - start, exception=e) + raise ConnectionTimeout('TIMEOUT', str(e), e) + except Exception as e: + self.log_request_fail(method, full_url, url, body, time.time() - start, exception=e) + raise ConnectionError('N/A', str(e), e) + + if not (200 <= response.status < 300) and response.status not in ignore: + self.log_request_fail(method, full_url, url, body, duration, response.status, raw_data) + self._raise_error(response.status, raw_data) + + self.log_request_success(method, full_url, url, body, response.status, + raw_data, duration) + + return response.status, response.getheaders(), raw_data + + def close(self): + """ + Explicitly closes connection + """ + self.pool.close() diff --git a/scripts/external_libs/elasticsearch/elasticsearch/connection/pooling.py b/scripts/external_libs/elasticsearch/elasticsearch/connection/pooling.py new file mode 100644 index 00000000..3115e7c2 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/connection/pooling.py @@ -0,0 +1,33 @@ +try: + import queue +except ImportError: + import Queue as queue +from .base import Connection + + +class PoolingConnection(Connection): + """ + Base connection class for connections that use libraries without thread + safety and no capacity for connection pooling. 
To use this just implement a + ``_make_connection`` method that constructs a new connection and returns + it. + """ + def __init__(self, *args, **kwargs): + self._free_connections = queue.Queue() + super(PoolingConnection, self).__init__(*args, **kwargs) + + def _get_connection(self): + try: + return self._free_connections.get_nowait() + except queue.Empty: + return self._make_connection() + + def _release_connection(self, con): + self._free_connections.put(con) + + def close(self): + """ + Explicitly close connection + """ + pass + diff --git a/scripts/external_libs/elasticsearch/elasticsearch/connection_pool.py b/scripts/external_libs/elasticsearch/elasticsearch/connection_pool.py new file mode 100644 index 00000000..3ec43edf --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/connection_pool.py @@ -0,0 +1,260 @@ +import time +import random +import logging + +try: + from Queue import PriorityQueue, Empty +except ImportError: + from queue import PriorityQueue, Empty + +from .exceptions import ImproperlyConfigured + +logger = logging.getLogger('elasticsearch') + +class ConnectionSelector(object): + """ + Simple class used to select a connection from a list of currently live + connection instances. In init time it is passed a dictionary containing all + the connections' options which it can then use during the selection + process. When the `select` method is called it is given a list of + *currently* live connections to choose from. + + The options dictionary is the one that has been passed to + :class:`~elasticsearch.Transport` as `hosts` param and the same that is + used to construct the Connection object itself. When the Connection was + created from information retrieved from the cluster via the sniffing + process it will be the dictionary returned by the `host_info_callback`. 
+ + Example of where this would be useful is a zone-aware selector that would + only select connections from it's own zones and only fall back to other + connections where there would be none in it's zones. + """ + def __init__(self, opts): + """ + :arg opts: dictionary of connection instances and their options + """ + self.connection_opts = opts + + def select(self, connections): + """ + Select a connection from the given list. + + :arg connections: list of live connections to choose from + """ + pass + + +class RandomSelector(ConnectionSelector): + """ + Select a connection at random + """ + def select(self, connections): + return random.choice(connections) + + +class RoundRobinSelector(ConnectionSelector): + """ + Selector using round-robin. + """ + def __init__(self, opts): + super(RoundRobinSelector, self).__init__(opts) + self.rr = -1 + + def select(self, connections): + self.rr += 1 + self.rr %= len(connections) + return connections[self.rr] + +class ConnectionPool(object): + """ + Container holding the :class:`~elasticsearch.Connection` instances, + managing the selection process (via a + :class:`~elasticsearch.ConnectionSelector`) and dead connections. + + It's only interactions are with the :class:`~elasticsearch.Transport` class + that drives all the actions within `ConnectionPool`. + + Initially connections are stored on the class as a list and, along with the + connection options, get passed to the `ConnectionSelector` instance for + future reference. + + Upon each request the `Transport` will ask for a `Connection` via the + `get_connection` method. If the connection fails (it's `perform_request` + raises a `ConnectionError`) it will be marked as dead (via `mark_dead`) and + put on a timeout (if it fails N times in a row the timeout is exponentially + longer - the formula is `default_timeout * 2 ** (fail_count - 1)`). When + the timeout is over the connection will be resurrected and returned to the + live pool. 
A connection that has been previously marked as dead and + succeeds will be marked as live (its fail count will be deleted). + """ + def __init__(self, connections, dead_timeout=60, timeout_cutoff=5, + selector_class=RoundRobinSelector, randomize_hosts=True, **kwargs): + """ + :arg connections: list of tuples containing the + :class:`~elasticsearch.Connection` instance and its options + :arg dead_timeout: number of seconds a connection should be retired for + after a failure, increases on consecutive failures + :arg timeout_cutoff: number of consecutive failures after which the + timeout doesn't increase + :arg selector_class: :class:`~elasticsearch.ConnectionSelector` + subclass to use if more than one connection is live + :arg randomize_hosts: shuffle the list of connections upon arrival to + avoid dog piling effect across processes + """ + if not connections: + raise ImproperlyConfigured("No defined connections, you need to " + "specify at least one host.") + self.connection_opts = connections + self.connections = [c for (c, opts) in connections] + # remember original connection list for resurrect(force=True) + self.orig_connections = tuple(self.connections) + # PriorityQueue for thread safety and ease of timeout management + self.dead = PriorityQueue(len(self.connections)) + self.dead_count = {} + + if randomize_hosts: + # randomize the connection list to avoid all clients hitting same node + # after startup/restart + random.shuffle(self.connections) + + # default timeout after which to try resurrecting a connection + self.dead_timeout = dead_timeout + self.timeout_cutoff = timeout_cutoff + + self.selector = selector_class(dict(connections)) + + def mark_dead(self, connection, now=None): + """ + Mark the connection as dead (failed). Remove it from the live pool and + put it on a timeout. 
+ + :arg connection: the failed instance + """ + # allow inject for testing purposes + now = now if now else time.time() + try: + self.connections.remove(connection) + except ValueError: + # connection not alive or another thread marked it already, ignore + return + else: + dead_count = self.dead_count.get(connection, 0) + 1 + self.dead_count[connection] = dead_count + timeout = self.dead_timeout * 2 ** min(dead_count - 1, self.timeout_cutoff) + self.dead.put((now + timeout, connection)) + logger.warning( + 'Connection %r has failed for %i times in a row, putting on %i second timeout.', + connection, dead_count, timeout + ) + + def mark_live(self, connection): + """ + Mark connection as healthy after a resurrection. Resets the fail + counter for the connection. + + :arg connection: the connection to redeem + """ + try: + del self.dead_count[connection] + except KeyError: + # race condition, safe to ignore + pass + + def resurrect(self, force=False): + """ + Attempt to resurrect a connection from the dead pool. It will try to + locate one (not all) eligible (it's timeout is over) connection to + return to the live pool. Any resurrected connection is also returned. + + :arg force: resurrect a connection even if there is none eligible (used + when we have no live connections). If force is specified resurrect + always returns a connection. + + """ + # no dead connections + if self.dead.empty(): + # we are forced to return a connection, take one from the original + # list. This is to avoid a race condition where get_connection can + # see no live connections but when it calls resurrect self.dead is + # also empty. We assume that other threat has resurrected all + # available connections so we can safely return one at random. + if force: + return random.choice(self.orig_connections) + return + + try: + # retrieve a connection to check + timeout, connection = self.dead.get(block=False) + except Empty: + # other thread has been faster and the queue is now empty. 
If we + # are forced, return a connection at random again. + if force: + return random.choice(self.orig_connections) + return + + if not force and timeout > time.time(): + # return it back if not eligible and not forced + self.dead.put((timeout, connection)) + return + + # either we were forced or the connection is eligible to be retried + self.connections.append(connection) + logger.info('Resurrecting connection %r (force=%s).', connection, force) + return connection + + def get_connection(self): + """ + Return a connection from the pool using the `ConnectionSelector` + instance. + + It tries to resurrect eligible connections, forces a resurrection when + no connections are available and passes the list of live connections to + the selector instance to choose from. + + Returns a connection instance and its current fail count. + """ + self.resurrect() + connections = self.connections[:] + + # no live nodes, resurrect one by force and return it + if not connections: + return self.resurrect(True) + + # only call selector if we have a selection + if len(connections) > 1: + return self.selector.select(connections) + + # only one connection, no need for a selector + return connections[0] + + def close(self): + """ + Explicitly closes connections + """ + for conn in self.orig_connections: + conn.close() + +class DummyConnectionPool(ConnectionPool): + def __init__(self, connections, **kwargs): + if len(connections) != 1: + raise ImproperlyConfigured("DummyConnectionPool needs exactly one " + "connection defined.") + # we need connection opts for sniffing logic + self.connection_opts = connections + self.connection = connections[0][0] + self.connections = (self.connection, ) + + def get_connection(self): + return self.connection + + def close(self): + """ + Explicitly closes connections + """ + self.connection.close() + + def _noop(self, *args, **kwargs): + pass + mark_dead = mark_live = resurrect = _noop + + diff --git 
a/scripts/external_libs/elasticsearch/elasticsearch/exceptions.py b/scripts/external_libs/elasticsearch/elasticsearch/exceptions.py new file mode 100644 index 00000000..76978a86 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/exceptions.py @@ -0,0 +1,109 @@ +__all__ = [ + 'ImproperlyConfigured', 'ElasticsearchException', 'SerializationError', + 'TransportError', 'NotFoundError', 'ConflictError', 'RequestError', 'ConnectionError', + 'SSLError', 'ConnectionTimeout' +] + +class ImproperlyConfigured(Exception): + """ + Exception raised when the config passed to the client is inconsistent or invalid. + """ + + +class ElasticsearchException(Exception): + """ + Base class for all exceptions raised by this package's operations (doesn't + apply to :class:`~elasticsearch.ImproperlyConfigured`). + """ + + +class SerializationError(ElasticsearchException): + """ + Data passed in failed to serialize properly in the ``Serializer`` being + used. + """ + + +class TransportError(ElasticsearchException): + """ + Exception raised when ES returns a non-OK (>=400) HTTP status code. Or when + an actual connection error happens; in that case the ``status_code`` will + be set to ``'N/A'``. + """ + @property + def status_code(self): + """ + The HTTP status code of the response that precipitated the error or + ``'N/A'`` if not applicable. + """ + return self.args[0] + + @property + def error(self): + """ A string error message. """ + return self.args[1] + + @property + def info(self): + """ Dict of returned error info from ES, where available. """ + return self.args[2] + + def __str__(self): + cause = '' + try: + if self.info: + cause = ', %r' % self.info['error']['root_cause'][0]['reason'] + except LookupError: + pass + return 'TransportError(%s, %r%s)' % (self.status_code, self.error, cause) + + +class ConnectionError(TransportError): + """ + Error raised when there was an exception while talking to ES. 
Original + exception from the underlying :class:`~elasticsearch.Connection` + implementation is available as ``.info.`` + """ + def __str__(self): + return 'ConnectionError(%s) caused by: %s(%s)' % ( + self.error, self.info.__class__.__name__, self.info) + + +class SSLError(ConnectionError): + """ Error raised when encountering SSL errors. """ + + +class ConnectionTimeout(ConnectionError): + """ A network timeout. Doesn't cause a node retry by default. """ + def __str__(self): + return 'ConnectionTimeout caused by - %s(%s)' % ( + self.info.__class__.__name__, self.info) + + +class NotFoundError(TransportError): + """ Exception representing a 404 status code. """ + + +class ConflictError(TransportError): + """ Exception representing a 409 status code. """ + + +class RequestError(TransportError): + """ Exception representing a 400 status code. """ + + +class AuthenticationException(TransportError): + """ Exception representing a 401 status code. """ + + +class AuthorizationException(TransportError): + """ Exception representing a 403 status code. """ + +# more generic mappings from status_code to python exceptions +HTTP_EXCEPTIONS = { + 400: RequestError, + 401: AuthenticationException, + 403: AuthorizationException, + 404: NotFoundError, + 409: ConflictError, +} diff --git a/scripts/external_libs/elasticsearch/elasticsearch/helpers/__init__.py b/scripts/external_libs/elasticsearch/elasticsearch/helpers/__init__.py new file mode 100644 index 00000000..953b644f --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/helpers/__init__.py @@ -0,0 +1,375 @@ +from __future__ import unicode_literals + +import logging +from operator import methodcaller + +from ..exceptions import ElasticsearchException, TransportError +from ..compat import map, string_types + +logger = logging.getLogger('elasticsearch.helpers') + +class BulkIndexError(ElasticsearchException): + @property + def errors(self): + """ List of errors from execution of the last chunk. 
""" + return self.args[1] + + +class ScanError(ElasticsearchException): + def __init__(self, scroll_id, *args, **kwargs): + super(ScanError, self).__init__(*args, **kwargs) + self.scroll_id = scroll_id + +def expand_action(data): + """ + From one document or action definition passed in by the user extract the + action/data lines needed for elasticsearch's + :meth:`~elasticsearch.Elasticsearch.bulk` api. + """ + # when given a string, assume user wants to index raw json + if isinstance(data, string_types): + return '{"index":{}}', data + + # make sure we don't alter the action + data = data.copy() + op_type = data.pop('_op_type', 'index') + action = {op_type: {}} + for key in ('_index', '_parent', '_percolate', '_routing', '_timestamp', + '_ttl', '_type', '_version', '_version_type', '_id', '_retry_on_conflict'): + if key in data: + action[op_type][key] = data.pop(key) + + # no data payload for delete + if op_type == 'delete': + return action, None + + return action, data.get('_source', data) + +def _chunk_actions(actions, chunk_size, max_chunk_bytes, serializer): + """ + Split actions into chunks by number or size, serialize them into strings in + the process. + """ + bulk_actions = [] + size, action_count = 0, 0 + for action, data in actions: + action = serializer.dumps(action) + cur_size = len(action) + 1 + + if data is not None: + data = serializer.dumps(data) + cur_size += len(data) + 1 + + # full chunk, send it and start a new one + if bulk_actions and (size + cur_size > max_chunk_bytes or action_count == chunk_size): + yield bulk_actions + bulk_actions = [] + size, action_count = 0, 0 + + bulk_actions.append(action) + if data is not None: + bulk_actions.append(data) + size += cur_size + action_count += 1 + + if bulk_actions: + yield bulk_actions + +def _process_bulk_chunk(client, bulk_actions, raise_on_exception=True, raise_on_error=True, **kwargs): + """ + Send a bulk request to elasticsearch and process the output. 
+ """ + # if raise on error is set, we need to collect errors per chunk before raising them + errors = [] + + try: + # send the actual request + resp = client.bulk('\n'.join(bulk_actions) + '\n', **kwargs) + except TransportError as e: + # default behavior - just propagate exception + if raise_on_exception: + raise e + + # if we are not propagating, mark all actions in current chunk as failed + err_message = str(e) + exc_errors = [] + + # deserialize the data back, thisis expensive but only run on + # errors if raise_on_exception is false, so shouldn't be a real + # issue + bulk_data = map(client.transport.serializer.loads, bulk_actions) + while True: + try: + # collect all the information about failed actions + action = next(bulk_data) + op_type, action = action.popitem() + info = {"error": err_message, "status": e.status_code, "exception": e} + if op_type != 'delete': + info['data'] = next(bulk_data) + info.update(action) + exc_errors.append({op_type: info}) + except StopIteration: + break + + # emulate standard behavior for failed actions + if raise_on_error: + raise BulkIndexError('%i document(s) failed to index.' % len(exc_errors), exc_errors) + else: + for err in exc_errors: + yield False, err + return + + # go through request-reponse pairs and detect failures + for op_type, item in map(methodcaller('popitem'), resp['items']): + ok = 200 <= item.get('status', 500) < 300 + if not ok and raise_on_error: + errors.append({op_type: item}) + + if ok or not errors: + # if we are not just recording all errors to be able to raise + # them all at once, yield items individually + yield ok, {op_type: item} + + if errors: + raise BulkIndexError('%i document(s) failed to index.' 
% len(errors), errors) + +def streaming_bulk(client, actions, chunk_size=500, max_chunk_bytes=100 * 1024 * 1024, + raise_on_error=True, expand_action_callback=expand_action, + raise_on_exception=True, **kwargs): + """ + Streaming bulk consumes actions from the iterable passed in and yields + results per action. For non-streaming usecases use + :func:`~elasticsearch.helpers.bulk` which is a wrapper around streaming + bulk that returns summary information about the bulk operation once the + entire input is consumed and sent. + + + :arg client: instance of :class:`~elasticsearch.Elasticsearch` to use + :arg actions: iterable containing the actions to be executed + :arg chunk_size: number of docs in one chunk sent to es (default: 500) + :arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB) + :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`) + from the execution of the last chunk when some occur. By default we raise. + :arg raise_on_exception: if ``False`` then don't propagate exceptions from + call to ``bulk`` and just report the items that failed as failed. + :arg expand_action_callback: callback executed on each action passed in, + should return a tuple containing the action line and the data line + (`None` if data line should be omitted). + """ + actions = map(expand_action_callback, actions) + + for bulk_actions in _chunk_actions(actions, chunk_size, max_chunk_bytes, client.transport.serializer): + for result in _process_bulk_chunk(client, bulk_actions, raise_on_exception, raise_on_error, **kwargs): + yield result + +def bulk(client, actions, stats_only=False, **kwargs): + """ + Helper for the :meth:`~elasticsearch.Elasticsearch.bulk` api that provides + a more human friendly interface - it consumes an iterator of actions and + sends them to elasticsearch in chunks. 
It returns a tuple with summary + information - number of successfully executed actions and either list of + errors or number of errors if `stats_only` is set to `True`. + + See :func:`~elasticsearch.helpers.streaming_bulk` for more accepted + parameters + + :arg client: instance of :class:`~elasticsearch.Elasticsearch` to use + :arg actions: iterator containing the actions + :arg stats_only: if `True` only report number of successful/failed + operations instead of just number of successful and a list of error responses + + Any additional keyword arguments will be passed to + :func:`~elasticsearch.helpers.streaming_bulk` which is used to execute + the operation. + """ + success, failed = 0, 0 + + # list of errors to be collected is not stats_only + errors = [] + + for ok, item in streaming_bulk(client, actions, **kwargs): + # go through request-reponse pairs and detect failures + if not ok: + if not stats_only: + errors.append(item) + failed += 1 + else: + success += 1 + + return success, failed if stats_only else errors + +def parallel_bulk(client, actions, thread_count=4, chunk_size=500, + max_chunk_bytes=100 * 1024 * 1024, + expand_action_callback=expand_action, **kwargs): + """ + Parallel version of the bulk helper run in multiple threads at once. + + :arg client: instance of :class:`~elasticsearch.Elasticsearch` to use + :arg actions: iterator containing the actions + :arg thread_count: size of the threadpool to use for the bulk requests + :arg chunk_size: number of docs in one chunk sent to es (default: 500) + :arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB) + :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`) + from the execution of the last chunk when some occur. By default we raise. + :arg raise_on_exception: if ``False`` then don't propagate exceptions from + call to ``bulk`` and just report the items that failed as failed. 
+ :arg expand_action_callback: callback executed on each action passed in, + should return a tuple containing the action line and the data line + (`None` if data line should be omitted). + """ + # Avoid importing multiprocessing unless parallel_bulk is used + # to avoid exceptions on restricted environments like App Engine + from multiprocessing.dummy import Pool + actions = map(expand_action_callback, actions) + + pool = Pool(thread_count) + + try: + for result in pool.imap( + lambda chunk: list(_process_bulk_chunk(client, chunk, **kwargs)), + _chunk_actions(actions, chunk_size, max_chunk_bytes, client.transport.serializer) + ): + for item in result: + yield item + + finally: + pool.close() + pool.join() + +def scan(client, query=None, scroll='5m', raise_on_error=True, + preserve_order=False, size=1000, request_timeout=None, clear_scroll=True, **kwargs): + """ + Simple abstraction on top of the + :meth:`~elasticsearch.Elasticsearch.scroll` api - a simple iterator that + yields all hits as returned by underlining scroll requests. + + By default scan does not return results in any pre-determined order. To + have a standard order in the returned documents (either by score or + explicit sort definition) when scrolling, use ``preserve_order=True``. This + may be an expensive operation and will negate the performance benefits of + using ``scan``. + + :arg client: instance of :class:`~elasticsearch.Elasticsearch` to use + :arg query: body for the :meth:`~elasticsearch.Elasticsearch.search` api + :arg scroll: Specify how long a consistent view of the index should be + maintained for scrolled search + :arg raise_on_error: raises an exception (``ScanError``) if an error is + encountered (some shards fail to execute). By default we raise. + :arg preserve_order: don't set the ``search_type`` to ``scan`` - this will + cause the scroll to paginate with preserving the order. 
Note that this + can be an extremely expensive operation and can easily lead to + unpredictable results, use with caution. + :arg size: size (per shard) of the batch send at each iteration. + :arg request_timeout: explicit timeout for each call to ``scan`` + :arg clear_scroll: explicitly calls delete on the scroll id via the clear + scroll API at the end of the method on completion or error, defaults + to true. + + Any additional keyword arguments will be passed to the initial + :meth:`~elasticsearch.Elasticsearch.search` call:: + + scan(es, + query={"query": {"match": {"title": "python"}}}, + index="orders-*", + doc_type="books" + ) + + """ + if not preserve_order: + query = query.copy() if query else {} + query["sort"] = "_doc" + # initial search + resp = client.search(body=query, scroll=scroll, size=size, + request_timeout=request_timeout, **kwargs) + + scroll_id = resp.get('_scroll_id') + if scroll_id is None: + return + + try: + first_run = True + while True: + # if we didn't set search_type to scan initial search contains data + if first_run: + first_run = False + else: + resp = client.scroll(scroll_id, scroll=scroll, request_timeout=request_timeout) + + for hit in resp['hits']['hits']: + yield hit + + # check if we have any errrors + if resp["_shards"]["failed"]: + logger.warning( + 'Scroll request has failed on %d shards out of %d.', + resp['_shards']['failed'], resp['_shards']['total'] + ) + if raise_on_error: + raise ScanError( + scroll_id, + 'Scroll request has failed on %d shards out of %d.' 
% + (resp['_shards']['failed'], resp['_shards']['total']) + ) + + scroll_id = resp.get('_scroll_id') + # end of scroll + if scroll_id is None or not resp['hits']['hits']: + break + finally: + if scroll_id and clear_scroll: + client.clear_scroll(body={'scroll_id': [scroll_id]}, ignore=(404, )) + +def reindex(client, source_index, target_index, query=None, target_client=None, + chunk_size=500, scroll='5m', scan_kwargs={}, bulk_kwargs={}): + + """ + Reindex all documents from one index that satisfy a given query + to another, potentially (if `target_client` is specified) on a different cluster. + If you don't specify the query you will reindex all the documents. + + Since ``2.3`` a :meth:`~elasticsearch.Elasticsearch.reindex` api is + available as part of elasticsearch itself. It is recommended to use the api + instead of this helper wherever possible. The helper is here mostly for + backwards compatibility and for situations where more flexibility is + needed. + + .. note:: + + This helper doesn't transfer mappings, just the data. 
+ + :arg client: instance of :class:`~elasticsearch.Elasticsearch` to use (for + read if `target_client` is specified as well) + :arg source_index: index (or list of indices) to read documents from + :arg target_index: name of the index in the target cluster to populate + :arg query: body for the :meth:`~elasticsearch.Elasticsearch.search` api + :arg target_client: optional, is specified will be used for writing (thus + enabling reindex between clusters) + :arg chunk_size: number of docs in one chunk sent to es (default: 500) + :arg scroll: Specify how long a consistent view of the index should be + maintained for scrolled search + :arg scan_kwargs: additional kwargs to be passed to + :func:`~elasticsearch.helpers.scan` + :arg bulk_kwargs: additional kwargs to be passed to + :func:`~elasticsearch.helpers.bulk` + """ + target_client = client if target_client is None else target_client + + docs = scan(client, + query=query, + index=source_index, + scroll=scroll, + **scan_kwargs + ) + def _change_doc_index(hits, index): + for h in hits: + h['_index'] = index + if 'fields' in h: + h.update(h.pop('fields')) + yield h + + kwargs = { + 'stats_only': True, + } + kwargs.update(bulk_kwargs) + return bulk(target_client, _change_doc_index(docs, target_index), + chunk_size=chunk_size, **kwargs) diff --git a/scripts/external_libs/elasticsearch/elasticsearch/helpers/test.py b/scripts/external_libs/elasticsearch/elasticsearch/helpers/test.py new file mode 100644 index 00000000..6fdd2ec0 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/helpers/test.py @@ -0,0 +1,60 @@ +import time +import os +try: + # python 2.6 + from unittest2 import TestCase, SkipTest +except ImportError: + from unittest import TestCase, SkipTest + +from elasticsearch import Elasticsearch +from elasticsearch.exceptions import ConnectionError + +def get_test_client(nowait=False, **kwargs): + # construct kwargs from the environment + kw = {'timeout': 30} + if 'TEST_ES_CONNECTION' in 
os.environ: + from elasticsearch import connection + kw['connection_class'] = getattr(connection, os.environ['TEST_ES_CONNECTION']) + + kw.update(kwargs) + client = Elasticsearch([os.environ.get('TEST_ES_SERVER', {})], **kw) + + # wait for yellow status + for _ in range(1 if nowait else 100): + try: + client.cluster.health(wait_for_status='yellow') + return client + except ConnectionError: + time.sleep(.1) + else: + # timeout + raise SkipTest("Elasticsearch failed to start.") + +def _get_version(version_string): + if '.' not in version_string: + return () + version = version_string.strip().split('.') + return tuple(int(v) if v.isdigit() else 999 for v in version) + +class ElasticsearchTestCase(TestCase): + @staticmethod + def _get_client(): + return get_test_client() + + @classmethod + def setUpClass(cls): + super(ElasticsearchTestCase, cls).setUpClass() + cls.client = cls._get_client() + + def tearDown(self): + super(ElasticsearchTestCase, self).tearDown() + self.client.indices.delete(index='*', ignore=404) + self.client.indices.delete_template(name='*', ignore=404) + + @property + def es_version(self): + if not hasattr(self, '_es_version'): + version_string = self.client.info()['version']['number'] + self._es_version = _get_version(version_string) + return self._es_version + diff --git a/scripts/external_libs/elasticsearch/elasticsearch/serializer.py b/scripts/external_libs/elasticsearch/elasticsearch/serializer.py new file mode 100644 index 00000000..808db714 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/serializer.py @@ -0,0 +1,77 @@ +try: + import simplejson as json +except ImportError: + import json +import uuid +from datetime import date, datetime +from decimal import Decimal + +from .exceptions import SerializationError, ImproperlyConfigured +from .compat import string_types + +class TextSerializer(object): + mimetype = 'text/plain' + + def loads(self, s): + return s + + def dumps(self, data): + if isinstance(data, string_types): + 
return data + + raise SerializationError('Cannot serialize %r into text.' % data) + +class JSONSerializer(object): + mimetype = 'application/json' + + def default(self, data): + if isinstance(data, (date, datetime)): + return data.isoformat() + elif isinstance(data, Decimal): + return float(data) + elif isinstance(data, uuid.UUID): + return str(data) + raise TypeError("Unable to serialize %r (type: %s)" % (data, type(data))) + + def loads(self, s): + try: + return json.loads(s) + except (ValueError, TypeError) as e: + raise SerializationError(s, e) + + def dumps(self, data): + # don't serialize strings + if isinstance(data, string_types): + return data + + try: + return json.dumps(data, default=self.default, ensure_ascii=False) + except (ValueError, TypeError) as e: + raise SerializationError(data, e) + +DEFAULT_SERIALIZERS = { + JSONSerializer.mimetype: JSONSerializer(), + TextSerializer.mimetype: TextSerializer(), +} + +class Deserializer(object): + def __init__(self, serializers, default_mimetype='application/json'): + try: + self.default = serializers[default_mimetype] + except KeyError: + raise ImproperlyConfigured('Cannot find default serializer (%s)' % default_mimetype) + self.serializers = serializers + + def loads(self, s, mimetype=None): + if not mimetype: + deserializer = self.default + else: + # split out charset + mimetype = mimetype.split(';', 1)[0] + try: + deserializer = self.serializers[mimetype] + except KeyError: + raise SerializationError('Unknown mimetype, unable to deserialize: %s' % mimetype) + + return deserializer.loads(s) + diff --git a/scripts/external_libs/elasticsearch/elasticsearch/transport.py b/scripts/external_libs/elasticsearch/elasticsearch/transport.py new file mode 100644 index 00000000..352ad717 --- /dev/null +++ b/scripts/external_libs/elasticsearch/elasticsearch/transport.py @@ -0,0 +1,364 @@ +import time +from itertools import chain + +from .connection import Urllib3HttpConnection +from .connection_pool import 
ConnectionPool, DummyConnectionPool
from .serializer import JSONSerializer, Deserializer, DEFAULT_SERIALIZERS
from .exceptions import ConnectionError, TransportError, SerializationError, \
    ConnectionTimeout, ImproperlyConfigured


def get_host_info(node_info, host):
    """
    Simple callback that takes the node info from `/_cluster/nodes` and a
    parsed connection information and return the connection information. If
    `None` is returned this node will be skipped.

    Useful for filtering nodes (by proximity for example) or if additional
    information needs to be provided for the :class:`~elasticsearch.Connection`
    class. By default master only nodes are filtered out since they shouldn't
    typically be used for API operations.

    :arg node_info: node information from `/_cluster/nodes`
    :arg host: connection information (host, port) extracted from the node info
    """
    attrs = node_info.get('attributes', {})

    # ignore master only nodes
    if (attrs.get('data', 'true') == 'false' and
        attrs.get('client', 'false') == 'false' and
        attrs.get('master', 'true') == 'true'):
        return None
    return host

class Transport(object):
    """
    Encapsulation of transport-related logic. Handles instantiation of the
    individual connections as well as creating a connection pool to hold them.

    Main interface is the `perform_request` method.
    """
    def __init__(self, hosts, connection_class=Urllib3HttpConnection,
        connection_pool_class=ConnectionPool, host_info_callback=get_host_info,
        sniff_on_start=False, sniffer_timeout=None, sniff_timeout=.1,
        sniff_on_connection_fail=False, serializer=JSONSerializer(), serializers=None,
        default_mimetype='application/json', max_retries=3, retry_on_status=(502, 503, 504, ),
        retry_on_timeout=False, send_get_body_as='GET', **kwargs):
        """
        :arg hosts: list of dictionaries, each containing keyword arguments to
            create a `connection_class` instance
        :arg connection_class: subclass of :class:`~elasticsearch.Connection` to use
        :arg connection_pool_class: subclass of :class:`~elasticsearch.ConnectionPool` to use
        :arg host_info_callback: callback responsible for taking the node information from
            `/_cluster/nodes`, along with already extracted information, and
            producing a list of arguments (same as `hosts` parameter)
        :arg sniff_on_start: flag indicating whether to obtain a list of nodes
            from the cluster at startup time
        :arg sniffer_timeout: number of seconds between automatic sniffs
        :arg sniff_on_connection_fail: flag controlling if connection failure triggers a sniff
        :arg sniff_timeout: timeout used for the sniff request - it should be a
            fast api call and we are talking potentially to more nodes so we want
            to fail quickly. Not used during initial sniffing (if
            ``sniff_on_start`` is on) when the connection still isn't
            initialized.
        :arg serializer: serializer instance
        :arg serializers: optional dict of serializer instances that will be
            used for deserializing data coming from the server. (key is the mimetype)
        :arg default_mimetype: when no mimetype is specified by the server
            response assume this mimetype, defaults to `'application/json'`
        :arg max_retries: maximum number of retries before an exception is propagated
        :arg retry_on_status: set of HTTP status codes on which we should retry
            on a different node. defaults to ``(502, 503, 504)``
        :arg retry_on_timeout: should timeout trigger a retry on different
            node? (default `False`)
        :arg send_get_body_as: for GET requests with body this option allows
            you to specify an alternate way of execution for environments that
            don't support passing bodies with GET requests. If you set this to
            'POST' a POST method will be used instead, if to 'source' then the body
            will be serialized and passed as a query parameter `source`.

        Any extra keyword arguments will be passed to the `connection_class`
        when creating an instance unless overridden by that connection's
        options provided as part of the hosts parameter.
        """

        # serialization config
        _serializers = DEFAULT_SERIALIZERS.copy()
        # if a serializer has been specified, use it for deserialization as well
        _serializers[serializer.mimetype] = serializer
        # if custom serializers map has been supplied, override the defaults with it
        if serializers:
            _serializers.update(serializers)
        # create a deserializer with our config
        self.deserializer = Deserializer(_serializers, default_mimetype)

        self.max_retries = max_retries
        self.retry_on_timeout = retry_on_timeout
        self.retry_on_status = retry_on_status
        self.send_get_body_as = send_get_body_as

        # data serializer
        self.serializer = serializer

        # store all strategies...
        self.connection_pool_class = connection_pool_class
        self.connection_class = connection_class

        # ...save kwargs to be passed to the connections
        self.kwargs = kwargs
        self.hosts = hosts

        # ...and instantiate them
        self.set_connections(hosts)
        # retain the original connection instances for sniffing
        self.seed_connections = self.connection_pool.connections[:]

        # sniffing data
        self.sniffer_timeout = sniffer_timeout
        self.sniff_on_connection_fail = sniff_on_connection_fail
        self.last_sniff = time.time()
        self.sniff_timeout = sniff_timeout

        # callback to construct host dict from data in /_cluster/nodes
        self.host_info_callback = host_info_callback

        if sniff_on_start:
            self.sniff_hosts(True)

    def add_connection(self, host):
        """
        Create a new :class:`~elasticsearch.Connection` instance and add it to the pool.

        :arg host: kwargs that will be used to create the instance
        """
        self.hosts.append(host)
        self.set_connections(self.hosts)

    def set_connections(self, hosts):
        """
        Instantiate all the connections and create a new connection pool to hold
        them. Tries to identify unchanged hosts and re-use existing
        :class:`~elasticsearch.Connection` instances.

        :arg hosts: same as `__init__`
        """
        # construct the connections
        def _create_connection(host):
            # if this is not the initial setup look at the existing connection
            # options and identify connections that haven't changed and can be
            # kept around.
            if hasattr(self, 'connection_pool'):
                for (connection, old_host) in self.connection_pool.connection_opts:
                    if old_host == host:
                        return connection

            # previously unseen params, create new connection
            kwargs = self.kwargs.copy()
            kwargs.update(host)

            if 'scheme' in host and host['scheme'] != self.connection_class.transport_schema:
                raise ImproperlyConfigured(
                    'Scheme specified in connection (%s) is not the same as the connection class (%s) specifies (%s).' % (
                    host['scheme'], self.connection_class.__name__, self.connection_class.transport_schema
                ))
            return self.connection_class(**kwargs)
        connections = map(_create_connection, hosts)

        connections = list(zip(connections, hosts))
        if len(connections) == 1:
            self.connection_pool = DummyConnectionPool(connections)
        else:
            # pass the hosts dicts to the connection pool to optionally extract parameters from
            self.connection_pool = self.connection_pool_class(connections, **self.kwargs)

    def get_connection(self):
        """
        Retrieve a :class:`~elasticsearch.Connection` instance from the
        :class:`~elasticsearch.ConnectionPool` instance.
        """
        if self.sniffer_timeout:
            if time.time() >= self.last_sniff + self.sniffer_timeout:
                self.sniff_hosts()
        return self.connection_pool.get_connection()

    def _get_sniff_data(self, initial=False):
        """
        Perform the request to get sniffing information. Returns a list of
        dictionaries (one per node) containing all the information from the
        cluster.

        It also sets the last_sniff attribute in case of a successful attempt.

        In rare cases it might be possible to override this method in your
        custom Transport class to serve data from alternative source like
        configuration management.
        """
        previous_sniff = self.last_sniff

        try:
            # reset last_sniff timestamp
            self.last_sniff = time.time()
            # go through all current connections as well as the
            # seed_connections for good measure
            for c in chain(self.connection_pool.connections, self.seed_connections):
                try:
                    # use small timeout for the sniffing request, should be a fast api call
                    _, headers, node_info = c.perform_request('GET', '/_nodes/_all/clear',
                        timeout=self.sniff_timeout if not initial else None)
                    node_info = self.deserializer.loads(node_info, headers.get('content-type'))
                    break
                except (ConnectionError, SerializationError):
                    pass
            else:
                raise TransportError("N/A", "Unable to sniff hosts.")
        except BaseException:
            # explicit BaseException instead of a bare except: (identical
            # semantics, but doesn't hide the intent); keep the previous
            # last_sniff value on error and re-raise
            self.last_sniff = previous_sniff
            raise

        return list(node_info['nodes'].values())

    def _get_host_info(self, host_info):
        address_key = self.connection_class.transport_schema + '_address'
        host = {}
        address = host_info.get(address_key, '')
        if '/' in address:
            host['host'], address = address.split('/', 1)

        # malformed address
        if ':' not in address:
            return None

        ip, port = address.rsplit(':', 1)

        # use the ip if not overridden by publish_host
        host.setdefault('host', ip)
        host['port'] = int(port)

        return self.host_info_callback(host_info, host)

    def sniff_hosts(self, initial=False):
        """
        Obtain a list of nodes from the cluster and create a new connection
        pool using the information retrieved.

        To extract the node connection parameters use the ``nodes_to_host_callback``.

        :arg initial: flag indicating if this is during startup
            (``sniff_on_start``), ignore the ``sniff_timeout`` if ``True``
        """
        node_info = self._get_sniff_data(initial)

        hosts = list(filter(None, (self._get_host_info(n) for n in node_info)))

        # we weren't able to get any nodes, maybe using an incompatible
        # transport_schema or host_info_callback blocked all - raise error.
        if not hosts:
            raise TransportError("N/A", "Unable to sniff hosts - no viable hosts found.")

        self.set_connections(hosts)

    def mark_dead(self, connection):
        """
        Mark a connection as dead (failed) in the connection pool. If sniffing
        on failure is enabled this will initiate the sniffing process.

        :arg connection: instance of :class:`~elasticsearch.Connection` that failed
        """
        # mark as dead even when sniffing to avoid hitting this host during the sniff process
        self.connection_pool.mark_dead(connection)
        if self.sniff_on_connection_fail:
            self.sniff_hosts()

    def perform_request(self, method, url, params=None, body=None):
        """
        Perform the actual request. Retrieve a connection from the connection
        pool, pass all the information to its perform_request method and
        return the data.

        If an exception was raised, mark the connection as failed and retry (up
        to `max_retries` times).

        If the operation was successful and the connection used was previously
        marked as dead, mark it as live, resetting its failure count.

        :arg method: HTTP method to use
        :arg url: absolute url (without host) to target
        :arg params: dictionary of query parameters, will be handed over to the
            underlying :class:`~elasticsearch.Connection` class for serialization
        :arg body: body of the request, will be serialized using serializer and
            passed to the connection
        """
        if body is not None:
            body = self.serializer.dumps(body)

        # some clients or environments don't support sending GET with body
        if method in ('HEAD', 'GET') and self.send_get_body_as != 'GET':
            # send it as post instead
            if self.send_get_body_as == 'POST':
                method = 'POST'

            # or as source parameter
            elif self.send_get_body_as == 'source':
                if params is None:
                    params = {}
                params['source'] = body
                body = None

        if body is not None:
            try:
                body = body.encode('utf-8')
            except (UnicodeDecodeError, AttributeError):
                # bytes/str - no need to re-encode
                pass

        ignore = ()
        timeout = None
        if params:
            timeout = params.pop('request_timeout', None)
            ignore = params.pop('ignore', ())
            if isinstance(ignore, int):
                ignore = (ignore, )

        for attempt in range(self.max_retries + 1):
            connection = self.get_connection()

            try:
                status, headers, data = connection.perform_request(method, url, params, body, ignore=ignore, timeout=timeout)

            except TransportError as e:
                if method == 'HEAD' and e.status_code == 404:
                    return False

                retry = False
                if isinstance(e, ConnectionTimeout):
                    retry = self.retry_on_timeout
                elif isinstance(e, ConnectionError):
                    retry = True
                elif e.status_code in self.retry_on_status:
                    retry = True

                if retry:
                    # only mark as dead if we are retrying
                    self.mark_dead(connection)
                    # raise exception on last retry
                    if attempt == self.max_retries:
                        raise
                else:
                    raise

            else:
                if method == 'HEAD':
                    return 200 <= status < 300

                # connection didn't fail, confirm its live status
                self.connection_pool.mark_live(connection)
                if data:
                    data = self.deserializer.loads(data, headers.get('content-type'))
                return data

    def close(self):
        """
        Explicitly closes connections
        """
        self.connection_pool.close()