Elasticsearch Cheatsheet

My Elasticsearch Cheatsheet using Python

Python Library

Create a Client with Python

Using IAM Authentication for AWS Elasticsearch

from elasticsearch import Elasticsearch, RequestsHttpConnection, helpers
from requests_aws4auth import AWS4Auth

# Build the AWS request signer for the Elasticsearch service ('es').
# access_key, secret_key, token, AWS_REGION and ES_ENDPOINT must already be
# defined by the caller; they are not set anywhere in this snippet.
aws_auth = AWS4Auth(access_key, secret_key, AWS_REGION, 'es', session_token=token)

# Create the client over HTTPS (port 443) with certificate verification on.
es = Elasticsearch(
    hosts=[{'host': ES_ENDPOINT, 'port': 443}],
    # Pass the signer created above; the previous name `aws_credentials`
    # was never defined and raised NameError.
    http_auth=aws_auth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)

Authenticate with HTTP Basic Auth

from elasticsearch import Elasticsearch, RequestsHttpConnection, helpers

# Single cluster node reached over HTTPS; ES_ENDPOINT must be defined by the caller.
es_node = {'host': ES_ENDPOINT, 'port': 443}

# Client authenticated with a (username, password) tuple for HTTP Basic Auth.
es = Elasticsearch(
    hosts=[es_node],
    http_auth=('user', 'password'),
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)

Elasticsearch Info Response

Get a response from ES:

>>> es.info()
{'name': 'elasticsearch-02', 'cluster_name': 'es-cluster', 'cluster_uuid': 'EJDqv5VrQyao07ndQuwhCw', 'version': {'number': '6.8.2', 'build_flavor': 'default', 'build_type': 'deb', 'build_hash': 'b506955', 'build_date': '2019-07-24T15:24:41.545295Z', 'build_snapshot': False, 'lucene_version': '7.7.0', 'minimum_wire_compatibility_version': '5.6.0', 'minimum_index_compatibility_version': '5.0.0'}, 'tagline': 'You Know, for Search'}

Prettify the response (requires `import json`):

>>> print(json.dumps(es.info(), indent=2))
{
  "name": "elasticsearch-02",
  "cluster_name": "es-cluster",
  "cluster_uuid": "EJDqv5VrQyao07ndQuwhCw",
  "version": {
    "number": "6.8.2",
    "build_flavor": "default",
    "build_type": "deb",
    "build_hash": "b506955",
    "build_date": "2019-07-24T15:24:41.545295Z",
    "build_snapshot": false,
    "lucene_version": "7.7.0",
    "minimum_wire_compatibility_version": "5.6.0",
    "minimum_index_compatibility_version": "5.0.0"
  },
  "tagline": "You Know, for Search"
}

Ingest

Create a document and specify an ID:

# Index `doc` into "test-index" under the explicit id 1 and keep the response in `res`.
# NOTE(review): `doc` is not defined in this snippet - presumably a dict holding
# the document fields; define it before running this call.
res = es.index(index="test-index", doc_type='tweet', id=1, body=doc)

Bulk Ingest

from elasticsearch import Elasticsearch, RequestsHttpConnection, helpers

# Source documents to ingest.
list_of_dicts = [{"name": "ruan", "age": 32}, {"name": "stefan", "age": 31}]

# Build one bulk action per document: copy each source dict and add the
# '_index' and '_type' metadata keys alongside its fields. Copying leaves
# list_of_dicts unmodified (the previous loop mutated the source dicts in
# place and ended each iteration with a no-op `del doc`).
bulk_docs = [
    {**doc, '_index': 'my-index-2019.06.12', '_type': '_doc'}
    for doc in list_of_dicts
]

# Send all actions through the bulk helper using the existing client `es`.
helpers.bulk(es, bulk_docs)

View Indices

>>> es.indices.get_alias("*").keys()
dict_keys(['fluentd-2020.02.26', 'metricbeat-2020.02.25', 'filebeat-2020.02.25', 'fluentd-2020.02.25', '.tasks', 'fluentd-2020.02.24', 'metricbeat-2019', 'telegram-bot', '.kibana_1', 'metricbeat-2020.02.26', 'filebeat-2020.02.26', 'metricbeat-2020.02'])

Search

Query: {"query": {"match": {"text": "HI"}}}

>>> es.search(index="telegram-bot", doc_type="_doc", body={"query": {"match": {"text": "HI"}}})
{'took': 335, 'timed_out': False, '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}, 'hits': {'total': 1, 'max_score': 0.6931472, 'hits': [{'_index': 'telegram-bot', '_type': '_doc', '_id': 'x', '_score': 0.6931472, '_source': {'message_id': x, 'date': x, 'text': 'HI', 'entities': [], 'caption_entities': [], 'photo': [], 'new_chat_members': [], 'new_chat_photo': [], 'delete_chat_photo': False, 'group_chat_created': False, 'supergroup_chat_created': False, 'channel_chat_created': False}}]}}

Using Python Requests

Create an index:

>>> response = requests.put(
    'https://es.x.x/xstats', 
    auth=('username', 'pass'), 
    headers={'content-type': 'application/json'}, 
    json={'settings': {'number_of_shards': 2}}
)