from elasticsearch import Elasticsearch, helpers
from elasticsearch_dsl import Search, Q
import json

es = Elasticsearch()

index_name = 'stemmed_py'

if es.indices.exists(index=index_name):
    print("{} already exists; let's get rid of it...".format(index_name))
    es.indices.delete(index=index_name)

es.indices.create(index=index_name, body={
    'settings': {
        'analysis': {
            'analyzer': {
                'default': {
                    'type': 'standard'
                }
            }
        }
    }
})

# if we want to do one-off indexing of a relatively small number of documents:
for line in open('../grants.first100.json'):
    this_grant = json.loads(line)
    res = es.index(index=index_name, body=this_grant, doc_type='grant', id=this_grant['app_id'])
    print(res['created'])

# if we've got a larger number of documents, we want to use the bulk API:
# def grant_source(idx_name):
#     for line in open('grants.first100.json'):
#         this_grant = json.loads(line)
#         yield {'_index': idx_name, '_id': this_grant['app_id'], '_type': 'grant', '_source': this_grant}
#
# helpers.bulk(es, grant_source(index_name))

# helpers.bulk takes a Python iterable that produces action specifications, so we can stream a large
# data set and not have to load it all into memory at once.

# either way, we'll want to refresh the index
es.indices.refresh(index=index_name)

####################### Time to query!

# we are using the query DSL, which is more idiomatic than the straight-up ES API
# for details, see https://elasticsearch-dsl.readthedocs.io/en/latest/
this_search = Search(using=es, index=index_name).query("match", proj_title="study")
response = this_search.execute()

print("There were {} results!".format(response.hits.total))
for hit in response:
    print(hit.proj_title)
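
# The Q helper imported above lets us compose boolean queries from individual clauses.
# A minimal sketch (the second search term, "trial", is just an illustrative example,
# not something from the data set): combining two Q objects with | produces a
# bool/should query under the hood.
combined_query = Q("match", proj_title="study") | Q("match", proj_title="trial")
combined_search = Search(using=es, index=index_name).query(combined_query)
for hit in combined_search.execute():
    print(hit.proj_title)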
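
# A Search only returns the first page of hits (10 by default). If we want to stream
# every matching document, elasticsearch-dsl exposes the scan/scroll API via scan();
# a sketch, reusing the match query defined above:
for hit in this_search.scan():
    print(hit.proj_title)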