# IDDRS_API/data_api/CreateIndexES.py
#
# 60 lines
# 2.0 KiB
# Python
# Raw Normal View History
#
# 2023-11-20 14:31:13 +00:00
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import json
import os
from os import walk
from pathlib import Path
class CreateIndexES:
    """Rebuild the ``iddrs`` Elasticsearch index from JSON files on disk.

    Attributes:
        ELASTIC_PASSWORD: Password used for the ``elastic`` user. Taken from
            the ``ELASTIC_PASSWORD`` environment variable when it is set,
            otherwise the historical built-in value is used.
        BASE_DIR: Directory two levels above this file; the JSON input
            directory ``media/data/json/`` is resolved relative to it.
    """

    def __init__(self):
        # NOTE(security): the fallback literal is a credential committed to
        # source control. It is kept only so existing deployments keep
        # working; prefer setting ELASTIC_PASSWORD in the environment and
        # rotate/remove this value.
        self.ELASTIC_PASSWORD = os.environ.get(
            "ELASTIC_PASSWORD", "p-P7luUvrPggWrS4UQsy"
        )
        self.BASE_DIR = Path(__file__).resolve().parent.parent

    def createIndex(self):
        """Drop and recreate the ``iddrs`` index, then bulk-load every file.

        Connects to ``https://localhost:9200`` as user ``elastic``, makes sure
        a fresh ``iddrs`` index with ``Title``/``Paragraph`` text fields
        exists (deleting any previous one together with its documents), and
        indexes the contents of each file found directly inside
        ``<BASE_DIR>/media/data/json/``.

        Each file is parsed as JSON and every element obtained by iterating
        the parsed value is sent as one document (presumably each file holds
        a list of objects -- confirm against the data). The number of
        successfully indexed documents is printed per file, followed by a
        report of any documents Elasticsearch rejected.

        Raises:
            json.JSONDecodeError: If a file does not contain valid JSON.
            OSError: If a file cannot be opened.
        """
        # Create the client instance
        es = Elasticsearch(
            "https://localhost:9200",
            ca_certs="/etc/elasticsearch/certs/http_ca.crt",
            basic_auth=("elastic", self.ELASTIC_PASSWORD),
        )
        index_name = "iddrs"
        mapping = {
            "mappings": {
                "properties": {
                    "Title": {"type": "text"},
                    "Paragraph": {"type": "text"},
                }
            }
        }

        # One existence check is enough: either the index is there and must
        # be removed first, or it is not. Both paths end in a create.
        if es.indices.exists(index=index_name):
            print("Index already exists. Deleting and recreating...")
            # Delete the index (including all documents)
            es.indices.delete(index=index_name, ignore=[400, 404])
        else:
            print("Index does not exist. Creating...")
        es.indices.create(index=index_name, body=mapping)

        json_dir = os.path.join(self.BASE_DIR, 'media/data/json/')
        # Only the files directly inside json_dir; the default keeps this an
        # empty list when the directory does not exist.
        filenames = next(walk(json_dir), (None, None, []))[2]
        for file in filenames:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(os.path.join(json_dir, file), 'r', encoding='utf-8') as f:
                data = json.load(f)
            actions = [
                {
                    "_op_type": "index",
                    "_index": index_name,
                    "_source": document,
                }
                for document in data
            ]
            # raise_on_error=False makes bulk() return the per-document
            # errors instead of raising, so they must be surfaced here.
            success, failed = bulk(
                es, actions, index=index_name, raise_on_error=False
            )
            print(success)
            if failed:
                print(
                    f"{len(failed)} document(s) from {file} "
                    f"failed to index: {failed}"
                )