60 lines
2.0 KiB
Python
60 lines
2.0 KiB
Python
from elasticsearch import Elasticsearch
|
|
from elasticsearch.helpers import bulk
|
|
import json
|
|
import os
|
|
from os import walk
|
|
from pathlib import Path
|
|
|
|
class CreateIndexES:
    """Rebuild the ``iddrs`` Elasticsearch index from JSON files on disk.

    ``createIndex`` connects to the local Elasticsearch node, drops and
    recreates the index with a two-field text mapping (``Title`` and
    ``Paragraph``), then bulk-indexes every file found directly inside
    ``<BASE_DIR>/media/data/json/``.
    """

    # Name of the index that is (re)created and populated.
    _INDEX_NAME = "iddrs"

    # Index body passed to ``indices.create``.
    _INDEX_BODY = {
        "mappings": {
            "properties": {
                "Title": {"type": "text"},
                "Paragraph": {"type": "text"},
            }
        }
    }

    def __init__(self):
        """Set the Elasticsearch password and the project base directory."""
        # SECURITY: a credential is embedded in source control.  It is kept
        # only as a backward-compatible fallback; set the ELASTIC_PASSWORD
        # environment variable to override it, and rotate/remove the literal.
        self.ELASTIC_PASSWORD = os.environ.get(
            "ELASTIC_PASSWORD", "p-P7luUvrPggWrS4UQsy"
        )
        # Two directory levels above the directory containing this file.
        self.BASE_DIR = Path(__file__).resolve().parent.parent

    @staticmethod
    def _list_data_files(directory):
        """Return paths of the files directly inside *directory*, sorted by name.

        Sub-directories are not descended into.  A missing or unreadable
        directory yields an empty list (``os.walk`` produces nothing for it).
        """
        _, _, names = next(walk(directory), (None, None, []))
        return [Path(directory) / name for name in sorted(names)]

    @staticmethod
    def _build_actions(index_name, data):
        """Turn parsed JSON *data* into a list of bulk ``index`` actions.

        *data* is normally a list of documents.  A single top-level JSON
        object is treated as one document instead of being iterated by key.
        No ``_id`` is set, so Elasticsearch assigns document ids.
        """
        documents = [data] if isinstance(data, dict) else data
        return [
            {
                "_op_type": "index",
                "_index": index_name,
                "_source": document,
            }
            for document in documents
        ]

    def createIndex(self):
        """Recreate the index and load every data file into it.

        Prints a status line for the index (re)creation, then for each file
        the number of successfully indexed documents, plus a summary line
        when some documents of that file were rejected.

        Raises:
            json.JSONDecodeError: if a file in the data directory is not
                valid JSON.
        """
        # Create the client instance
        es = Elasticsearch(
            "https://localhost:9200",
            ca_certs="/etc/elasticsearch/certs/http_ca.crt",
            basic_auth=("elastic", self.ELASTIC_PASSWORD),
        )

        index_name = self._INDEX_NAME

        # Ask the cluster once; either way the index is created afterwards.
        if es.indices.exists(index=index_name):
            print("Index already exists. Deleting and recreating...")
            # Delete the index (including all documents)
            es.indices.delete(index=index_name, ignore=[400, 404])
        else:
            print("Index does not exist. Creating...")
        es.indices.create(index=index_name, body=self._INDEX_BODY)

        json_dir = Path(self.BASE_DIR) / "media" / "data" / "json"

        for path in self._list_data_files(json_dir):
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)

            actions = self._build_actions(index_name, data)
            # raise_on_error=False: per-document failures are returned in
            # ``failed`` instead of raising an exception.
            success, failed = bulk(
                es, actions, index=index_name, raise_on_error=False
            )
            print(success)
            if failed:
                print(f"{len(failed)} document(s) from {path.name} failed to index")
|
|
|