IDDRS_API/search_tfidf/elasticSearch.py
2023-11-21 15:20:57 +01:00

69 lines
2.4 KiB
Python

from elasticsearch import Elasticsearch
def build_search_query(phrase, min_score):
    """Build the Elasticsearch request body for a phrase search.

    Args:
        phrase: Text to look for; matched against the ``Paragraph`` and
            ``Title`` fields with a ``multi_match`` query.
        min_score: Value placed in the body's ``min_score`` entry.

    Returns:
        A dict suitable for use as a search request body. It asks for up
        to 100 hits and for highlighting on ``Paragraph``.
    """
    match_clause = {
        "multi_match": {
            "query": phrase,
            "fields": ["Paragraph", "Title"],
        }
    }
    # The highlight tags are empty strings, so returned fragments carry no
    # markup of their own.
    highlight_clause = {
        "fields": {"Paragraph": {}},
        "pre_tags": [""],
        "post_tags": [""],
    }
    return {
        "size": 100,
        "query": match_clause,
        "highlight": highlight_clause,
        "min_score": min_score,
    }
def _highlight_paragraph(paragraph, fragments):
    """Return *paragraph* with each fragment wrapped in a yellow ``<span>``."""
    for fragment in fragments:
        paragraph = paragraph.replace(
            fragment,
            f"<span style='background-color:#ffff00'>{fragment}</span>",
        )
    return paragraph


def eSearch(phrase):
    """Search the ``iddrs`` index for *phrase*, relaxing the score threshold.

    The query is first run with ``min_score`` 8. While a query returns no
    hits the threshold is lowered by one and the query is retried, stopping
    at 0 so that a phrase matching nothing cannot loop forever.

    Connection settings are read from the ``ELASTIC_PASSWORD`` and
    ``CA_CERTS`` environment variables when set; otherwise the legacy
    built-in values are used.

    Args:
        phrase: Text to search for.

    Returns:
        A tuple ``(results, min_score)``. ``results`` is a list of the
        hits' ``_source`` dicts, each extended with a ``"Highlight"`` entry:
        the paragraph with highlighted fragments wrapped in a yellow
        ``<span>``, or ``[]`` when the hit has no ``Paragraph`` highlight.
        ``min_score`` is the threshold used for the last query. When nothing
        matches at all the result is ``([], 0)``.
    """
    import os

    # SECURITY: a credential should not live in source control. The literal
    # is kept only as a fallback so existing deployments keep working.
    # TODO: rotate this password and remove the fallback.
    elastic_password = os.environ.get("ELASTIC_PASSWORD", "p-P7luUvrPggWrS4UQsy")
    ca_certs = os.environ.get("CA_CERTS", "/etc/elasticsearch/certs/http_ca.crt")

    # Client for the local Elasticsearch instance, authenticated as the
    # "elastic" user over HTTPS with the CA certificate above.
    es = Elasticsearch(
        "https://localhost:9200",
        ca_certs=ca_certs,
        basic_auth=("elastic", elastic_password),
    )

    min_score = 8
    while True:
        search_query = build_search_query(phrase, min_score)
        search_results = es.search(index="iddrs", body=search_query)
        hits = search_results["hits"]["hits"]
        # Stop on the first non-empty result, or once the threshold reaches
        # 0: lowering it further cannot produce additional matches.
        if hits or min_score <= 0:
            break
        min_score -= 1

    final_results = []
    for hit in hits:
        source = hit["_source"]
        fragments = hit.get("highlight", {}).get("Paragraph", [])
        if fragments:
            source["Highlight"] = _highlight_paragraph(
                source.get("Paragraph", ""), fragments
            )
        else:
            # Kept as an empty list (not "") for compatibility with callers.
            source["Highlight"] = []
        final_results.append(source)
    return final_results, min_score