# Elasticsearch search helpers: query construction and highlighting of hits.
import os

from elasticsearch import Elasticsearch
def build_search_query(phrase, min_score, *, size=100):
    """Build the Elasticsearch request body for a phrase search.

    The query is a ``multi_match`` over the ``Paragraph`` and ``Title``
    fields. Highlighting is requested for ``Paragraph`` with empty pre/post
    tags, so the highlight fragments carry no markup of their own.

    Args:
        phrase: Text to search for.
        min_score: Value sent as the request's ``min_score``.
        size: Value sent as the request's ``size`` (defaults to 100).

    Returns:
        A dict with the keys ``size``, ``query``, ``highlight`` and
        ``min_score``.
    """
    return {
        "size": size,
        "query": {
            "multi_match": {
                "query": phrase,
                "fields": ["Paragraph", "Title"],
            },
        },
        "highlight": {
            "fields": {"Paragraph": {}},
            # Empty tags: the caller adds its own markup around fragments.
            "pre_tags": [""],
            "post_tags": [""],
        },
        "min_score": min_score,
    }
def eSearch(phrase):
    """Search the ``iddrs`` index for *phrase* and return highlighted hits.

    The search starts with a minimum score of 8 and lowers it by one per
    attempt until at least one hit comes back or a minimum score of 0 has
    been tried. Bounding the loop at 0 means a phrase that matches nothing
    ends the search instead of looping forever.

    For every hit, each highlight fragment returned for ``Paragraph`` is
    wrapped in a yellow-background ``<span>`` inside the full paragraph
    text, and the result is stored under the ``"Highlight"`` key of the
    hit's ``_source`` (an empty list when the hit has no highlight).

    Args:
        phrase: Text to search for.

    Returns:
        A ``(results, min_score)`` tuple: the list of ``_source`` dicts of
        the hits (empty when nothing matched) and the minimum score used by
        the last query that was sent.
    """
    # Connection settings come from the environment when available.
    # NOTE(review): the literal fallbacks only keep existing deployments
    # working; a credential committed to source should be rotated and the
    # fallback removed once ELASTIC_PASSWORD is set everywhere.
    elastic_password = os.environ.get("ELASTIC_PASSWORD", "p-P7luUvrPggWrS4UQsy")
    ca_certs = os.environ.get("CA_CERTS", "/etc/elasticsearch/certs/http_ca.crt")

    # Local instance on port 9200, TLS verified against the CA certificate,
    # authenticated as the "elastic" user.
    es = Elasticsearch(
        "https://localhost:9200",
        ca_certs=ca_certs,
        basic_auth=("elastic", elastic_password),
    )

    min_score = 8
    hits = []
    try:
        # Relax the score threshold step by step until something matches.
        while not hits and min_score >= 0:
            search_query = build_search_query(phrase, min_score)
            search_results = es.search(index="iddrs", body=search_query)
            hits = search_results["hits"]["hits"]
            min_score -= 1
    finally:
        # Release the client's connections even if a search raised.
        es.close()

    final_results = []
    for hit in hits:
        source = hit["_source"]
        fragments = hit.get("highlight", {}).get("Paragraph", [])
        if fragments:
            # Default to "" so .replace() is always called on a string.
            paragraph = source.get("Paragraph", "")
            # NOTE(review): the paragraph text is placed next to HTML markup
            # without escaping — confirm the indexed content is trusted.
            for fragment in fragments:
                paragraph = paragraph.replace(
                    fragment,
                    f"<span style='background-color:#ffff00'>{fragment}</span>",
                )
            source["Highlight"] = paragraph
        else:
            source["Highlight"] = []
        final_results.append(source)

    # min_score was decremented after the last query; add 1 to report the
    # threshold that query actually used.
    return final_results, min_score + 1