# IDDRS_API/search_tfidf/elasticSearch.py

import os
import re

from elasticsearch import Elasticsearch

def build_search_query(phrase, min_score):
    """Assemble the Elasticsearch request body for a phrase search.

    Args:
        phrase: Text to look for; matched against the ``Paragraph`` and
            ``Title`` fields through a ``multi_match`` query.
        min_score: Value placed in the request's ``min_score`` entry.

    Returns:
        A new dict with the ``size`` (fixed at 100), ``query``,
        ``highlight`` and ``min_score`` entries of the search request.
    """
    text_match = {
        "multi_match": {
            "query": phrase,
            "fields": ["Paragraph", "Title"],
        },
    }

    # Empty pre/post tags: the highlighter is asked for ``Paragraph``
    # fragments without any surrounding marker strings.
    highlighting = {
        "fields": {"Paragraph": {}},
        "pre_tags": [""],
        "post_tags": [""],
    }

    return {
        "size": 100,
        "query": text_match,
        "highlight": highlighting,
        "min_score": min_score,
    }

# NOTE(security): legacy fallback credential, kept only so existing deployments
# keep working. Set ELASTIC_PASSWORD in the environment and rotate this value.
_DEFAULT_ELASTIC_PASSWORD = "p-P7luUvrPggWrS4UQsy"
_DEFAULT_CA_CERTS = "/etc/elasticsearch/certs/http_ca.crt"


def _highlight_paragraph(paragraph, fragments):
    """Return *paragraph* with every occurrence of each fragment wrapped in a yellow span."""
    # De-duplicate and drop empty fragments; try longer fragments first so a
    # fragment that contains another one wins when both start at the same spot.
    unique = sorted(dict.fromkeys(f for f in fragments if f), key=len, reverse=True)
    if not unique:
        return paragraph

    # One pass over the untouched text: already-inserted markup is never
    # re-scanned, so spans cannot end up nested or applied twice.
    pattern = re.compile("|".join(re.escape(f) for f in unique))
    return pattern.sub(
        lambda m: f"<span style='background-color:#ffff00'>{m.group(0)}</span>",
        paragraph,
    )


def eSearch(phrase):
    """Search the ``iddrs`` index for *phrase*, relaxing the score threshold as needed.

    The query is first sent with ``min_score`` 8. While it returns no hits
    the threshold is lowered by one and the query is retried, stopping at 0
    so that a phrase matching nothing cannot loop forever.

    Connection settings come from the ``ELASTIC_PASSWORD`` and ``CA_CERTS``
    environment variables, falling back to the legacy built-in values.

    Args:
        phrase: Text to search for.

    Returns:
        A ``(results, min_score)`` tuple. ``results`` is the list of each
        hit's ``_source`` dict with an added ``"Highlight"`` entry: the
        paragraph with highlighted fragments wrapped in ``<span>`` tags, or
        ``[]`` when the hit has no ``Paragraph`` highlight. ``min_score`` is
        the threshold used by the last query sent.
    """
    password = os.environ.get("ELASTIC_PASSWORD") or _DEFAULT_ELASTIC_PASSWORD
    ca_certs = os.environ.get("CA_CERTS") or _DEFAULT_CA_CERTS

    # Local HTTPS endpoint, verified against the CA certificate and
    # authenticated as the "elastic" user.
    es = Elasticsearch(
        "https://localhost:9200",
        ca_certs=ca_certs,
        basic_auth=("elastic", password),
    )

    min_score = 8
    while True:
        search_query = build_search_query(phrase, min_score)
        search_results = es.search(index="iddrs", body=search_query)
        hits = search_results["hits"]["hits"]
        # Scores are non-negative, so a threshold of 0 already admits every
        # matching document; going lower cannot produce new hits.
        if hits or min_score <= 0:
            break
        min_score -= 1

    final_results = []
    for hit in hits:
        source = hit["_source"]
        fragments = hit.get("highlight", {}).get("Paragraph", [])
        if fragments:
            source["Highlight"] = _highlight_paragraph(
                source.get("Paragraph", ""), fragments
            )
        else:
            # Kept as an empty list for compatibility with existing consumers.
            source["Highlight"] = []
        final_results.append(source)

    return final_results, min_score