Upload Project

This commit is contained in:
louai98 2023-11-20 15:31:13 +01:00
parent 49eb16d0ac
commit 5592f5e7f0
168 changed files with 40547 additions and 363488 deletions

View File

@ -1,24 +0,0 @@
# Generated by Django 4.1.3 on 2023-07-07 07:17
from django.db import migrations, models
class Migration(migrations.Migration):
    initial = True
    dependencies = [
    ]
    operations = [
        migrations.CreateModel(
            name='NewContentTracker',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('paragraphID', models.CharField(max_length=10)),
                ('levelNumber', models.CharField(max_length=200)),
                ('standardTitle', models.CharField(max_length=200)),
                ('paragraph', models.TextField()),
            ],
        ),
    ]

View File

@ -1,12 +0,0 @@
from django.db import models
# Create your models here.
class NewContentTracker(models.Model):
    paragraphID = models.CharField(max_length=10)
    levelNumber = models.CharField(max_length=200)
    standardTitle = models.CharField(max_length=200)
    paragraph = models.TextField()
    def __str__(self):
        return str(self.id)

View File

@ -1,137 +0,0 @@
import json
import nltk
from spacy.matcher import Matcher
import spacy
import os
from os import walk
from pathlib import Path
import pandas as pd
import re
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize, MWETokenizer
from string import punctuation
#from App.models import Level
class PreprocessData:
    def __init__(self):
        self.nlp = spacy.load('en_core_web_lg')
        self.stopwords = nltk.corpus.stopwords.words('english')
        self.wordnet_lemmatizer = WordNetLemmatizer()
        self.BASE_DIR = Path(__file__).resolve().parent.parent
    def remove_punctuation(self, text):
        my_punctuation = punctuation.replace(".", "")
        my_punctuation = my_punctuation.replace("/", "")
        punctuationfree = "".join([i for i in text if i not in my_punctuation])
        return punctuationfree
    def remove_stopwords(self, text):
        output = [i for i in text if i not in self.stopwords]
        return output
    def tokenization(self, text):
        tokens = re.split(r'\W+', text)
        return tokens
    def lemmatizer(self, text):
        lemm_text = [self.wordnet_lemmatizer.lemmatize(word) for word in text]
        return lemm_text
    def merge_files(self, levels):
        levels.sort()
        allData = list()
        for level in levels:
            filenames = next(walk(os.path.join(self.BASE_DIR, 'static/data/'+level)), (None, None, []))[2]
            for file in filenames:
                with open(os.path.join(self.BASE_DIR, 'static/data/'+level+"/"+file), 'r') as f:
                    data = json.load(f)
                    for dd in data:
                        dd['LevelName'] = level
                        dd['Module'] = file[11:-5].replace('-', ' ')
                        dd['Title'] = file[:-5]
                    allData.extend(data)
        for idx, d in enumerate(allData):
            d['ParagraphID'] = idx
        return allData
    def add_compliance(self, paragraphs):
        df = pd.DataFrame(paragraphs)
        matcher = Matcher(self.nlp.vocab)
        pattern1 = [{'LOWER': 'shall'}]
        pattern2 = [{'LOWER': 'should'}]
        pattern3 = [{'LOWER': 'may'}]
        pattern4 = [{'LOWER': 'must'}]
        pattern5 = [{'LOWER': 'can'}]
        matcher.add('Shall', [pattern1])
        matcher.add('Should', [pattern2])
        matcher.add('May', [pattern3])
        matcher.add('Must', [pattern4])
        matcher.add('Can', [pattern5])
        for index, row in df.iterrows():
            doc = self.nlp(row['Paragraph'])
            found_matches = matcher(doc)
            if found_matches:
                for match_id, start, end in found_matches:
                    string_id = self.nlp.vocab.strings[match_id]
                    span = doc[start:end]
                    if string_id == 'Shall':
                        df.loc[index, 'Shall'] = 1
                    if string_id == 'Should':
                        df.loc[index, 'Should'] = 1
                    if string_id == 'May':
                        df.loc[index, 'May'] = 1
                    if string_id == 'Must':
                        df.loc[index, 'Must'] = 1
                    if string_id == 'Can':
                        df.loc[index, 'Can'] = 1
        return df
    def title_sent(self, title, sent):
        new_sent = title+': '+sent
        return new_sent
    def split_into_sentences(self, data):
        df = data
        df['Sentence'] = df['Paragraph'].apply(lambda xx: nltk.tokenize.sent_tokenize(xx))
        df = df.explode("Sentence").reset_index(drop=True)
        df['ProcessedSent'] = df['Sentence'].apply(lambda xx: self.remove_punctuation(xx))
        df['ProcessedSent'] = df['ProcessedSent'].apply(lambda xx: xx.lower())
        tokenizer = MWETokenizer()
        df['ProcessedSent'] = df['ProcessedSent'].apply(lambda xx: tokenizer.tokenize(word_tokenize(xx)))
        df['ProcessedSent'] = df['ProcessedSent'].apply(lambda xx: self.remove_stopwords(xx))
        df['ProcessedSent'] = df['ProcessedSent'].apply(lambda xx: self.lemmatizer(xx))
        df['ProcessedSent'] = df['ProcessedSent'].apply(lambda xx: ' '.join(xx))
        df['ProcessedSent'] = df.Module+' '+df.ProcessedSent
        return df
    def pre_process_files(self):
        print('Pre-processing started')
        levels = next(walk(os.path.join(self.BASE_DIR, 'static/data')), (None, None, []))[1]
        # levels = ['1 General IDDRS']
        paragraphs = self.merge_files(levels)
        df = self.add_compliance(paragraphs)
        processed_data = self.split_into_sentences(df)
        processed_data.to_json(os.path.join(self.BASE_DIR, 'static/searchable/data2.json'), orient='records', indent=4)
        print('Pre-processing finished')

View File

@ -1,7 +0,0 @@
from rest_framework import serializers
from .models import NewContentTracker
class NewContentTrackerSerializer(serializers.ModelSerializer):
    class Meta:
        model = NewContentTracker
        fields = '__all__'

View File

@ -1,32 +0,0 @@
from django.urls import path, include
from rest_framework import routers
from .views import LevelViewSet, StandardsViewSet, NewContentTrackerViewSet
from . import views
router = routers.DefaultRouter()
router.register(r'levels', LevelViewSet)
router.register(r'standards', StandardsViewSet)
router.register(r'NewContentTracker', NewContentTrackerViewSet)
urlpatterns = [
    path('', include(router.urls)),
    path('content-list/', views.contentList, name='content-list'),
    path('content-create/', views.contentCreate, name='content-create'),
    path('content-detail/', views.contentDetail, name='content-detail'),
    path('content-update/', views.contentUpdate, name='content-update'),
    path('content-delete/', views.contentDelete, name='content-delete'),
    path('level-submit/', views.levelSubmit, name='level-submit'),
    path('level-delete/', views.levelDelete, name='level-delete'),
    path('standard-submit/', views.standardSubmit, name='standard-submit'),
    path('standard-delete/', views.standardDelete, name='standard-delete'),
    path('pre-process/', views.preprocess, name='pre-process'),
    path('api/', include('admin_api.api.urls')),
    path('login/', views.loginPage, name='login'),
    path('logout/', views.logoutPage, name='logout'),
]

View File

@ -1,314 +0,0 @@
from http.client import HTTPResponse
from django.shortcuts import render, redirect
from rest_framework import viewsets
from search_tfidf.models import Level, Standards
from .models import NewContentTracker
from .serializer import NewContentTrackerSerializer
from search_tfidf.serializer import LevelSerializer, StandardsSerializer
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
import os
from pathlib import Path
from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from django.db.models import Max
import shutil
from django.contrib.auth import authenticate, login, logout
from django.contrib import messages
from .preprocessData import PreprocessData
# Create your views here.
@permission_classes([IsAuthenticated])
class LevelViewSet(viewsets.ModelViewSet):
    queryset = Level.objects.all()
    serializer_class = LevelSerializer
class StandardsViewSet(viewsets.ModelViewSet):
    queryset = Standards.objects.all()
    serializer_class = StandardsSerializer
class NewContentTrackerViewSet(viewsets.ModelViewSet):
    queryset = NewContentTracker.objects.all()
    serializer_class = NewContentTrackerSerializer
BASE_DIR = Path(__file__).resolve().parent.parent
# ################################################################
# #######################Contents#################################
# ################################################################
@csrf_exempt
@api_view(['GET'])
def contentList(request):
    # Get the values from the request parameters
    level = request.GET.get('level')
    standard = request.GET.get('standard')
    data = ""
    module_path = filePath(level, standard)
    # Read the JSON file
    with open(module_path) as f:
        data = json.load(f)
    # Return the filtered data as a JSON response
    return JsonResponse({'contents': data})
@csrf_exempt
@api_view(['POST'])
def contentCreate(request):
    level = request.data['params']['level']
    standard = request.data['params']['standard']
    #print(request.data['params']['formData']['Heading1'])
    module_path = filePath(level, standard)
    levelInfo = Level.objects.get(levelNumber=level)
    standardInfo = Standards.objects.get(standardTitle=standard)
    with open(module_path) as json_file:
        data = json.load(json_file)
    new_id = 0
    ids = []
    for obj in data:
        ids.append(obj['ID'])
    if len(ids) > 0:
        new_id = max(ids)+1
    new_obj = request.data['params']['formData']
    new_obj['ID'] = new_id
    new_obj['Color'] = levelInfo.levelColor
    new_obj['Level'] = levelInfo.levelNumber
    new_obj['LevelName'] = levelInfo.levelName
    new_obj['Title'] = standardInfo.standardTitle
    new_obj['Module'] = standardInfo.standardTitle
    data.append(new_obj)
    with open(module_path, 'w') as f:
        json.dump(data, f, indent=4)
    haveToPreProcess(new_id, levelInfo.levelNumber, standardInfo.standardTitle, new_obj['Paragraph'])
    return Response('')
@csrf_exempt
@api_view(['GET'])
def contentDetail(request):
    level = request.GET.get('level')
    standard = request.GET.get('standard')
    id = request.GET.get('id')
    module_path = filePath(level, standard)
    with open(module_path) as f:
        data = json.load(f)
    for obj in data:
        if obj['ID'] == int(id):
            data = obj
    return JsonResponse({'paragraph': data})
@csrf_exempt
@api_view(['POST'])
def contentUpdate(request):
    level = request.data['params']['level']
    standard = request.data['params']['standard']
    id = request.data['params']['id']
    updated_content = request.data['params']['formData']
    module_path = filePath(level, standard)
    with open(module_path) as f:
        data = json.load(f)
    for obj in data:
        if obj['ID'] == int(id):
            obj['Heading1'] = updated_content['Heading1']
            obj['Heading2'] = updated_content['Heading2']
            obj['Heading3'] = updated_content['Heading3']
            obj['Heading4'] = updated_content['Heading4']
            obj['Paragraph'] = updated_content['Paragraph']
            obj['PageNum'] = updated_content['PageNum']
    with open(module_path, 'w') as f:
        json.dump(data, f)
    haveToPreProcess(obj['ID'], level, standard, updated_content['Paragraph'])
    return Response('')
@csrf_exempt
@api_view(['POST'])
def contentDelete(request):
    level = request.data['params']['level']
    standard = request.data['params']['standard']
    id = request.data['params']['id']
    print(level, standard, id)
    module_path = filePath(level, standard)
    with open(module_path) as f:
        data = json.load(f)
    for i in range(len(data)):
        if data[i]['ID'] == int(id):
            data.pop(i)
            break
    with open(module_path, 'w') as f:
        json.dump(data, f)
    haveToPreProcess(id, level, standard, 'Deleted')
    return Response('')
# ################################################################
# #######################Levels###################################
# ################################################################
@csrf_exempt
@api_view(['POST'])
def levelSubmit(request):
    data = request.data['params']['editingRow']
    if 'id' in data:
        level = Level.objects.get(id=data['id'])
        serializer = LevelSerializer(instance=level, data=data)
        if serializer.is_valid():
            serializer.save()
    else:
        newLevelNumber = Level.objects.aggregate(Max('levelNumber'))['levelNumber__max']+1
        data['levelNumber'] = newLevelNumber
        serializer = LevelSerializer(data=data)
        if serializer.is_valid():
            serializer.save()
        level_new_dir = os.path.join(BASE_DIR, 'static/data/'+str(newLevelNumber)+'/')
        os.makedirs(level_new_dir, exist_ok=True)
    return Response('')
@csrf_exempt
@api_view(['POST'])
def levelDelete(request):
    data = request.data['params']['rowData']
    level = Level.objects.get(id=data['id'])
    level.delete()
    level_del_dir = os.path.join(BASE_DIR, 'static/data/'+str(data['levelNumber'])+'/')
    shutil.rmtree(level_del_dir)
    haveToPreProcess(data['id'], data['levelNumber'], 'No', 'LevelDeleted')
    return Response('')
# ################################################################
# #######################Standards################################
# ################################################################
@csrf_exempt
@api_view(['POST'])
def standardSubmit(request):
    data = request.data['params']['editingRow']
    if 'id' in data:
        standard = Standards.objects.get(id=data['id'])
        current_path = os.path.join(BASE_DIR, 'static/data/'+str(standard.standardLevel)+'/'+standard.standardTitle+'.json')
        new_path = os.path.join(BASE_DIR, 'static/data/'+str(standard.standardLevel)+'/'+data['standardTitle']+'.json')
        serializer = StandardsSerializer(instance=standard, data=data)
        if serializer.is_valid():
            serializer.save()
            os.rename(current_path, new_path)
    else:
        levelRow = Level.objects.get(id=data['levelID'])
        data['levelID'] = int(data['levelID'])
        data['standardLevel'] = levelRow.levelNumber
        serializer = StandardsSerializer(data=data)
        if serializer.is_valid():
            serializer.save()
            newFile = []
            standard_new = os.path.join(BASE_DIR, 'static/data/'+str(levelRow.levelNumber)+'/'+data['standardTitle']+'.json')
            with open(standard_new, 'w') as file:
                # Write the JSON data to the file
                json.dump(newFile, file)
        else:
            print(serializer.errors)
    return Response('')
@csrf_exempt
@api_view(['POST'])
def standardDelete(request):
    data = request.data['params']['rowData']
    standard = Standards.objects.get(id=data['id'])
    standard.delete()
    haveToPreProcess(data['id'], data['standardLevel'], data['standardTitle'], 'StandardDeleted')
    return Response('Item successfully deleted!')
# ################################################################
# #######################pre-process##############################
# ################################################################
@csrf_exempt
@api_view(['POST'])
def preprocess(request):
    process_files = PreprocessData()
    process_files.pre_process_files()
    NewContentTracker.objects.all().delete()
    return Response('Processed successfully')
# ################################################################
# #######################Authentication###########################
# ################################################################
def loginPage(request):
    pass
def logoutPage(request):
    pass
# ################################################################
# ################################################################
# ################################################################
def filePath(level_input, standard_input):
    standards_dir = os.path.join(BASE_DIR, 'static/data/')
    file_path = ''
    levels = next(os.walk(os.path.join(BASE_DIR, 'static/data')), (None, None, []))[1]
    if str(level_input) in levels:
        filenames = next(os.walk(standards_dir+level_input), (None, None, []))[2]
        for file in filenames:
            if str(standard_input) in file:
                file_path = standards_dir+str(level_input)+'/'+file
    return file_path
def haveToPreProcess(id, levelNumber, standardTitle, paragraph):
    ######################################################
    ############NewContentTracker#########################
    newContent = {}
    newContent['paragraphID'] = id
    newContent['levelNumber'] = levelNumber
    newContent['standardTitle'] = standardTitle
    newContent['paragraph'] = paragraph
    serializer = NewContentTrackerSerializer(data=newContent)
    if serializer.is_valid():
        serializer.save()
    return 'Added'

data_api/CreateIndexES.py (new file, 60 lines)
View File

@ -0,0 +1,60 @@
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import json
import os
from os import walk
from pathlib import Path
class CreateIndexES:
    def __init__(self):
        self.ELASTIC_PASSWORD = "p-P7luUvrPggWrS4UQsy"
        self.BASE_DIR = Path(__file__).resolve().parent.parent
    def createIndex(self):
        # Create the client instance
        es = Elasticsearch(
            "https://localhost:9200",
            ca_certs="/etc/elasticsearch/certs/http_ca.crt",
            basic_auth=("elastic", self.ELASTIC_PASSWORD)
        )
        index_name = "iddrs"
        mapping = {
            "mappings": {
                "properties": {
                    "Title": {"type": "text"},
                    "Paragraph": {"type": "text"},
                }
            }
        }
        if not es.indices.exists(index=index_name):
            print("Index does not exist. Creating...")
            es.indices.create(index=index_name, body=mapping)
        else:
            print("Index already exists. Deleting and recreating...")
            # Delete the index (including all documents)
            es.indices.delete(index=index_name, ignore=[400, 404])
            es.indices.create(index=index_name, body=mapping)
        #es.indices.create(index=index_name, body=mapping, ignore=400)  # Ignore if the index already exists
        filenames = next(walk(os.path.join(self.BASE_DIR, 'static/data/json/')), (None, None, []))[2]
        for file in filenames:
            with open(os.path.join(self.BASE_DIR, 'static/data/json/')+file, 'r') as f:
                data = json.load(f)
                actions = [
                    {
                        "_op_type": "index",
                        "_index": index_name,
                        #"_id": i + 1,
                        "_source": document,
                    }
                    for i, document in enumerate(data)
                ]
                success, failed = bulk(es, actions, index=index_name, raise_on_error=False)
                print(success)
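For reference, a minimal sketch of driving this class by hand — it assumes the same local Elasticsearch node, CA certificate, and hard-coded password used above, and that static/data/json/ has already been populated by the preprocessing step:

# Sketch only: requires a reachable Elasticsearch instance and existing JSON files.
from data_api.CreateIndexES import CreateIndexES

indexer = CreateIndexES()
indexer.createIndex()  # recreates the "iddrs" index, then bulk-loads every file in static/data/json/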

data_api/PreprocessFile.py (new file, 243 lines)
View File

@ -0,0 +1,243 @@
from docx import Document
import os
import fitz
import re
import uuid
import shutil
import json
import logging
from pathlib import Path
import spacy
from spacy.matcher import Matcher
from .models import Levels, StandardsList
from .CreateIndexES import CreateIndexES
class PreprocessFile:
    def __init__(self):
        self.BASE_DIR = Path(__file__).resolve().parent.parent
    def find_summary_page(self, pdf_path, summary_text):
        doc = fitz.open(pdf_path)
        summary_count = 0
        for page_num in range(len(doc)):
            page = doc[page_num]
            text_instances = page.get_text("text")
            # Counting the number of occurrences of the summary text on the page
            summary_count += text_instances.count(summary_text)
            if summary_count >= 2:
                return page_num
        else:
            page_num = 0
        return page_num
    def find_text_in_pdf_from_summary(self, pdf_path, search_text, summary_text):
        summary_page = self.find_summary_page(pdf_path, summary_text)
        if summary_page is None:
            # print("Summary not found in the PDF.")
            return None
        doc = fitz.open(pdf_path)
        for page_num in range(
            summary_page + 1, len(doc)
        ):  # Start searching after the 2nd summary
            page = doc[page_num]
            text_instances = page.get_text("text")
            # Use regex to find instances of search_text without anything following it on the same line
            regex_pattern = re.compile(rf"\b{search_text}\b(?![^\n]*\S)", re.IGNORECASE)
            match = regex_pattern.search(text_instances)
            if match:
                # print(f"Text found on page {page_num + 1}, after the 2nd summary.")
                return page_num + 1
        # print("Text not found in the PDF.")
        return None
    # Custom serialization function
    def custom_json_serialization(self, text):
        # Replace newline characters with spaces
        return text.replace("\n", " ")
    def process_standards(self):
        # BASE_DIR = Path(__file__).resolve().parent.parent
        nlp = spacy.load("en_core_web_sm")
        matcher = Matcher(nlp.vocab)
        pattern1 = [{"LOWER": "shall"}]
        pattern2 = [{"LOWER": "should"}]
        pattern3 = [{"LOWER": "may"}]
        pattern4 = [{"LOWER": "must"}]
        pattern5 = [{"LOWER": "can"}]
        matcher.add("Shall", [pattern1])
        matcher.add("Should", [pattern2])
        matcher.add("May", [pattern3])
        matcher.add("Must", [pattern4])
        matcher.add("Can", [pattern5])
        root_json_dir = os.path.join(
            self.BASE_DIR, "static/data/json/"
        )  # "../Standards/json"
        root_data_json_dir = os.path.join(
            self.BASE_DIR, "static/data/"
        )  # "../Standards/json"
        summary_text = "Summary"
        data = []
        # for logging
        number_of_successed_files = 0
        number_of_sections = 0
        number_of_sections_after_cleaning = 0
        number_of_cleaned_sections = 0
        # Check if the json directory exists
        if os.path.exists(root_json_dir):
            # Delete the directory and its contents
            shutil.rmtree(root_json_dir)
        # Create a new directory
        os.makedirs(root_json_dir)
        # Configure logging settings
        log_file = os.path.join(
            self.BASE_DIR, "static/data/json_log.log"
        )  # Specify the path and filename for the log file
        logging.basicConfig(
            filename=log_file,  # Set the log file
            level=logging.DEBUG,  # Set the logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL)
            format="%(asctime)s - %(levelname)s - %(message)s",  # Define log message format
            datefmt="%Y-%m-%d %H:%M:%S",  # Define date/time format
        )
        for standard in StandardsList.objects.all():
            standard_file_pdf = standard.standardFilePDF
            standard_file_word = standard.standardFileWord
            json_path = os.path.join(root_json_dir, standard.standardTitle + ".json")
            print(json_path)
            sections = []
            current_section = ""
            data_per_file = []
            # logging has to come here
            number_of_sections_after_cleaning_per_file = 0
            try:
                word_doc = Document(standard_file_word.path)
                for paragraph in word_doc.paragraphs:
                    if (
                        paragraph.style.name.startswith("Heading")
                        or "Section" in paragraph.style.name
                        or "Sub-section" in paragraph.style.name
                    ):
                        # If we're starting a new section, save off the old one
                        if current_section:
                            sections.append(current_section.strip())
                            current_section = ""
                        current_section += paragraph.text + "\n"
                    else:
                        # Otherwise, append text to current section
                        current_section += paragraph.text + "\n"
                # Append the last section to the list of sections if it exists
                if current_section.strip():
                    sections.append(current_section.strip())
                # print all sections
                for index, section in enumerate(sections):
                    # for logging
                    number_of_sections += 1
                    if section != "" and len(section.split()) > 25:
                        # for logging
                        number_of_sections_after_cleaning += 1
                        number_of_sections_after_cleaning_per_file += 1
                        first_line = section.strip().splitlines()[0]
                        text_to_search = first_line
                        page_num = self.find_text_in_pdf_from_summary(
                            standard_file_pdf.path, text_to_search, summary_text
                        )
                        doc = nlp(section)
                        found_matches = matcher(doc)
                        shall = should = may = must = can = False
                        if found_matches:
                            for match_id, start, end in found_matches:
                                string_id = nlp.vocab.strings[match_id]
                                span = doc[start:end]
                                if string_id == "Shall":
                                    shall = True
                                if string_id == "Should":
                                    should = True
                                if string_id == "May":
                                    may = True
                                if string_id == "Must":
                                    must = True
                                if string_id == "Can":
                                    can = True
                        section_obj = {
                            "ID": str(uuid.uuid4()),
                            "Color": standard.levelID.levelColor,
                            "Level": str(standard.levelNumber),
                            "LevelName": "",
                            "Title": standard.standardTitle,
                            "Heading1": "",
                            "Heading2": "",
                            "Heading3": "",
                            "Heading4": "",
                            "Module": standard.standardTitle,
                            "PageNum": page_num,
                            "Paragraph": self.custom_json_serialization(section),
                            "Can": can,
                            "May": may,
                            "Shall": shall,
                            "Should": should,
                            "Must": must,
                        }
                        data_per_file.append(section_obj)
                        data.append(section_obj)
                    else:
                        # for logging
                        number_of_cleaned_sections += 1
            except Exception as e:
                print(
                    f"An error occurred while processing {standard.standardTitle}: {str(e)}"
                )
            try:
                with open(json_path, "w") as json_file:
                    json.dump(data_per_file, json_file, indent=4)
                logging.info(
                    f"{number_of_sections_after_cleaning_per_file} sections have been saved to: {json_path}"
                )
                number_of_successed_files += 1
            except Exception as e:
                logging.error(
                    f"Failed to save {json_path}: {str(e)}"
                )
        logging.info(f"Number of successfully saved files: {number_of_successed_files}")
        logging.info(f"Number of sections: {number_of_sections}")
        logging.info(
            f"Number of sections after cleaning: {number_of_sections_after_cleaning}"
        )
        logging.info(f"Number of cleaned sections: {number_of_cleaned_sections}")
        with open(root_data_json_dir + "/data.json", "w") as json_file:
            # Use json.dump to write the data to the file
            json.dump(
                data, json_file, indent=4
            )  # Use indent for pretty formatting (optional)
        # Close the log file (optional, usually done automatically)
        logging.shutdown()
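The processFiles view further down is the real entry point for this class; as a hedged standalone sketch, it could also be driven from a script, assuming DJANGO_SETTINGS_MODULE resolves (the settings module name here is an assumption) and the StandardsList rows point at valid Word/PDF files:

# Hypothetical driver script; run with Django configured.
import os
import django
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "iddrs_api.settings")  # assumed settings module
django.setup()
from data_api.PreprocessFile import PreprocessFile
from data_api.CreateIndexES import CreateIndexES
PreprocessFile().process_standards()  # Word headings -> sections -> per-standard JSON + data.json
CreateIndexES().createIndex()  # rebuild the "iddrs" Elasticsearch index from that JSON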

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,7 +1,5 @@
-from django.http import JsonResponse
 from rest_framework.response import Response
 from rest_framework.decorators import api_view
 from rest_framework_simplejwt.serializers import TokenObtainPairSerializer
 from rest_framework_simplejwt.views import TokenObtainPairView

View File

@ -1,6 +1,6 @@
 from django.apps import AppConfig
-class AdminApiConfig(AppConfig):
+class DataApiConfig(AppConfig):
     default_auto_field = 'django.db.models.BigAutoField'
-    name = 'admin_api'
+    name = 'data_api'

data_api/forms.py (new file, 7 lines)
View File

@ -0,0 +1,7 @@
from django import forms
from .models import StandardsList
class StandardUploadForm(forms.ModelForm):
    class Meta:
        model = StandardsList
        fields = '__all__'

View File

@ -0,0 +1,34 @@
# Generated by Django 4.1.3 on 2023-11-03 09:07
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    initial = True
    dependencies = [
    ]
    operations = [
        migrations.CreateModel(
            name='Levels',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('levelTitle', models.CharField(max_length=200)),
                ('levelColor', models.CharField(max_length=200)),
                ('levelNumber', models.IntegerField()),
            ],
        ),
        migrations.CreateModel(
            name='Standards',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('standardFile', models.FileField(blank=True, null=True, upload_to='')),
                ('standardTitle', models.CharField(max_length=200)),
                ('standardPath', models.CharField(blank=True, max_length=200, null=True)),
                ('levelID', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='data_api.levels')),
            ],
        ),
    ]

View File

@ -0,0 +1,17 @@
# Generated by Django 4.1.3 on 2023-11-03 09:08
from django.db import migrations
class Migration(migrations.Migration):
    dependencies = [
        ('data_api', '0001_initial'),
    ]
    operations = [
        migrations.RenameModel(
            old_name='Standards',
            new_name='StandardsList',
        ),
    ]

View File

@ -0,0 +1,33 @@
# Generated by Django 4.1.3 on 2023-11-03 21:15
from django.db import migrations, models
class Migration(migrations.Migration):
    dependencies = [
        ('data_api', '0002_rename_standards_standardslist'),
    ]
    operations = [
        migrations.RenameField(
            model_name='standardslist',
            old_name='standardFile',
            new_name='standardFilePDF',
        ),
        migrations.RenameField(
            model_name='standardslist',
            old_name='standardPath',
            new_name='standardPathPDF',
        ),
        migrations.AddField(
            model_name='standardslist',
            name='standardFileWord',
            field=models.FileField(blank=True, null=True, upload_to=''),
        ),
        migrations.AddField(
            model_name='standardslist',
            name='standardPathWord',
            field=models.CharField(blank=True, max_length=200, null=True),
        ),
    ]

View File

@ -0,0 +1,31 @@
# Generated by Django 4.1.3 on 2023-11-06 08:50
import data_api.models
from django.db import migrations, models
import django.utils.timezone
class Migration(migrations.Migration):
    dependencies = [
        ('data_api', '0003_rename_standardfile_standardslist_standardfilepdf_and_more'),
    ]
    operations = [
        migrations.AddField(
            model_name='standardslist',
            name='uploaded_at',
            field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
            preserve_default=False,
        ),
        migrations.AlterField(
            model_name='standardslist',
            name='standardFilePDF',
            field=models.FileField(blank=True, null=True, upload_to=data_api.models.dynamic_upload_to),
        ),
        migrations.AlterField(
            model_name='standardslist',
            name='standardFileWord',
            field=models.FileField(blank=True, null=True, upload_to=data_api.models.dynamic_upload_to),
        ),
    ]

View File

@ -0,0 +1,29 @@
# Generated by Django 4.1.3 on 2023-11-06 12:41
import data_api.models
from django.db import migrations, models
class Migration(migrations.Migration):
    dependencies = [
        ('data_api', '0004_standardslist_uploaded_at_and_more'),
    ]
    operations = [
        migrations.AddField(
            model_name='standardslist',
            name='levelNumber',
            field=models.IntegerField(blank=True, null=True),
        ),
        migrations.AlterField(
            model_name='standardslist',
            name='standardFilePDF',
            field=models.FileField(upload_to=data_api.models.dynamic_upload_to),
        ),
        migrations.AlterField(
            model_name='standardslist',
            name='standardFileWord',
            field=models.FileField(upload_to=data_api.models.dynamic_upload_to),
        ),
    ]

View File

@ -0,0 +1,21 @@
# Generated by Django 4.1.3 on 2023-11-06 12:49
from django.db import migrations
class Migration(migrations.Migration):
    dependencies = [
        ('data_api', '0005_standardslist_levelnumber_and_more'),
    ]
    operations = [
        migrations.RemoveField(
            model_name='standardslist',
            name='standardPathPDF',
        ),
        migrations.RemoveField(
            model_name='standardslist',
            name='standardPathWord',
        ),
    ]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.1.3 on 2023-11-20 10:14
from django.db import migrations, models
class Migration(migrations.Migration):
    dependencies = [
        ('data_api', '0006_remove_standardslist_standardpathpdf_and_more'),
    ]
    operations = [
        migrations.AddField(
            model_name='standardslist',
            name='standardColor',
            field=models.CharField(blank=True, max_length=200),
        ),
    ]

View File

@ -0,0 +1,22 @@
# Generated by Django 4.1.3 on 2023-11-20 10:47
from django.db import migrations, models
class Migration(migrations.Migration):
    dependencies = [
        ('data_api', '0007_standardslist_standardcolor'),
    ]
    operations = [
        migrations.CreateModel(
            name='FileEvent',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('event_type', models.CharField(choices=[('UPLOAD', 'File Uploaded'), ('DELETE', 'File Deleted')], max_length=10)),
                ('file_name', models.CharField(max_length=255)),
                ('timestamp', models.DateTimeField(auto_now_add=True)),
            ],
        ),
    ]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.1.3 on 2023-11-20 13:03
from django.db import migrations, models
class Migration(migrations.Migration):
    dependencies = [
        ('data_api', '0008_fileevent'),
    ]
    operations = [
        migrations.AddField(
            model_name='fileevent',
            name='indexed',
            field=models.BooleanField(default=False),
        ),
    ]

View File

@ -0,0 +1,22 @@
# Generated by Django 4.1.3 on 2023-11-20 13:39
from django.db import migrations, models
class Migration(migrations.Migration):
    dependencies = [
        ('data_api', '0009_fileevent_indexed'),
    ]
    operations = [
        migrations.RemoveField(
            model_name='fileevent',
            name='indexed',
        ),
        migrations.AddField(
            model_name='fileevent',
            name='fileStatus',
            field=models.CharField(default='Pending', max_length=255),
        ),
    ]

data_api/models.py (new file, 60 lines)
View File

@ -0,0 +1,60 @@
from django.db import models
import os
def dynamic_upload_to(instance, filename):
    # Generate a dynamic folder name based on the owning row's level number
    return os.path.join(
        "iddrs_api/static/data/Standards", str(instance.levelNumber), filename
    )
# Create your models here.
class Levels(models.Model):
    # The Levels model defines the schema for levels in the database.
    # It has fields for the level's title, color, and number.
    levelTitle = models.CharField(max_length=200)
    levelColor = models.CharField(max_length=200)
    levelNumber = models.IntegerField()
    def __str__(self):
        return str(self.levelNumber)
class StandardsList(models.Model):
    # The StandardsList model defines the schema for standards in the database.
    # It has a foreign key to Levels, fields for the standard's files, title, and color,
    # and a __str__ method that represents the standard by its title.
    levelID = models.ForeignKey(Levels, on_delete=models.CASCADE, blank=True, null=True)
    levelNumber = models.IntegerField(blank=True, null=True)
    standardFilePDF = models.FileField(upload_to=dynamic_upload_to)
    standardFileWord = models.FileField(upload_to=dynamic_upload_to)
    standardTitle = models.CharField(max_length=200)
    standardColor = models.CharField(max_length=200, blank=True)
    uploaded_at = models.DateTimeField(auto_now_add=True)
    def save(self, *args, **kwargs):
        # Automatically set standardColor based on the associated Levels model's color
        if self.levelID:
            self.standardColor = self.levelID.levelColor
        super().save(*args, **kwargs)
    def __str__(self):
        return self.standardTitle
class FileEvent(models.Model):
    EVENT_CHOICES = (
        ("UPLOAD", "File Uploaded"),
        ("DELETE", "File Deleted"),
    )
    event_type = models.CharField(max_length=10, choices=EVENT_CHOICES)
    file_name = models.CharField(max_length=255)
    timestamp = models.DateTimeField(auto_now_add=True)
    fileStatus = models.CharField(default='Pending', max_length=255)
    def __str__(self):
        return f"{self.get_event_type_display()}: {self.file_name}"

data_api/serializers.py (new file, 17 lines)
View File

@ -0,0 +1,17 @@
from rest_framework import serializers
from .models import Levels, StandardsList, FileEvent
class LevelSerializer(serializers.ModelSerializer):
    class Meta:
        model = Levels
        fields = '__all__'
class StandardsSerializer(serializers.ModelSerializer):
    class Meta:
        model = StandardsList
        fields = '__all__'
class FileEventSerializer(serializers.ModelSerializer):
    class Meta:
        model = FileEvent
        fields = '__all__'

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,35 @@
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
2023-11-09 16:17:19 - INFO - Number of successed saved files: 0
2023-11-09 16:17:19 - INFO - Number of seactions: 0
2023-11-09 16:17:19 - INFO - Number of seactions after cleaning: 0
2023-11-09 16:17:19 - INFO - Number of cleaned seactions: 0
2023-11-09 16:21:55 - INFO - /home/louai/Documents/BICC/IDDRS/iddrs_tool/iddrs_api/data_api/PreprocessFile.py changed, reloading.

data_api/urls.py (new file, 25 lines)
View File

@ -0,0 +1,25 @@
from django.urls import path, include
from rest_framework import routers
from .views import LevelViewSet, StandardsViewSet, FileEventsViewSet
from . import views
router = routers.DefaultRouter()
router.register(r'levels', LevelViewSet)
router.register(r'standards', StandardsViewSet)
router.register(r'fileEvents', FileEventsViewSet)
urlpatterns = [
    path('', include(router.urls)),
    path('level-submit/', views.levelSubmit, name='level-submit'),
    path('level-delete/', views.levelDelete, name='level-delete'),
    path('upload-standard/', views.upload_standard, name='upload-standard'),
    path('standard-delete/', views.standardDelete, name='standard-delete'),
    path('process-files/', views.processFiles, name='process-files'),
    #path('api/', include('admin_api.api.urls')),
]
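The upload-standard route expects a multipart POST carrying a selectedLevel field plus pdfFile and wordFile uploads (see upload_standard in views.py below). A hedged client sketch — host, port, and file names are assumptions:

# Hypothetical client call; adjust host/port and paths to your deployment.
import requests

response = requests.post(
    "http://localhost:8000/data_api/upload-standard/",
    data={"selectedLevel": 2},
    files={
        "pdfFile": open("IDDRS-2.10.pdf", "rb"),
        "wordFile": open("IDDRS-2.10.docx", "rb"),
    },
)
print(response.text)  # the view answers "Done!"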

data_api/views.py (new file, 159 lines)
View File

@ -0,0 +1,159 @@
from rest_framework import viewsets
from .models import Levels, StandardsList, FileEvent
from .serializers import LevelSerializer, StandardsSerializer, FileEventSerializer
from django.views.decorators.csrf import csrf_exempt
import os
from pathlib import Path
from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from django.db.models import Max
import shutil
from .PreprocessFile import PreprocessFile
from .CreateIndexES import CreateIndexES
import logging
logger = logging.getLogger(__name__)
# Create your views here.
#@permission_classes([IsAuthenticated])
class LevelViewSet(viewsets.ModelViewSet):
    queryset = Levels.objects.all()
    serializer_class = LevelSerializer
#@permission_classes([IsAuthenticated])
class StandardsViewSet(viewsets.ModelViewSet):
    queryset = StandardsList.objects.all().order_by('levelNumber')
    serializer_class = StandardsSerializer
class FileEventsViewSet(viewsets.ModelViewSet):
    queryset = FileEvent.objects.all().filter(fileStatus='Pending')
    serializer_class = FileEventSerializer
BASE_DIR = Path(__file__).resolve().parent.parent
# ################################################################
# #######################Levels###################################
# ################################################################
def check_level_folder():
    level_numbers = Levels.objects.values_list('levelNumber', flat=True)
    for level_number in level_numbers:
        level_folder = os.path.join(BASE_DIR, 'static/data/Standards/'+str(level_number)+'/')
        if not os.path.exists(level_folder):
            os.makedirs(level_folder)
@csrf_exempt
@api_view(['POST'])
def levelSubmit(request):
    data = request.data['params']['editingRow']
    if 'id' in data:
        level = Levels.objects.get(id=data['id'])
        serializer = LevelSerializer(instance=level, data=data)
        if serializer.is_valid():
            serializer.save()
            check_level_folder()
    else:
        newLevelNumber = Levels.objects.aggregate(Max('levelNumber'))['levelNumber__max']+1
        data['levelNumber'] = newLevelNumber
        serializer = LevelSerializer(data=data)
        if serializer.is_valid():
            serializer.save()
            check_level_folder()
    return Response('')
@csrf_exempt
@api_view(['POST'])
def levelDelete(request):
    data = request.data['params']['rowData']
    level = Levels.objects.get(id=data['id'])
    level.delete()
    level_del_dir = os.path.join(BASE_DIR, 'static/data/Standards/'+str(data['levelNumber'])+'/')
    shutil.rmtree(level_del_dir)
    return Response('')
# ################################################################
# #######################Standards################################
# ################################################################
@csrf_exempt
@api_view(['POST'])
def upload_standard(request):
    if request.method == 'POST':
        level_number = request.data['selectedLevel']
        pdfFile = request.FILES['pdfFile']
        wordFile = request.FILES['wordFile']
        standard_level_id = Levels.objects.filter(levelNumber=level_number).values_list('id', flat=True)[0]
        standard = {
            'levelID': standard_level_id,
            'levelNumber': level_number,
            'standardFilePDF': pdfFile,
            'standardFileWord': wordFile,
            'standardTitle': pdfFile.name.split('.pdf')[0]
        }
        serializer = StandardsSerializer(data=standard)
        if serializer.is_valid():
            serializer.save()
            # Create a FileEvent for the upload
            FileEvent.objects.create(event_type='UPLOAD', file_name=pdfFile.name.split('.pdf')[0])
        else:
            print('Invalid')
    return Response('Done!')
@csrf_exempt
@api_view(['POST'])
def standardDelete(request):
    data = request.data['params']['stdData']
    standard = StandardsList.objects.get(id=data['id'])
    delete_file(standard.standardFilePDF.path)
    delete_file(standard.standardFileWord.path)
    standard.delete()
    # Create a FileEvent for the delete
    FileEvent.objects.create(event_type='DELETE', file_name=standard.standardFilePDF.name.split('.pdf')[0])
    return Response('')
def delete_file(file_path):
    try:
        os.remove(file_path)
        print(f"File {file_path} deleted successfully.")
    except FileNotFoundError:
        print(f"File {file_path} not found.")
    except PermissionError:
        print(f"Permission error: Unable to delete {file_path}.")
    except Exception as e:
        print(f"An error occurred: {e}")
@csrf_exempt
@api_view(['POST'])
def processFiles(request):
    logger.info('Starting file Processing ...')
    try:
        process_files = PreprocessFile()
        process_files.process_standards()
        create_index = CreateIndexES()
        create_index.createIndex()
    except Exception as e:
        logger.error(e)
        raise
    # Mark every pending FileEvent as indexed now that processing succeeded
    FileEvent.objects.all().update(fileStatus='Indexed')
    logger.info('File Processing completed')
    return Response('Processed successfully')

Binary file not shown.

View File

@ -50,11 +50,11 @@ INSTALLED_APPS = [
     'django.contrib.messages',
     'django.contrib.staticfiles',
     'rest_framework',
+    'rest_framework.authtoken',
     'rest_framework_simplejwt.token_blacklist',
     'django_filters',
     'search_tfidf',
-    'admin_api',
-    'user_auth',
+    'data_api',
     'corsheaders',
 ]

View File

@ -19,6 +19,5 @@ from django.urls import path, include
 urlpatterns = [
     path('admin/', admin.site.urls),
     path('client_api/', include('search_tfidf.urls')),
-    path('admin_api/', include('admin_api.urls')),
-    path('user_auth/', include('user_auth.urls')),
+    path('data_api/', include('data_api.urls')),
 ]

Binary file not shown.

View File

@ -0,0 +1,71 @@
from django.shortcuts import render
from elasticsearch import Elasticsearch
import os
def build_search_query(phrase, min_score):
    search_query = {
        "size": 100,
        "query": {
            "multi_match": {
                "query": phrase,
                "fields": ["Paragraph", "Title"]
            }
        },
        "highlight": {
            "fields": {
                "Paragraph": {}
            },
            "pre_tags": [""],
            "post_tags": [""],
        },
        "min_score": min_score
    }
    return search_query
def eSearch(phrase):
    # Set the password for connecting to Elasticsearch
    ELASTIC_PASSWORD = "p-P7luUvrPggWrS4UQsy"
    ca_certs = "/etc/elasticsearch/certs/http_ca.crt"
    # Get the Elasticsearch password from environment variable
    # ELASTIC_PASSWORD = os.environ.get('ELASTIC_PASSWORD')
    # Get the CA certificates path from environment variable
    # ca_certs = os.environ.get('CA_CERTS')
    # Create an Elasticsearch client instance to use for searching
    # Connect to the local Elasticsearch instance on port 9200
    # Use certificate authentication with the provided certificate
    # Authenticate with the elastic user and the password set above
    es = Elasticsearch(
        "https://localhost:9200",
        ca_certs=ca_certs,
        basic_auth=("elastic", ELASTIC_PASSWORD)
    )
    # Relax the score threshold one point at a time until something matches;
    # the floor of 0 prevents an endless loop when nothing matches at all
    number_of_hits = 0
    min_score = 8
    final_results = []
    search_results = []
    while number_of_hits == 0 and min_score >= 0:
        search_query = build_search_query(phrase, min_score)
        search_results = es.search(index="iddrs", body=search_query)
        number_of_hits = len(search_results["hits"]["hits"])
        min_score = min_score - 1
    # Process and display search results
    for hit in search_results["hits"]["hits"]:
        highlighted_texts = hit.get('highlight', {}).get('Paragraph', [])  # Use get() to avoid KeyError
        original_paragraph = hit.get('_source', {}).get('Paragraph', [])
        #print(highlighted_texts)
        if highlighted_texts:  # Check if highlight is not None
            for highlighted_text in highlighted_texts:
                original_paragraph = original_paragraph.replace(highlighted_text, f"<span style='background-color:#ffff00'>{highlighted_text}</span>")
            hit["_source"]["Highlight"] = original_paragraph
        else:
            hit["_source"]["Highlight"] = []
        final_results.append(hit["_source"])
    return final_results, min_score + 1
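eSearch starts at min_score 8 and relaxes the threshold one point per retry until the query matches (or the floor of 0 is reached), returning the hits together with the threshold that finally matched. A usage sketch — the phrase is illustrative and a populated "iddrs" index is assumed:

# Sketch only: requires the "iddrs" index built by CreateIndexES.
results, score_used = eSearch("community reintegration")
for hit in results[:3]:
    print(hit["Title"], "p.", hit["PageNum"])
print("matched with min_score =", score_used)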

View File

@ -1,3 +0,0 @@
from django.test import TestCase
# Create your tests here.

View File

@ -6,6 +6,7 @@ from django.http import JsonResponse
 from django.views.decorators.csrf import csrf_exempt
 import json
 from .tfidfSearch import cosine_similarity
+from .elasticSearch import eSearch
 from rest_framework.decorators import api_view
 from pathlib import Path
 import os
@ -45,7 +46,8 @@ def get_input(request):
         return JsonResponse({"message": "Data received", "results":searchResults})
     else:
-        searchResults = cosine_similarity(phrase, title=False)
+        #searchResults = cosine_similarity(phrase, title=False)
+        searchResults = eSearch(phrase)
         return JsonResponse({"message": "Data received", "results":searchResults})

File diff suppressed because it is too large

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff