Upload Project

2023-11-20 15:31:13 +01:00 · 2023-11-20 15:31:13 +01:00 · 5592f5e7f0
commit 5592f5e7f0
parent 49eb16d0ac
168 changed files with 40547 additions and 363488 deletions
--- a/admin_api/pycache/init.cpython-310.pyc
+++ b/admin_api/pycache/init.cpython-310.pyc
--- a/admin_api/pycache/admin.cpython-310.pyc
+++ b/admin_api/pycache/admin.cpython-310.pyc
--- a/admin_api/pycache/apps.cpython-310.pyc
+++ b/admin_api/pycache/apps.cpython-310.pyc
--- a/admin_api/pycache/models.cpython-310.pyc
+++ b/admin_api/pycache/models.cpython-310.pyc
--- a/admin_api/pycache/preprocessData.cpython-310.pyc
+++ b/admin_api/pycache/preprocessData.cpython-310.pyc
--- a/admin_api/pycache/serializer.cpython-310.pyc
+++ b/admin_api/pycache/serializer.cpython-310.pyc
--- a/admin_api/pycache/urls.cpython-310.pyc
+++ b/admin_api/pycache/urls.cpython-310.pyc
--- a/admin_api/pycache/views.cpython-310.pyc
+++ b/admin_api/pycache/views.cpython-310.pyc
--- a/admin_api/migrations/0001_initial.py
+++ b/admin_api/migrations/0001_initial.py
@ -1,24 +0,0 @@
-# Generated by Django 4.1.3 on 2023-07-07 07:17
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    initial = True
-
-    dependencies = [
-    ]
-
-    operations = [
-        migrations.CreateModel(
-            name='NewContentTracker',
-            fields=[
-                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
-                ('paragraphID', models.CharField(max_length=10)),
-                ('levelNumber', models.CharField(max_length=200)),
-                ('standardTitle', models.CharField(max_length=200)),
-                ('paragraph', models.TextField()),
-            ],
-        ),
-    ]
--- a/admin_api/migrations/pycache/0001_initial.cpython-310.pyc
+++ b/admin_api/migrations/pycache/0001_initial.cpython-310.pyc
--- a/admin_api/migrations/pycache/init.cpython-310.pyc
+++ b/admin_api/migrations/pycache/init.cpython-310.pyc
--- a/admin_api/models.py
+++ b/admin_api/models.py
@ -1,12 +0,0 @@
-from django.db import models
-
-# Create your models here.
-class NewContentTracker(models.Model):
-    paragraphID = models.CharField(max_length=10)
-    levelNumber = models.CharField(max_length=200)
-    standardTitle = models.CharField(max_length=200)
-    paragraph = models.TextField()
-
-
-    def __str__(self):
-        return str(self.id)
--- a/admin_api/preprocessData.py
+++ b/admin_api/preprocessData.py
@ -1,137 +0,0 @@
-import json
-import nltk
-from spacy.matcher import Matcher
-import spacy
-
-import os
-from os import walk
-from pathlib import Path
-import pandas as pd
-import re
-from nltk.stem import WordNetLemmatizer
-from nltk.tokenize import word_tokenize, MWETokenizer
-from string import punctuation
-#from App.models import Level
-
-class PreprocessData:
-
-    def __init__(self):
-        self.nlp = spacy.load('en_core_web_lg')
-        self.stopwords = nltk.corpus.stopwords.words('english')
-        self.wordnet_lemmatizer = WordNetLemmatizer()
-        self.BASE_DIR = Path(__file__).resolve().parent.parent
-        
-
-
-    def remove_punctuation(self, text):
-        my_punctuation = punctuation.replace(".", "")
-        my_punctuation = my_punctuation.replace("/", "")
-        punctuationfree="".join([i for i in text if i not in my_punctuation])
-        return punctuationfree
-
-    
-    def remove_stopwords(self, text):
-        output= [i for i in text if i not in self.stopwords]
-        return output
-    
-    def tokenization(self, text):
-        tokens = re.split('\W',text)
-        return tokens
-    
-    
-    def lemmatizer(self, text):
-        lemm_text = [self.wordnet_lemmatizer.lemmatize(word) for word in text]
-        return lemm_text
-
-        
-    def merge_files(self, levels):
-        levels.sort()
-        allData = list()
-        for level in levels:
-            filenames = next(walk(os.path.join(self.BASE_DIR,'static/data/'+level)), (None, None, []))[2]
-            for file in filenames:
-                with open(os.path.join(self.BASE_DIR,'static/data/'+level+"/"+file), 'r') as f:
-                    data = json.load(f)
-                    for dd in data:
-                        dd['LevelName'] = level
-                        dd['Module'] = file[11:-5].replace('-',' ')
-                        dd['Title'] = file[:-5]
-                    allData.extend(data)
-        for idx, d in enumerate(allData):
-            d['ParagraphID'] = idx
-        return allData
-
-    def add_compliance(self, paragraphs):
-        df = pd.DataFrame(paragraphs)
-        matcher = Matcher(self.nlp.vocab)
-        pattern1 = [{'LOWER':'shall'}]
-        pattern2 = [{'LOWER':'should'}]
-        pattern3 = [{'LOWER':'may'}]
-        pattern4 = [{'LOWER':'must'}]
-        pattern5 = [{'LOWER':'can'}]
-
-        matcher.add('Shall',[pattern1])
-        matcher.add('Should',[pattern2])
-        matcher.add('May',[pattern3])
-        matcher.add('Must',[pattern4])
-        matcher.add('Can',[pattern5])
-
-        for index, row in df.iterrows():
-            doc = self.nlp(row['Paragraph'])
-            found_matches = matcher(doc)
-            if found_matches:
-                for match_id, start, end in found_matches:
-                    string_id = self.nlp.vocab.strings[match_id]
-                    span = doc[start:end]
-                    if string_id == 'Shall':
-                        df.loc[index,'Shall'] = 1
-
-                    if string_id == 'Should':
-                        df.loc[index,'Should'] = 1
-
-                    if string_id == 'May':
-                        df.loc[index,'May'] = 1
-
-                    if string_id == 'Must':
-                        df.loc[index,'Must'] = 1
-
-                    if string_id == 'Can':
-                        df.loc[index,'Can'] = 1
-        return df
-
-    def title_sent(self, title, sent):
-        new_sent = title+': '+sent
-        return new_sent
-
-    def split_into_sentneces(self, data):
-        df = data
-        df['Sentence'] = df['Paragraph'].apply(lambda xx:nltk.tokenize.sent_tokenize(xx))
-        df = df.explode("Sentence").reset_index(drop=True)
-
-        df['ProcessedSent']= df['Sentence'].apply(lambda xx:self.remove_punctuation(xx))
-        df['ProcessedSent']= df['ProcessedSent'].apply(lambda xx: xx.lower())
-        tokenizer = MWETokenizer()
-        df['ProcessedSent']= df['ProcessedSent'].apply(lambda xx: tokenizer.tokenize(word_tokenize(xx)))
-        df['ProcessedSent']= df['ProcessedSent'].apply(lambda xx: self.remove_stopwords(xx))
-        df['ProcessedSent']= df['ProcessedSent'].apply(lambda xx: self.lemmatizer(xx))
-        df['ProcessedSent']= df['ProcessedSent'].apply(lambda xx: ' '.join(xx))
-        df['ProcessedSent']= df.Module+' '+ df.ProcessedSent
-
-        return df
-
-    def pre_process_files(self):
-        print('Pre-processing started')
-        levels = next(walk(os.path.join(self.BASE_DIR, 'static/data')), (None, None, []))[1]
-        # levels = ['1 General IDDRS']
-        paragraphs = self.merge_files(levels)
-        df = self.add_compliance(paragraphs)
-        processed_data = self.split_into_sentneces(df)
-        processed_data.to_json(os.path.join(self.BASE_DIR, 'static/searchable/data2.json'), orient='records', indent=4)
-        print('Pre-processing finished')
-
-
-
-
-
-
-
--- a/admin_api/serializer.py
+++ b/admin_api/serializer.py
@ -1,7 +0,0 @@
-from rest_framework import serializers
-from .models import NewContentTracker
-
-class NewContentTrackerSerializer(serializers.ModelSerializer):
-    class Meta:
-        model = NewContentTracker
-        fields = '__all__'
--- a/admin_api/urls.py
+++ b/admin_api/urls.py
@ -1,32 +0,0 @@
-from django.urls import path, include
-from rest_framework import routers
-from .views import LevelViewSet, StandardsViewSet, NewContentTrackerViewSet
-from . import views
-
-router = routers.DefaultRouter()
-router.register(r'levels', LevelViewSet)
-router.register(r'standards', StandardsViewSet)
-router.register(r'NewContentTracker', NewContentTrackerViewSet)
-
-
-urlpatterns = [
-    path('', include(router.urls)),
-    path('content-list/', views.contentList, name='content-list'),
-    path('content-create/', views.contentCreate, name='content-create'),
-    path('content-detail/', views.contentDetail, name='content-detail'),
-    path('content-update/', views.contentUpdate, name='content-update'),
-    path('content-delete/', views.contentDelete, name='content-delete'),
-
-    path('level-submit/', views.levelSubmit, name='level-submit'),
-    path('level-delete/', views.levelDelete, name='level-delete'),
-
-    path('standard-submit/', views.standardSubmit, name='standard-submit'),
-    path('standard-delete/', views.standardDelete, name='standard-delete'),
-    
-    path('pre-process/', views.preprocess, name='pre-process'),
-
-    path('api/', include('admin_api.api.urls')),
-
-    path('login/', views.loginPage, name='login'),
-    path('logout/', views.logoutPage, name='logout'),
-]
--- a/admin_api/views.py
+++ b/admin_api/views.py
@ -1,314 +0,0 @@
-from http.client import HTTPResponse
-from django.shortcuts import render, redirect
-from rest_framework import viewsets
-from search_tfidf.models import Level, Standards
-from .models import NewContentTracker
-from .serializer import NewContentTrackerSerializer
-from search_tfidf.serializer import LevelSerializer, StandardsSerializer
-from django.http import JsonResponse
-from django.views.decorators.csrf import csrf_exempt
-import json
-import os
-from pathlib import Path
-from rest_framework.decorators import api_view, permission_classes
-from rest_framework.permissions import IsAuthenticated
-from rest_framework.response import Response
-from django.db.models import Max
-import shutil
-from django.contrib.auth import authenticate, login, logout
-from django.contrib import messages
-
-
-from .preprocessData import PreprocessData
-# Create your views here.
-@permission_classes([IsAuthenticated])
-class LevelViewSet(viewsets.ModelViewSet):
-    queryset = Level.objects.all()
-    serializer_class = LevelSerializer
-
-class StandardsViewSet(viewsets.ModelViewSet):
-    queryset = Standards.objects.all()
-    serializer_class = StandardsSerializer
-
-class NewContentTrackerViewSet(viewsets.ModelViewSet):
-    queryset = NewContentTracker.objects.all()
-    serializer_class = NewContentTrackerSerializer
-
-
-BASE_DIR = Path(__file__).resolve().parent.parent
-
-# ################################################################
-# #######################Contents#################################
-# ################################################################
-
-@csrf_exempt
-@api_view(['GET'])
-def contentList(request):
-    # Get the values from the request parameters
-    level = request.GET.get('level')
-    standard = request.GET.get('standard')
-    data = ""
-
-    module_path  = filePath(level, standard)
-    # Read the JSON file
-    with open(module_path) as f:
-        data = json.load(f)
-
-    # Return the filtered data as a JSON response
-    return JsonResponse({'contents': data})
-
-@csrf_exempt
-@api_view(['POST'])
-def contentCreate(request):
-    level = request.data['params']['level']
-    standard = request.data['params']['standard']
-    #print(request.data['params']['formData']['Heading1'])
-    module_path  = filePath(level, standard)
-# 
-    levelInfo = Level.objects.get(levelNumber=level)
-    standardInfo = Standards.objects.get(standardTitle = standard)
-# 
-    with open(module_path) as json_file:
-        data = json.load(json_file)
-# 
-    new_id = 0
-    ids = []
-    for obj in data:
-        ids.append(obj['ID'])
-    if len(ids) > 0:
-        new_id = max(ids)+1
-# 
-    new_obj = request.data['params']['formData']
-    new_obj['ID'] = new_id
-    new_obj['Color'] = levelInfo.levelColor
-    new_obj['Level'] = levelInfo.levelNumber
-    new_obj['LevelName'] = levelInfo.levelName
-    new_obj['Title'] = standardInfo.standardTitle
-    new_obj['Module'] = standardInfo.standardTitle
-    data.append(new_obj)
-# 
-    with open(module_path, 'w') as f:
-        json.dump(data, f, indent=4)
-    
-    haveToPreProcess(new_id, levelInfo.levelNumber, standardInfo.standardTitle, new_obj['Paragraph'])
-    
-    return Response('')
-
-@csrf_exempt
-@api_view(['GET'])
-def contentDetail(request):
-    level = request.GET.get('level')
-    standard = request.GET.get('standard')
-    id = request.GET.get('id')
-
-    module_path  = filePath(level, standard)
-    with open(module_path) as f:
-        data = json.load(f)
-
-    for obj in data:
-        if obj['ID'] == int(id):
-            data = obj
-
-    return JsonResponse({'paragraph': data})
-
-@csrf_exempt
-@api_view(['POST'])
-def contentUpdate(request):
-    level = request.data['params']['level']
-    standard = request.data['params']['standard']
-
-    id = request.data['params']['id']
-    updated_content = request.data['params']['formData']
-
-    module_path  = filePath(level, standard)
-
-    with open(module_path) as f:
-        data = json.load(f)
-
-    for obj in data:
-        if obj['ID'] == int(id):
-            obj['Heading1'] = updated_content['Heading1']
-            obj['Heading2'] = updated_content['Heading2']
-            obj['Heading3'] = updated_content['Heading3']
-            obj['Heading4'] = updated_content['Heading4']
-            obj['Paragraph'] = updated_content['Paragraph']
-            obj['PageNum'] = updated_content['PageNum']
-            
-            
-
-
-    with open(module_path, 'w') as f:
-        json.dump(data, f)
-
-    haveToPreProcess(obj['ID'], level, standard, updated_content['Paragraph'])
-
-    return Response('')
-
-@csrf_exempt
-@api_view(['POST'])
-def contentDelete(request):
-    
-    level = request.data['params']['level']
-    standard = request.data['params']['standard']
-    id = request.data['params']['id']
-    print(level,standard,id)
-    module_path  = filePath(level, standard)
-
-    with open(module_path) as f:
-       data = json.load(f)
-    
-    for i in range(len(data)):
-       if data[i]['ID'] == int(id):
-           data.pop(i)
-           break
-
-    with open(module_path, 'w') as f:
-       json.dump(data, f)
-
-    haveToPreProcess(id, level, standard, 'Deleted')
-    
-    return Response('')
-
-# ################################################################
-# #######################Levels###################################
-# ################################################################
-@csrf_exempt
-@api_view(['POST'])
-def levelSubmit(request):
-    data = request.data['params']['editingRow']
-
-    if 'id' in data:
-        level = Level.objects.get(id=data['id'])
-        serializer = LevelSerializer(instance=level, data=data)
-        if serializer.is_valid():
-            serializer.save()
-        
-    else:
-        newLevelNumber = Level.objects.aggregate(Max('levelNumber'))['levelNumber__max']+1
-        data['levelNumber'] = newLevelNumber
-        serializer = LevelSerializer(data=data)
-        if serializer.is_valid():
-            serializer.save()
-            level_new_dir = os.path.join(BASE_DIR, 'static/data/'+str(newLevelNumber)+'/')
-            os.makedirs(level_new_dir, exist_ok=True)
-
-    
-
-    return Response('')
-
-
-@csrf_exempt
-@api_view(['POST'])
-def levelDelete(request):
-    data = request.data['params']['rowData']
-    level = Level.objects.get(id=data['id'])
-    level.delete()
-    level_del_dir = os.path.join(BASE_DIR, 'static/data/'+str(data['levelNumber'])+'/')
-    shutil.rmtree(level_del_dir)
-
-    haveToPreProcess(data['id'], data['levelNumber'], 'No', 'LevelDeleted')
-
-    return Response('')
-
-
-# ################################################################
-# #######################Standards################################
-# ################################################################
-
-@csrf_exempt
-@api_view(['POST'])
-def standardSubmit(request):
-    data = request.data['params']['editingRow']
-
-    if 'id' in data:
-        standard = Standards.objects.get(id = data['id'])
-        current_path = os.path.join(BASE_DIR, 'static/data/'+str(standard.standardLevel)+'/'+standard.standardTitle+'.json')
-        new_path = os.path.join(BASE_DIR, 'static/data/'+str(standard.standardLevel)+'/'+data['standardTitle']+'.json')
-        serializer = StandardsSerializer(instance=standard, data=data)
-        if serializer.is_valid():
-            serializer.save()
-            os.rename(current_path, new_path)
-            
-    else:
-        levelRow = Level.objects.get(id = data['levelID'])
-        data['levelID'] = int(data['levelID'])
-        data['standardLevel'] = levelRow.levelNumber
-        serializer = StandardsSerializer(data=data)
-        if serializer.is_valid():
-            serializer.save()
-            newFile=[]
-            standard_new = os.path.join(BASE_DIR, 'static/data/'+str(levelRow.levelNumber)+'/'+data['standardTitle']+'.json')
-            with open(standard_new, 'w') as file:
-                # Write the JSON data to the file
-                json.dump(newFile, file)
-            file.close()
-
-        else:
-            print(serializer.errors)
-
-
-    return Response('')
-
-@csrf_exempt
-@api_view(['POST'])
-def standardDelete(request):
-    data = request.data['params']['rowData']
-    standard = Standards.objects.get(id=data['id'])
-    standard.delete()
-    haveToPreProcess(data['id'], data['standardLevel'], data['standardTitle'], 'StandardDeleted')
-
-    return Response('Item successfully deleted!')
-
-# ################################################################
-# #######################pre-process##############################
-# ################################################################
-@csrf_exempt
-@api_view(['POST'])
-def preprocess(request):
-    process_files = PreprocessData()
-    process_files.pre_process_files()
-    NewContentTracker.objects.all().delete()
-    return Response('Procssed successfully')
-
-# ################################################################
-# #######################Authentication###########################
-# ################################################################
-
-def loginPage(request):
-    pass
-
-
-def logoutPage(request):
-    pass
-
-# ################################################################
-# ################################################################
-# ################################################################
-
-def filePath(level_input, standard_input):    
-    standards_dir = os.path.join(BASE_DIR, 'static/data/')
-    file_path = ''
-    levels = next(os.walk(os.path.join(BASE_DIR, 'static/data')), (None, None, []))[1]
-    if str(level_input) in levels:
-        filenames = next(os.walk(standards_dir+level_input), (None, None, []))[2]
-        for file in filenames:
-            if str(standard_input) in file:
-                file_path = standards_dir+str(level_input)+'/'+file
-          
-    return file_path
-
-def haveToPreProcess(id, levelNumber, standardTitle, paragraph):
-
-    ######################################################
-    ############NewContentTracker#########################
-    newContent = {}
-    newContent['paragraphID'] = id
-    newContent['levelNumber'] = levelNumber
-    newContent['standardTitle'] = standardTitle
-    newContent['paragraph'] = paragraph
-
-    serializer = NewContentTrackerSerializer(data=newContent)
-    if serializer.is_valid():
-        serializer.save()
-
-    return ('Added')
--- a/data_api/CreateIndexES.py
+++ b/data_api/CreateIndexES.py
@ -0,0 +1,60 @@
+from elasticsearch import Elasticsearch
+from elasticsearch.helpers import bulk
+import json
+import os
+from os import walk
+from pathlib import Path
+
+class CreateIndexES:
+    """Rebuild the ``iddrs`` Elasticsearch index from the per-standard JSON files.
+
+    The JSON files are read from ``<BASE_DIR>/static/data/json/``; each file is
+    loaded with ``json.load`` and every element is indexed as one document.
+    """
+
+    def __init__(self):
+        # SECURITY: credentials do not belong in source control. The
+        # ELASTIC_PASSWORD environment variable takes precedence; the literal
+        # is kept only as a backward-compatible fallback.
+        # NOTE(review): rotate this password and drop the fallback.
+        self.ELASTIC_PASSWORD = os.environ.get(
+            "ELASTIC_PASSWORD", "p-P7luUvrPggWrS4UQsy"
+        )
+        self.BASE_DIR = Path(__file__).resolve().parent.parent
+
+    def createIndex(self):
+        """Drop and recreate the index, then bulk-load every JSON file.
+
+        For each file the number of successfully indexed documents is
+        printed; if any document failed, the failure count is printed too
+        (``bulk`` is called with ``raise_on_error=False`` so it never raises
+        for individual documents).
+        """
+        # Create the client instance
+        es = Elasticsearch(
+            "https://localhost:9200",
+            ca_certs="/etc/elasticsearch/certs/http_ca.crt",
+            basic_auth=("elastic", self.ELASTIC_PASSWORD),
+        )
+
+        index_name = "iddrs"
+
+        mapping = {
+            "mappings": {
+                "properties": {
+                    "Title": {"type": "text"},
+                    "Paragraph": {"type": "text"},
+                }
+            }
+        }
+
+        # One existence check is enough: either the stale index is dropped
+        # first or we go straight to creation.
+        if es.indices.exists(index=index_name):
+            print("Index already exists. Deleting and recreating...")
+            # Delete the index (including all documents)
+            es.indices.delete(index=index_name, ignore=[400, 404])
+        else:
+            print("Index does not exist. Creating...")
+        es.indices.create(index=index_name, body=mapping)
+
+        json_dir = os.path.join(self.BASE_DIR, "static/data/json/")
+        filenames = next(walk(json_dir), (None, None, []))[2]
+
+        for file in filenames:
+            # Anything that is not a JSON file would make json.load fail.
+            if not file.endswith(".json"):
+                continue
+            with open(os.path.join(json_dir, file), "r", encoding="utf-8") as f:
+                data = json.load(f)
+            actions = [
+                {
+                    "_op_type": "index",
+                    "_index": index_name,
+                    "_source": document,
+                }
+                for document in data
+            ]
+            success, failed = bulk(es, actions, index=index_name, raise_on_error=False)
+            print(success)
+            if failed:
+                # Surface partial failures instead of silently dropping them.
+                print(f"{len(failed)} document(s) from {file} failed to index")
+        
--- a/data_api/PreprocessFile.py
+++ b/data_api/PreprocessFile.py
@ -0,0 +1,243 @@
+from docx import Document
+import os
+import fitz
+import re
+import uuid
+import shutil
+import json
+import logging
+from pathlib import Path
+
+import spacy
+from spacy.matcher import Matcher
+
+from .models import Levels, StandardsList
+
+from .CreateIndexES import CreateIndexES
+
+
+class PreprocessFile:
+    """Convert uploaded standards (Word + PDF) into JSON section records.
+
+    The Word document supplies the text, split into sections at heading-styled
+    paragraphs; the PDF is only used to look up the page number on which each
+    section's first line appears.
+    """
+
+    # Modal verbs flagged on every section; also the spaCy matcher rule names
+    # and the keys written to the output records.
+    _MODAL_VERBS = ("Shall", "Should", "May", "Must", "Can")
+
+    def __init__(self):
+        self.BASE_DIR = Path(__file__).resolve().parent.parent
+
+    def find_summary_page(self, pdf_path, summary_text):
+        """Return the 0-based page index of the second ``summary_text`` hit.
+
+        Occurrences are counted cumulatively across pages. When fewer than two
+        occurrences exist (or the document has no pages) 0 is returned, so the
+        caller starts searching from the second page.
+        """
+        doc = fitz.open(pdf_path)
+        try:
+            summary_count = 0
+            for page_num in range(len(doc)):
+                # Counting the number of occurrences of the summary text on the page
+                summary_count += doc[page_num].get_text("text").count(summary_text)
+                if summary_count >= 2:
+                    return page_num
+            return 0
+        finally:
+            doc.close()
+
+    def find_text_in_pdf_from_summary(self, pdf_path, search_text, summary_text):
+        """Return the 1-based page number where ``search_text`` ends a line.
+
+        Only pages after the one returned by ``find_summary_page`` are
+        searched. The match is case-insensitive and requires that nothing but
+        whitespace follows the text on its line. Returns ``None`` when the
+        text is empty or not found.
+        """
+        summary_page = self.find_summary_page(pdf_path, summary_text)
+
+        if summary_page is None or not search_text.strip():
+            return None
+
+        # The heading is literal text, so it must be escaped: characters such
+        # as "(" or "?" would otherwise raise re.error or change the match.
+        # Look-arounds replace \b so headings that start or end with
+        # punctuation can still match. Compiled once, not once per page.
+        regex_pattern = re.compile(
+            rf"(?<!\w){re.escape(search_text)}(?!\w)(?![^\n]*\S)", re.IGNORECASE
+        )
+
+        doc = fitz.open(pdf_path)
+        try:
+            # Start searching after the 2nd summary
+            for page_num in range(summary_page + 1, len(doc)):
+                if regex_pattern.search(doc[page_num].get_text("text")):
+                    return page_num + 1
+            return None
+        finally:
+            doc.close()
+
+    # Custom serialization function
+    def custom_json_serialization(self, text):
+        """Return ``text`` with newline characters replaced by spaces."""
+        return text.replace("\n", " ")
+
+    @staticmethod
+    def _split_sections(word_doc):
+        """Group a Word document's paragraphs into stripped section strings.
+
+        A new section starts at every paragraph whose style name starts with
+        "Heading" or contains "Section" / "Sub-section".
+        """
+        sections = []
+        current_section = ""
+        for paragraph in word_doc.paragraphs:
+            style_name = paragraph.style.name
+            is_heading = (
+                style_name.startswith("Heading")
+                or "Section" in style_name
+                or "Sub-section" in style_name
+            )
+            # If we're starting a new section, save off the old one
+            if is_heading and current_section:
+                sections.append(current_section.strip())
+                current_section = ""
+            current_section += paragraph.text + "\n"
+
+        # Append the last section to the list of sections if it exists
+        if current_section.strip():
+            sections.append(current_section.strip())
+        return sections
+
+    def process_standards(self):
+        """Regenerate the JSON section files for every ``StandardsList`` row.
+
+        Side effects: deletes and recreates ``static/data/json/``, writes one
+        ``<standardTitle>.json`` per standard there, writes the combined
+        ``static/data/data.json`` and appends to ``static/data/json_log.log``.
+        A failure while processing one standard is reported and does not stop
+        the remaining standards.
+        """
+        nlp = spacy.load("en_core_web_sm")
+        matcher = Matcher(nlp.vocab)
+        for verb in self._MODAL_VERBS:
+            matcher.add(verb, [[{"LOWER": verb.lower()}]])
+
+        root_json_dir = os.path.join(self.BASE_DIR, "static/data/json/")
+        root_data_json_dir = os.path.join(self.BASE_DIR, "static/data/")
+        summary_text = "Summary"
+        data = []
+
+        # for logging
+        number_of_successed_files = 0
+        number_of_sections = 0
+        number_of_sections_after_cleaning = 0
+        number_of_cleaned_sections = 0
+
+        # Start from an empty json directory on every run.
+        if os.path.exists(root_json_dir):
+            shutil.rmtree(root_json_dir)
+        os.makedirs(root_json_dir)
+
+        # Configure logging settings
+        log_file = os.path.join(self.BASE_DIR, "static/data/json_log.log")
+        logging.basicConfig(
+            filename=log_file,
+            level=logging.DEBUG,
+            format="%(asctime)s - %(levelname)s - %(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S",
+        )
+
+        for standard in StandardsList.objects.all():
+            # NOTE(review): the Word file is opened via ``.path`` but the PDF
+            # field object is handed to fitz as-is - confirm this resolves to
+            # the right file in every deployment.
+            standard_file_pdf = standard.standardFilePDF
+            standard_file_word = standard.standardFileWord
+            json_path = os.path.join(root_json_dir, standard.standardTitle + ".json")
+            print(json_path)
+            data_per_file = []
+            number_of_sections_after_cleaning_per_file = 0
+            try:
+                sections = self._split_sections(Document(standard_file_word.path))
+
+                for section in sections:
+                    number_of_sections += 1
+                    # Sections of 25 words or fewer are discarded.
+                    if len(section.split()) <= 25:
+                        number_of_cleaned_sections += 1
+                        continue
+
+                    number_of_sections_after_cleaning += 1
+                    number_of_sections_after_cleaning_per_file += 1
+
+                    first_line = section.strip().splitlines()[0]
+                    page_num = self.find_text_in_pdf_from_summary(
+                        standard_file_pdf, first_line, summary_text
+                    )
+
+                    # Names of the matcher rules that fired in this section.
+                    matched = {
+                        nlp.vocab.strings[match_id]
+                        for match_id, _start, _end in matcher(nlp(section))
+                    }
+                    section_obj = {
+                        "ID": str(uuid.uuid4()),
+                        "Color": standard.levelID.levelColor,
+                        "Level": str(standard.levelNumber),
+                        "LevelName": "",
+                        "Title": standard.standardTitle,
+                        "Heading1": "",
+                        "Heading2": "",
+                        "Heading3": "",
+                        "Heading4": "",
+                        "Module": standard.standardTitle,
+                        "PageNum": page_num,
+                        "Paragraph": self.custom_json_serialization(section),
+                        "Can": "Can" in matched,
+                        "May": "May" in matched,
+                        "Shall": "Shall" in matched,
+                        "Should": "Should" in matched,
+                        "Must": "Must" in matched,
+                    }
+                    data_per_file.append(section_obj)
+                    data.append(section_obj)
+            except Exception as e:
+                # Best effort per standard: report the failure and carry on.
+                message = f"An error occurred while processing {standard.standardTitle}: {str(e)}"
+                print(message)
+                logging.exception(message)
+
+            try:
+                with open(json_path, "w") as json_file:
+                    json.dump(data_per_file, json_file, indent=4)
+                    logging.info(
+                        f"{number_of_sections_after_cleaning_per_file} seactions has been saved to: {json_path}"
+                    )
+                    number_of_successed_files += 1
+            except Exception:
+                # Log the actual failure (with traceback), not a counter.
+                logging.exception(f"Failed to save sections to: {json_path}")
+
+        logging.info(f"Number of successed saved files: {number_of_successed_files}")
+        logging.info(f"Number of seactions: {number_of_sections}")
+        logging.info(
+            f"Number of seactions after cleaning: {number_of_sections_after_cleaning}"
+        )
+        logging.info(f"Number of cleaned seactions: {number_of_cleaned_sections}")
+
+        with open(os.path.join(root_data_json_dir, "data.json"), "w") as json_file:
+            json.dump(data, json_file, indent=4)
+
+        # Flush and close the log handlers.
+        logging.shutdown()
--- a/admin_api/init.py
+++ b/admin_api/init.py
--- a/data_api/pycache/CreateIndexES.cpython-310.pyc
+++ b/data_api/pycache/CreateIndexES.cpython-310.pyc
--- a/data_api/pycache/PreprocessFile.cpython-310.pyc
+++ b/data_api/pycache/PreprocessFile.cpython-310.pyc
--- a/data_api/pycache/init.cpython-310.pyc
+++ b/data_api/pycache/init.cpython-310.pyc
--- a/data_api/pycache/admin.cpython-310.pyc
+++ b/data_api/pycache/admin.cpython-310.pyc
--- a/data_api/pycache/apps.cpython-310.pyc
+++ b/data_api/pycache/apps.cpython-310.pyc
--- a/data_api/pycache/forms.cpython-310.pyc
+++ b/data_api/pycache/forms.cpython-310.pyc
--- a/data_api/pycache/models.cpython-310.pyc
+++ b/data_api/pycache/models.cpython-310.pyc
--- a/data_api/pycache/serializers.cpython-310.pyc
+++ b/data_api/pycache/serializers.cpython-310.pyc
--- a/data_api/pycache/urls.cpython-310.pyc
+++ b/data_api/pycache/urls.cpython-310.pyc
--- a/data_api/pycache/views.cpython-310.pyc
+++ b/data_api/pycache/views.cpython-310.pyc
--- a/admin_api/admin.py
+++ b/admin_api/admin.py
--- a/admin_api/api/init.py
+++ b/admin_api/api/init.py
--- a/admin_api/api/pycache/init.cpython-310.pyc
+++ b/admin_api/api/pycache/init.cpython-310.pyc
--- a/admin_api/api/pycache/urls.cpython-310.pyc
+++ b/admin_api/api/pycache/urls.cpython-310.pyc
--- a/admin_api/api/pycache/views.cpython-310.pyc
+++ b/admin_api/api/pycache/views.cpython-310.pyc
--- a/admin_api/api/serilaizer.py
+++ b/admin_api/api/serilaizer.py
--- a/admin_api/api/urls.py
+++ b/admin_api/api/urls.py
--- a/admin_api/api/views.py
+++ b/admin_api/api/views.py
@ -1,7 +1,5 @@
-from django.http import JsonResponse
 from rest_framework.response import Response
 from rest_framework.decorators import api_view
-
 from rest_framework_simplejwt.serializers import TokenObtainPairSerializer
 from rest_framework_simplejwt.views import TokenObtainPairView

--- a/admin_api/apps.py
+++ b/admin_api/apps.py
@ -1,6 +1,6 @@
 from django.apps import AppConfig


-class AdminApiConfig(AppConfig):
+class DataApiConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
-    name = 'admin_api'
+    name = 'data_api'
--- a/data_api/forms.py
+++ b/data_api/forms.py
@ -0,0 +1,7 @@
+from django import forms
+from .models import StandardsList
+
+class StandardUploadForm(forms.ModelForm):
+    """Model form bound to StandardsList that exposes every model field."""
+    class Meta:
+        model = StandardsList
+        # '__all__' includes every StandardsList field in the form.
+        fields = '__all__'
--- a/data_api/migrations/0001_initial.py
+++ b/data_api/migrations/0001_initial.py
@ -0,0 +1,34 @@
+# Generated by Django 4.1.3 on 2023-11-03 09:07
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    """Initial data_api migration: creates the Levels and Standards models."""
+
+    initial = True
+
+    dependencies = [
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='Levels',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('levelTitle', models.CharField(max_length=200)),
+                ('levelColor', models.CharField(max_length=200)),
+                ('levelNumber', models.IntegerField()),
+            ],
+        ),
+        migrations.CreateModel(
+            name='Standards',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('standardFile', models.FileField(blank=True, null=True, upload_to='')),
+                ('standardTitle', models.CharField(max_length=200)),
+                ('standardPath', models.CharField(blank=True, max_length=200, null=True)),
+                # Optional link to a level; deleting the level cascades to its standards.
+                ('levelID', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='data_api.levels')),
+            ],
+        ),
+    ]
--- a/data_api/migrations/0002_rename_standards_standardslist.py
+++ b/data_api/migrations/0002_rename_standards_standardslist.py
@ -0,0 +1,17 @@
+# Generated by Django 4.1.3 on 2023-11-03 09:08
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    """Renames the Standards model to StandardsList."""
+
+    dependencies = [
+        ('data_api', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.RenameModel(
+            old_name='Standards',
+            new_name='StandardsList',
+        ),
+    ]
--- a/data_api/migrations/0003_rename_standardfile_standardslist_standardfilepdf_and_more.py
+++ b/data_api/migrations/0003_rename_standardfile_standardslist_standardfilepdf_and_more.py
@ -0,0 +1,33 @@
+# Generated by Django 4.1.3 on 2023-11-03 21:15
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Splits the StandardsList file/path fields into PDF and Word variants.
+
+    The existing standardFile/standardPath fields are renamed to their *PDF
+    names, and new optional standardFileWord/standardPathWord fields are added.
+    """
+
+    dependencies = [
+        ('data_api', '0002_rename_standards_standardslist'),
+    ]
+
+    operations = [
+        migrations.RenameField(
+            model_name='standardslist',
+            old_name='standardFile',
+            new_name='standardFilePDF',
+        ),
+        migrations.RenameField(
+            model_name='standardslist',
+            old_name='standardPath',
+            new_name='standardPathPDF',
+        ),
+        migrations.AddField(
+            model_name='standardslist',
+            name='standardFileWord',
+            field=models.FileField(blank=True, null=True, upload_to=''),
+        ),
+        migrations.AddField(
+            model_name='standardslist',
+            name='standardPathWord',
+            field=models.CharField(blank=True, max_length=200, null=True),
+        ),
+    ]
--- a/data_api/migrations/0004_standardslist_uploaded_at_and_more.py
+++ b/data_api/migrations/0004_standardslist_uploaded_at_and_more.py
@ -0,0 +1,31 @@
+# Generated by Django 4.1.3 on 2023-11-06 08:50
+
+import data_api.models
+from django.db import migrations, models
+import django.utils.timezone
+
+
+class Migration(migrations.Migration):
+    """Adds StandardsList.uploaded_at and points both file fields at dynamic_upload_to."""
+
+    dependencies = [
+        ('data_api', '0003_rename_standardfile_standardslist_standardfilepdf_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='standardslist',
+            name='uploaded_at',
+            # timezone.now is only a one-off value for this AddField:
+            # preserve_default=False keeps it out of the resulting field definition.
+            field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
+            preserve_default=False,
+        ),
+        migrations.AlterField(
+            model_name='standardslist',
+            name='standardFilePDF',
+            field=models.FileField(blank=True, null=True, upload_to=data_api.models.dynamic_upload_to),
+        ),
+        migrations.AlterField(
+            model_name='standardslist',
+            name='standardFileWord',
+            field=models.FileField(blank=True, null=True, upload_to=data_api.models.dynamic_upload_to),
+        ),
+    ]
--- a/data_api/migrations/0005_standardslist_levelnumber_and_more.py
+++ b/data_api/migrations/0005_standardslist_levelnumber_and_more.py
@ -0,0 +1,29 @@
+# Generated by Django 4.1.3 on 2023-11-06 12:41
+
+import data_api.models
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Adds the nullable StandardsList.levelNumber and redeclares both file fields.
+
+    The file fields are altered to plain FileFields (no blank/null options)
+    that still upload through dynamic_upload_to.
+    """
+
+    dependencies = [
+        ('data_api', '0004_standardslist_uploaded_at_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='standardslist',
+            name='levelNumber',
+            field=models.IntegerField(blank=True, null=True),
+        ),
+        migrations.AlterField(
+            model_name='standardslist',
+            name='standardFilePDF',
+            field=models.FileField(upload_to=data_api.models.dynamic_upload_to),
+        ),
+        migrations.AlterField(
+            model_name='standardslist',
+            name='standardFileWord',
+            field=models.FileField(upload_to=data_api.models.dynamic_upload_to),
+        ),
+    ]
--- a/data_api/migrations/0006_remove_standardslist_standardpathpdf_and_more.py
+++ b/data_api/migrations/0006_remove_standardslist_standardpathpdf_and_more.py
@ -0,0 +1,21 @@
+# Generated by Django 4.1.3 on 2023-11-06 12:49
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    """Removes the standardPathPDF and standardPathWord fields from StandardsList."""
+
+    dependencies = [
+        ('data_api', '0005_standardslist_levelnumber_and_more'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='standardslist',
+            name='standardPathPDF',
+        ),
+        migrations.RemoveField(
+            model_name='standardslist',
+            name='standardPathWord',
+        ),
+    ]
--- a/data_api/migrations/0007_standardslist_standardcolor.py
+++ b/data_api/migrations/0007_standardslist_standardcolor.py
@ -0,0 +1,18 @@
+# Generated by Django 4.1.3 on 2023-11-20 10:14
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Adds the optional standardColor CharField to StandardsList."""
+
+    dependencies = [
+        ('data_api', '0006_remove_standardslist_standardpathpdf_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='standardslist',
+            name='standardColor',
+            field=models.CharField(blank=True, max_length=200),
+        ),
+    ]
--- a/data_api/migrations/0008_fileevent.py
+++ b/data_api/migrations/0008_fileevent.py
@ -0,0 +1,22 @@
+# Generated by Django 4.1.3 on 2023-11-20 10:47
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Creates the FileEvent model (event_type, file_name, timestamp)."""
+
+    dependencies = [
+        ('data_api', '0007_standardslist_standardcolor'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='FileEvent',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                # event_type is restricted to the UPLOAD / DELETE choices.
+                ('event_type', models.CharField(choices=[('UPLOAD', 'File Uploaded'), ('DELETE', 'File Deleted')], max_length=10)),
+                ('file_name', models.CharField(max_length=255)),
+                ('timestamp', models.DateTimeField(auto_now_add=True)),
+            ],
+        ),
+    ]
--- a/data_api/migrations/0009_fileevent_indexed.py
+++ b/data_api/migrations/0009_fileevent_indexed.py
@ -0,0 +1,18 @@
+# Generated by Django 4.1.3 on 2023-11-20 13:03
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Adds the boolean FileEvent.indexed field, defaulting to False."""
+
+    dependencies = [
+        ('data_api', '0008_fileevent'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='fileevent',
+            name='indexed',
+            field=models.BooleanField(default=False),
+        ),
+    ]
--- a/data_api/migrations/0010_remove_fileevent_indexed_fileevent_filestatus.py
+++ b/data_api/migrations/0010_remove_fileevent_indexed_fileevent_filestatus.py
@ -0,0 +1,22 @@
+# Generated by Django 4.1.3 on 2023-11-20 13:39
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Drops FileEvent.indexed and adds the fileStatus CharField (default 'Pending')."""
+
+    dependencies = [
+        ('data_api', '0009_fileevent_indexed'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='fileevent',
+            name='indexed',
+        ),
+        migrations.AddField(
+            model_name='fileevent',
+            name='fileStatus',
+            field=models.CharField(default='Pending', max_length=255),
+        ),
+    ]
--- a/admin_api/migrations/init.py
+++ b/admin_api/migrations/init.py
--- a/data_api/migrations/pycache/0001_initial.cpython-310.pyc
+++ b/data_api/migrations/pycache/0001_initial.cpython-310.pyc
--- a/data_api/migrations/pycache/0002_rename_standards_standardslist.cpython-310.pyc
+++ b/data_api/migrations/pycache/0002_rename_standards_standardslist.cpython-310.pyc
--- a/data_api/migrations/pycache/0003_rename_standardfile_standardslist_standardfilepdf_and_more.cpython-310.pyc
+++ b/data_api/migrations/pycache/0003_rename_standardfile_standardslist_standardfilepdf_and_more.cpython-310.pyc
--- a/data_api/migrations/pycache/0004_standardslist_uploaded_at_and_more.cpython-310.pyc
+++ b/data_api/migrations/pycache/0004_standardslist_uploaded_at_and_more.cpython-310.pyc
--- a/data_api/migrations/pycache/0005_standardslist_levelnumber_and_more.cpython-310.pyc
+++ b/data_api/migrations/pycache/0005_standardslist_levelnumber_and_more.cpython-310.pyc
--- a/data_api/migrations/pycache/0006_remove_standardslist_standardpathpdf_and_more.cpython-310.pyc
+++ b/data_api/migrations/pycache/0006_remove_standardslist_standardpathpdf_and_more.cpython-310.pyc
--- a/data_api/migrations/pycache/0007_standardslist_standardcolor.cpython-310.pyc
+++ b/data_api/migrations/pycache/0007_standardslist_standardcolor.cpython-310.pyc
--- a/data_api/migrations/pycache/0008_fileevent.cpython-310.pyc
+++ b/data_api/migrations/pycache/0008_fileevent.cpython-310.pyc
--- a/data_api/migrations/pycache/0009_fileevent_indexed.cpython-310.pyc
+++ b/data_api/migrations/pycache/0009_fileevent_indexed.cpython-310.pyc
--- a/data_api/migrations/pycache/0010_remove_fileevent_indexed_fileevent_filestatus.cpython-310.pyc
+++ b/data_api/migrations/pycache/0010_remove_fileevent_indexed_fileevent_filestatus.cpython-310.pyc
--- a/data_api/migrations/pycache/init.cpython-310.pyc
+++ b/data_api/migrations/pycache/init.cpython-310.pyc
--- a/data_api/models.py
+++ b/data_api/models.py
@ -0,0 +1,60 @@
+from django.db import models
+import os
+
+
+def dynamic_upload_to(instance, filename):
+    """Return the upload path for *filename*, grouped by the instance's level.
+
+    The path is ``iddrs_api/static/data/Standards``, then ``str(instance.levelNumber)``,
+    then *filename*, joined with ``os.path.join``.
+    """
+    level_folder = str(instance.levelNumber)
+    return os.path.join("iddrs_api/static/data/Standards", level_folder, filename)
+
+
+# Create your models here.
+
+
+class Levels(models.Model):
+    """A level, described by a title, a colour and an integer number."""
+    # levelTitle / levelColor: free text up to 200 characters each.
+    # levelNumber: required integer; views.py uses it to name the level's folder.
+    levelTitle = models.CharField(max_length=200)
+    levelColor = models.CharField(max_length=200)
+    levelNumber = models.IntegerField()
+
+    def __str__(self):
+        """Represent the level by its number, rendered as a string."""
+        return str(self.levelNumber)
+
+
+class StandardsList(models.Model):
+    """A standard stored as a PDF file plus a Word file, optionally tied to a level."""
+    # levelID: optional foreign key to Levels; deleting the level cascades here.
+    # levelNumber: optional integer, read by dynamic_upload_to to pick the upload folder.
+    # standardColor: copied from the linked level's levelColor in save().
+    # uploaded_at: set automatically when the row is first created (auto_now_add).
+    levelID = models.ForeignKey(Levels, on_delete=models.CASCADE, blank=True, null=True)
+    levelNumber = models.IntegerField(blank=True, null=True)
+    standardFilePDF = models.FileField(upload_to=dynamic_upload_to)  # PDF version of the standard
+    standardFileWord = models.FileField(upload_to=dynamic_upload_to)  # Word version of the standard
+    standardTitle = models.CharField(max_length=200)
+    standardColor = models.CharField(max_length=200, blank=True)
+    uploaded_at = models.DateTimeField(auto_now_add=True)
+
+    def save(self, *args, **kwargs):
+        """Copy the linked level's colour into standardColor, then save as usual."""
+        # Without a linked level, standardColor is left untouched.
+        if self.levelID:
+            self.standardColor = self.levelID.levelColor
+        super().save(*args, **kwargs)
+
+    def __str__(self):
+        """Represent the standard by its title."""
+        return self.standardTitle
+
+
+class FileEvent(models.Model):
+    """A record of a file upload or deletion, together with its status string."""
+    # (stored value, human-readable label) pairs accepted by event_type.
+    EVENT_CHOICES = (
+        ("UPLOAD", "File Uploaded"),
+        ("DELETE", "File Deleted"),
+    )
+
+    event_type = models.CharField(max_length=10, choices=EVENT_CHOICES)
+    file_name = models.CharField(max_length=255)
+    timestamp = models.DateTimeField(auto_now_add=True)
+    # Starts as 'Pending'; views.processFiles updates it to 'Indexed'.
+    fileStatus = models.CharField(default='Pending', max_length=255)
+
+    def __str__(self):
+        """Return '<event label>: <file name>', e.g. 'File Uploaded: foo'."""
+        return f"{self.get_event_type_display()}: {self.file_name}"
--- a/data_api/serializers.py
+++ b/data_api/serializers.py
@ -0,0 +1,17 @@
+from rest_framework import serializers
+from .models import Levels, StandardsList, FileEvent
+
+class LevelSerializer(serializers.ModelSerializer):
+    """Serializer for Levels that includes every model field."""
+    class Meta:
+        model = Levels
+        fields = '__all__'
+
+class StandardsSerializer(serializers.ModelSerializer):
+    """Serializer for StandardsList that includes every model field."""
+    class Meta:
+        model = StandardsList
+        fields = '__all__'
+
+class FileEventSerializer(serializers.ModelSerializer):
+    """Serializer for FileEvent that includes every model field."""
+    class Meta:
+        model = FileEvent
+        fields = '__all__'
--- a/data_api/static/data/json/data.json
+++ b/data_api/static/data/json/data.json
@ -0,0 +1 @@
+[]
--- a/data_api/static/data/json_log.log
+++ b/data_api/static/data/json_log.log
@ -0,0 +1,35 @@
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - ERROR - Number of successed saved files: 0
+2023-11-09 16:17:19 - INFO - Number of successed saved files: 0
+2023-11-09 16:17:19 - INFO - Number of seactions: 0
+2023-11-09 16:17:19 - INFO - Number of seactions after cleaning: 0
+2023-11-09 16:17:19 - INFO - Number of cleaned seactions: 0
+2023-11-09 16:21:55 - INFO - /home/louai/Documents/BICC/IDDRS/iddrs_tool/iddrs_api/data_api/PreprocessFile.py changed, reloading.
--- a/admin_api/tests.py
+++ b/admin_api/tests.py
--- a/data_api/urls.py
+++ b/data_api/urls.py
@ -0,0 +1,25 @@
+from django.urls import path, include
+from rest_framework import routers
+from .views import LevelViewSet, StandardsViewSet, FileEventsViewSet
+from . import views
+
+# Router-generated routes for the three viewsets.
+router = routers.DefaultRouter()
+router.register(r'levels', LevelViewSet)
+router.register(r'standards', StandardsViewSet)
+router.register(r'fileEvents', FileEventsViewSet)
+
+urlpatterns = [
+    # Viewset routes registered on the router above.
+    path('', include(router.urls)),
+
+    # Function-based endpoints for creating/updating and deleting levels.
+    path('level-submit/', views.levelSubmit, name='level-submit'),
+    path('level-delete/', views.levelDelete, name='level-delete'),
+    
+    # Function-based endpoints for uploading and deleting standards.
+    path('upload-standard/', views.upload_standard, name='upload-standard'),
+    path('standard-delete/', views.standardDelete, name='standard-delete'),
+
+    # Triggers preprocessing and index creation (see views.processFiles).
+    path('process-files/', views.processFiles, name='process-files'),
+
+    # Disabled include of the former admin_api routes.
+    #path('api/', include('admin_api.api.urls')),
+
+
+]
--- a/data_api/views.py
+++ b/data_api/views.py
@ -0,0 +1,159 @@
+from rest_framework import viewsets
+from .models import Levels, StandardsList, FileEvent
+from .serializers import LevelSerializer, StandardsSerializer, FileEventSerializer
+from django.views.decorators.csrf import csrf_exempt
+import os
+from pathlib import Path
+from rest_framework.decorators import api_view, permission_classes
+from rest_framework.permissions import IsAuthenticated
+from rest_framework.response import Response
+from django.db.models import Max
+import shutil
+from .PreprocessFile import PreprocessFile
+from .CreateIndexES import CreateIndexES
+
+import logging
+
+logger = logging.getLogger(__name__) 
+
+# Create your views here.
+#@permission_classes([IsAuthenticated])
+class LevelViewSet(viewsets.ModelViewSet):
+    """Model viewset over all Levels rows, serialized with LevelSerializer."""
+    # NOTE(review): no permission_classes are set on this viewset, so access
+    # control depends on the project-wide DRF defaults; confirm that is intended.
+    queryset = Levels.objects.all()
+    serializer_class = LevelSerializer
+
+#@permission_classes([IsAuthenticated])
+class StandardsViewSet(viewsets.ModelViewSet):
+    """Model viewset over all StandardsList rows, ordered by levelNumber."""
+    # NOTE(review): no permission_classes are set on this viewset, so access
+    # control depends on the project-wide DRF defaults; confirm that is intended.
+    queryset = StandardsList.objects.all().order_by('levelNumber')
+    serializer_class = StandardsSerializer
+
+class FileEventsViewSet(viewsets.ModelViewSet):
+    """Model viewset restricted to FileEvent rows whose fileStatus is 'Pending'."""
+    # Rows with any other fileStatus (e.g. 'Indexed') are outside this queryset.
+    queryset = FileEvent.objects.all().filter(fileStatus='Pending')
+    serializer_class = FileEventSerializer
+
+BASE_DIR = Path(__file__).resolve().parent.parent
+
+# ################################################################
+# #######################Levels###################################
+# ################################################################
+def check_level_folder():
+    """Ensure every level has a folder under ``<BASE_DIR>/static/data/Standards/``.
+
+    One directory named after each ``Levels.levelNumber`` is created when it
+    is missing; existing directories are left untouched.
+    """
+    level_numbers = Levels.objects.values_list('levelNumber', flat=True)
+    for level_number in level_numbers:
+        level_folder = os.path.join(BASE_DIR, 'static/data/Standards/' + str(level_number) + '/')
+        # exist_ok=True replaces the exists()-then-makedirs() pair, which could
+        # raise FileExistsError if the folder appeared between the two calls.
+        os.makedirs(level_folder, exist_ok=True)
+
+@csrf_exempt
+@api_view(['POST'])
+def levelSubmit(request):
+    """Create or update a level from ``request.data['params']['editingRow']``.
+
+    A row that carries an ``id`` updates the matching level. A row without
+    one creates a new level whose ``levelNumber`` is one above the current
+    maximum (1 when no level exists yet). After a successful save the
+    per-level folders are ensured via ``check_level_folder``.
+
+    Returns:
+        An empty ``Response`` in every case; validation errors are logged.
+
+    Raises:
+        Levels.DoesNotExist: if ``id`` is given but matches no level.
+    """
+    data = request.data['params']['editingRow']
+
+    if 'id' in data:
+        level = Levels.objects.get(id=data['id'])
+        serializer = LevelSerializer(instance=level, data=data)
+    else:
+        # Max() yields None on an empty table; treat that as 0 so the first
+        # level gets number 1 instead of failing on ``None + 1``.
+        highest_number = Levels.objects.aggregate(Max('levelNumber'))['levelNumber__max']
+        data['levelNumber'] = (highest_number or 0) + 1
+        serializer = LevelSerializer(data=data)
+
+    if serializer.is_valid():
+        serializer.save()
+        check_level_folder()
+    else:
+        # Keep the response shape unchanged, but leave a trace of the rejection.
+        logger.warning('Level submission rejected: %s', serializer.errors)
+
+    return Response('')
+
+@csrf_exempt
+@api_view(['POST'])
+def levelDelete(request):
+    """Delete the level named by ``request.data['params']['rowData']['id']``.
+
+    The level row is removed, then its folder under
+    ``<BASE_DIR>/static/data/Standards/<levelNumber>/`` is deleted recursively.
+
+    Returns:
+        An empty ``Response``.
+
+    Raises:
+        Levels.DoesNotExist: if no level has the given id.
+    """
+    data = request.data['params']['rowData']
+    level = Levels.objects.get(id=data['id'])
+    # Use the number stored in the database, not the one in the request body:
+    # it ends up in an rmtree() path, so a client-supplied value such as
+    # "../.." could otherwise remove arbitrary directories.
+    level_number = level.levelNumber
+    level.delete()
+    level_del_dir = os.path.join(BASE_DIR, 'static/data/Standards/' + str(level_number) + '/')
+    try:
+        shutil.rmtree(level_del_dir)
+    except FileNotFoundError:
+        # The row is already gone; a missing folder should not fail the request.
+        logger.warning('Level folder %s not found; nothing to remove.', level_del_dir)
+
+    return Response('')
+
+# ################################################################
+# #######################Standards################################
+# ################################################################
+
+@csrf_exempt
+@api_view(['POST'])
+def upload_standard(request):
+    """Store an uploaded standard (PDF + Word file) and record an UPLOAD event.
+
+    Reads ``selectedLevel`` from the request data and ``pdfFile`` / ``wordFile``
+    from the uploaded files. The title is the PDF file name up to its first
+    ``.pdf``. When the data validates, the standard is saved and a FileEvent
+    with ``event_type='UPLOAD'`` is created; otherwise the validation errors
+    are logged.
+
+    Returns:
+        ``Response('Done!')`` in both cases.
+
+    Raises:
+        IndexError: if no level has the selected level number.
+    """
+    # api_view(['POST']) already limits this view to POST, so no method check is needed.
+    level_number = request.data['selectedLevel']
+    pdfFile = request.FILES['pdfFile']
+    wordFile = request.FILES['wordFile']
+    standard_title = pdfFile.name.split('.pdf')[0]
+
+    standard_level_id = Levels.objects.filter(levelNumber=level_number).values_list('id', flat=True)[0]
+
+    standard = {
+        'levelID': standard_level_id,
+        'levelNumber': level_number,
+        'standardFilePDF': pdfFile,
+        'standardFileWord': wordFile,
+        'standardTitle': standard_title,
+    }
+    serializer = StandardsSerializer(data=standard)
+    if serializer.is_valid():
+        serializer.save()
+        # Create a FileEvent for the upload
+        FileEvent.objects.create(event_type='UPLOAD', file_name=standard_title)
+    else:
+        # Report why the upload was rejected instead of printing a bare 'Invalid'.
+        logger.warning('Standard upload rejected: %s', serializer.errors)
+
+    return Response('Done!')
+
+
+@csrf_exempt
+@api_view(['POST'])
+def standardDelete(request):
+    """Delete a standard, its two files on disk, and record a DELETE event.
+
+    The standard is looked up by ``request.data['params']['stdData']['id']``.
+    Returns an empty Response.
+    """
+    data = request.data['params']['stdData']
+    standard = StandardsList.objects.get(id = data['id'])
+    # Remove the files first; delete_file reports failures instead of raising.
+    delete_file(standard.standardFilePDF.path)
+    delete_file(standard.standardFileWord.path)
+    standard.delete()
+    # Create a FileEvent for the delete
+    # NOTE(review): standardFilePDF.name is the stored file name, which presumably
+    # includes the upload_to folder prefix, whereas the UPLOAD event stores only the
+    # bare uploaded name -- confirm whether the two file_name values are meant to match.
+    FileEvent.objects.create(event_type='DELETE', file_name=standard.standardFilePDF.name.split('.pdf')[0])
+
+
+    return Response('')
+
+
+
+def delete_file(file_path):
+    """Remove *file_path* from disk and print the outcome.
+
+    Never raises: a missing file, a permission problem or any other error is
+    reported on stdout instead.
+    """
+    try:
+        os.remove(file_path)
+        outcome = f"File {file_path} deleted successfully."
+    except FileNotFoundError:
+        outcome = f"File {file_path} not found."
+    except PermissionError:
+        outcome = f"Permission error: Unable to delete {file_path}."
+    except Exception as e:
+        outcome = f"An error occurred: {e}"
+    print(outcome)
+
+@csrf_exempt
+@api_view(['POST'])
+def processFiles(request):
+    """Preprocess the standards, rebuild the index, then flag all file events.
+
+    Any exception raised by preprocessing or index creation is logged and
+    re-raised. On success every FileEvent row gets ``fileStatus='Indexed'``.
+    """
+    logger.info('Starting file Processing ...')
+    try:
+        PreprocessFile().process_standards()
+        CreateIndexES().createIndex()
+    except Exception as e:
+        logger.error(e)
+        raise
+
+    # Events are kept rather than deleted; they are only marked as indexed.
+    FileEvent.objects.all().update(fileStatus='Indexed')
+
+    logger.info('File Processing completed')
+    return Response('Procssed successfully')
--- a/db.sqlite3
+++ b/db.sqlite3
--- a/iddrs_api/pycache/settings.cpython-310.pyc
+++ b/iddrs_api/pycache/settings.cpython-310.pyc
--- a/iddrs_api/pycache/urls.cpython-310.pyc
+++ b/iddrs_api/pycache/urls.cpython-310.pyc
--- a/iddrs_api/settings.py
+++ b/iddrs_api/settings.py
@ -50,11 +50,11 @@ INSTALLED_APPS = [
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'rest_framework',
+    'rest_framework.authtoken',
    'rest_framework_simplejwt.token_blacklist',
    'django_filters',
    'search_tfidf',
-    'admin_api',
-    'user_auth',
+    'data_api',
    'corsheaders',
 ]

--- a/iddrs_api/urls.py
+++ b/iddrs_api/urls.py
@ -19,6 +19,5 @@ from django.urls import path, include
 urlpatterns = [
    path('admin/', admin.site.urls),
    path('client_api/', include('search_tfidf.urls')),
-    path('admin_api/', include('admin_api.urls')),
-    path('user_auth/', include('user_auth.urls')),
+    path('data_api/', include('data_api.urls')),
 ]
--- a/search_tfidf/pycache/PreprocessFile.cpython-310.pyc
+++ b/search_tfidf/pycache/PreprocessFile.cpython-310.pyc
--- a/search_tfidf/pycache/elasticSearch.cpython-310.pyc
+++ b/search_tfidf/pycache/elasticSearch.cpython-310.pyc
--- a/search_tfidf/pycache/tfidfSearch.cpython-310.pyc
+++ b/search_tfidf/pycache/tfidfSearch.cpython-310.pyc
--- a/search_tfidf/pycache/views.cpython-310.pyc
+++ b/search_tfidf/pycache/views.cpython-310.pyc
--- a/search_tfidf/elasticSearch.py
+++ b/search_tfidf/elasticSearch.py
@ -0,0 +1,71 @@
+from django.shortcuts import render
+from elasticsearch import Elasticsearch
+import os
+
+def build_search_query(phrase, min_score):
+    """Build the search request body for *phrase*.
+
+    The body asks for up to 100 hits from a multi_match over the "Paragraph"
+    and "Title" fields, requests highlighting on "Paragraph" with empty
+    pre/post tags, and applies *min_score* as the score threshold.
+    """
+    match_clause = {"query": phrase, "fields": ["Paragraph", "Title"]}
+    highlight_clause = {
+        "fields": {"Paragraph": {}},
+        "pre_tags": [""],
+        "post_tags": [""],
+    }
+    return {
+        "size": 100,
+        "query": {"multi_match": match_clause},
+        "highlight": highlight_clause,
+        "min_score": min_score,
+    }
+
+def eSearch(phrase):
+    """Search the "iddrs" index for *phrase*, relaxing the score threshold as needed.
+
+    The query starts with ``min_score`` 8 and is retried with the threshold
+    lowered by one until it returns hits or the threshold reaches 0.
+
+    Returns:
+        A ``(results, min_score)`` tuple. ``results`` is the list of ``_source``
+        dicts of the hits, each with an added "Highlight" entry: the paragraph
+        with every highlighted fragment wrapped in a yellow ``<span>``, or
+        ``[]`` when the hit has no highlight. ``min_score`` is the threshold
+        used by the final query. When nothing matches, ``([], 0)`` is returned.
+    """
+    # NOTE(security): credentials belong in the environment. The literal
+    # fallbacks only keep existing deployments working; the password is in
+    # version control and should be rotated and then removed from this file.
+    ELASTIC_PASSWORD = os.environ.get('ELASTIC_PASSWORD', "p-P7luUvrPggWrS4UQsy")
+    ca_certs = os.environ.get('CA_CERTS', "/etc/elasticsearch/certs/http_ca.crt")
+
+    # Connect to the local Elasticsearch instance over HTTPS, verifying it
+    # against the CA certificate and authenticating as the "elastic" user.
+    es = Elasticsearch(
+        "https://localhost:9200",
+        ca_certs=ca_certs,
+        basic_auth=("elastic", ELASTIC_PASSWORD)
+    )
+
+    min_score = 8
+    final_results = []
+    while True:
+        search_query = build_search_query(phrase, min_score)
+        search_results = es.search(index="iddrs", body=search_query)
+        # Stop at the first threshold that yields hits. Also stop at 0: lowering
+        # the threshold further cannot produce hits, and the original loop
+        # never terminated for a phrase that matches nothing.
+        if search_results["hits"]["hits"] or min_score <= 0:
+            break
+        min_score -= 1
+
+    for hit in search_results["hits"]["hits"]:
+        highlighted_texts = hit.get('highlight', {}).get('Paragraph', [])  # Use get() to avoid KeyError
+        original_paragraph = hit.get('_source', {}).get('Paragraph', [])
+        if highlighted_texts:
+            # Mark each highlighted fragment inside the full paragraph text.
+            for highlighted_text in highlighted_texts:
+                original_paragraph = original_paragraph.replace(highlighted_text, f"<span style='background-color:#ffff00'>{highlighted_text}</span>")
+            hit["_source"]["Highlight"] = original_paragraph
+        else:
+            hit["_source"]["Highlight"] = []
+        final_results.append(hit["_source"])
+
+    return final_results, min_score
--- a/search_tfidf/tests.py
+++ b/search_tfidf/tests.py
@ -1,3 +0,0 @@
-from django.test import TestCase
-
-# Create your tests here.
--- a/search_tfidf/views.py
+++ b/search_tfidf/views.py
@ -6,6 +6,7 @@ from django.http import JsonResponse
 from django.views.decorators.csrf import csrf_exempt
 import json
 from .tfidfSearch import cosine_similarity
+from .elasticSearch import eSearch
 from rest_framework.decorators import api_view
 from pathlib import Path
 import os
@ -45,7 +46,8 @@ def get_input(request):
                return JsonResponse({"message": "Data received", "results":searchResults})

            else:
-                searchResults = cosine_similarity(phrase, title=False)
+                #searchResults = cosine_similarity(phrase, title=False)
+                searchResults = eSearch(phrase)
                return JsonResponse({"message": "Data received", "results":searchResults})


--- a/static/IDDRSStandards/1/IDDRS-1.10-Introduction-To-The-IDDRS.pdf
+++ b/static/IDDRSStandards/1/IDDRS-1.10-Introduction-To-The-IDDRS.pdf
--- a/static/IDDRSStandards/1/IDDRS-1.20-Glossary.pdf
+++ b/static/IDDRSStandards/1/IDDRS-1.20-Glossary.pdf
--- a/static/IDDRSStandards/2/IDDRS-2.10-The-UN-Approach-To-DDR.pdf
+++ b/static/IDDRSStandards/2/IDDRS-2.10-The-UN-Approach-To-DDR.pdf
--- a/static/IDDRSStandards/2/IDDRS-2.11-The-Legal-Framework-For-UNDDR.pdf
+++ b/static/IDDRSStandards/2/IDDRS-2.11-The-Legal-Framework-For-UNDDR.pdf
--- a/static/IDDRSStandards/2/IDDRS-2.20-The-Politics-of-DDR.pdf
+++ b/static/IDDRSStandards/2/IDDRS-2.20-The-Politics-of-DDR.pdf
--- a/static/IDDRSStandards/2/IDDRS-2.30-Community-Violence-Reduction.pdf
+++ b/static/IDDRSStandards/2/IDDRS-2.30-Community-Violence-Reduction.pdf
--- a/static/IDDRSStandards/2/IDDRS-2.40-Reintegration-as-Part-of-Sustaining-Peace.pdf
+++ b/static/IDDRSStandards/2/IDDRS-2.40-Reintegration-as-Part-of-Sustaining-Peace.pdf
--- a/static/IDDRSStandards/3/IDDRS-3.10-Integrated-DDR-Planning-Processes-and-Structures.pdf
+++ b/static/IDDRSStandards/3/IDDRS-3.10-Integrated-DDR-Planning-Processes-and-Structures.pdf
--- a/static/IDDRSStandards/3/IDDRS-3.20-DDR-Programme-Design.pdf
+++ b/static/IDDRSStandards/3/IDDRS-3.20-DDR-Programme-Design.pdf
--- a/static/IDDRSStandards/3/IDDRS-3.30-National-Institutions-for-DDR.pdf
+++ b/static/IDDRSStandards/3/IDDRS-3.30-National-Institutions-for-DDR.pdf
--- a/static/IDDRSStandards/3/IDDRS-3.40-Mission-and-Programme-Support-for-DDR.pdf
+++ b/static/IDDRSStandards/3/IDDRS-3.40-Mission-and-Programme-Support-for-DDR.pdf
--- a/static/IDDRSStandards/3/IDDRS-3.41-Finance-and-Budgeting.pdf
+++ b/static/IDDRSStandards/3/IDDRS-3.41-Finance-and-Budgeting.pdf
--- a/static/IDDRSStandards/3/IDDRS-3.42-Personnel-and-Staffing.pdf
+++ b/static/IDDRSStandards/3/IDDRS-3.42-Personnel-and-Staffing.pdf
--- a/static/IDDRSStandards/3/IDDRS-3.50-Monitoring-and-Evaluation-of-DDR-Programmes.pdf
+++ b/static/IDDRSStandards/3/IDDRS-3.50-Monitoring-and-Evaluation-of-DDR-Programmes.pdf
--- a/static/IDDRSStandards/4/IDDRS-4.10-Disarmament.pdf
+++ b/static/IDDRSStandards/4/IDDRS-4.10-Disarmament.pdf
--- a/static/IDDRSStandards/4/IDDRS-4.11-Transitional-Weapons-and-Ammunition-Management.pdf
+++ b/static/IDDRSStandards/4/IDDRS-4.11-Transitional-Weapons-and-Ammunition-Management.pdf
--- a/static/IDDRSStandards/4/IDDRS-4.20-Demobilization.pdf
+++ b/static/IDDRSStandards/4/IDDRS-4.20-Demobilization.pdf
--- a/static/IDDRSStandards/4/IDDRS-4.30-Reintegration.pdf
+++ b/static/IDDRSStandards/4/IDDRS-4.30-Reintegration.pdf
--- a/static/IDDRSStandards/4/IDDRS-4.40-UN-Military-Roles-and-Responsibilities.pdf
+++ b/static/IDDRSStandards/4/IDDRS-4.40-UN-Military-Roles-and-Responsibilities.pdf
--- a/static/IDDRSStandards/4/IDDRS-4.50-Police-Roles-and-Responsibilities.pdf
+++ b/static/IDDRSStandards/4/IDDRS-4.50-Police-Roles-and-Responsibilities.pdf
--- a/Show More
+++ b/Show More