first push

This commit is contained in:
louai98 2023-05-12 13:41:17 +02:00
commit 38f2b96ea1
58 changed files with 365027 additions and 0 deletions

0
README.md Normal file
View File

BIN
db.sqlite3 Normal file

Binary file not shown.

0
iddrs_api/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

16
iddrs_api/asgi.py Normal file
View File

@ -0,0 +1,16 @@
"""
ASGI config for iddrs_api project.
It exposes the ASGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/4.1/howto/deployment/asgi/
"""
import os
from django.core.asgi import get_asgi_application
# Use this project's settings module unless DJANGO_SETTINGS_MODULE is already
# set in the environment (setdefault never overrides an existing value).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'iddrs_api.settings')
# Module-level ASGI callable exposed under the name ``application``.
application = get_asgi_application()

136
iddrs_api/settings.py Normal file
View File

@ -0,0 +1,136 @@
"""
Django settings for iddrs_api project.
Generated by 'django-admin startproject' using Django 4.1.3.
For more information on this file, see
https://docs.djangoproject.com/en/4.1/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/4.1/ref/settings/
"""
from pathlib import Path
# Project root: two directory levels above this settings module.
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# Quick-start development settings - unsuitable for production.
# See https://docs.djangoproject.com/en/4.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# NOTE(review): this value is hard-coded in the repository; a production
# deployment should supply its own key from outside source control.
SECRET_KEY = 'django-insecure-t7_=(ogb*qe%wll00g&*tr#7542!qe*jxwg#2t*bw0!4pm&vi='

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

# No host names are listed explicitly.
ALLOWED_HOSTS = []

# Cross-origin requests are accepted only from http://localhost:3000, the
# default origin of a locally served React application during development.
CORS_ALLOWED_ORIGINS = [
    'http://localhost:3000',
]
# Application definition.
# The list order is kept exactly as declared: Django's contrib apps first,
# then the REST/filter packages, the project's own search app and CORS support.
INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'rest_framework',
    'django_filters',
    'search_tfidf',
    'corsheaders',
]
# Request/response middleware, applied top-down on the way in.
#
# CorsMiddleware is placed ahead of CommonMiddleware (and every other
# middleware that can produce a response on its own) so that CORS headers are
# also added to those responses, as the django-cors-headers setup guide
# requires. CommonMiddleware is listed once only; the original list
# registered it twice.
MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'corsheaders.middleware.CorsMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
# Module holding the project's root ``urlpatterns``.
ROOT_URLCONF = 'iddrs_api.urls'

# A single Django template engine: no extra template directories, templates
# are discovered inside the installed apps.
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

# Dotted path of the WSGI callable defined in iddrs_api/wsgi.py.
WSGI_APPLICATION = 'iddrs_api.wsgi.application'
# Database
# https://docs.djangoproject.com/en/4.1/ref/settings/#databases
# One SQLite database stored as ``db.sqlite3`` in the project root.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'db.sqlite3',
    },
}
# Password validation
# https://docs.djangoproject.com/en/4.1/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
    {'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'},
    {'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'},
    {'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'},
    {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
]

# Internationalization
# https://docs.djangoproject.com/en/4.1/topics/i18n/
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_TZ = True

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/4.1/howto/static-files/
STATIC_URL = 'static/'

# Default primary key field type
# https://docs.djangoproject.com/en/4.1/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'

22
iddrs_api/urls.py Normal file
View File

@ -0,0 +1,22 @@
"""iddrs_api URL Configuration
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/4.1/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import path, include
# Project-level routes: the Django admin site under ``admin/`` and every route
# declared by ``search_tfidf.urls`` mounted at the site root.
urlpatterns = [
    path('admin/', admin.site.urls),
    path('', include('search_tfidf.urls')),
]

16
iddrs_api/wsgi.py Normal file
View File

@ -0,0 +1,16 @@
"""
WSGI config for iddrs_api project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/4.1/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
# Use this project's settings module unless DJANGO_SETTINGS_MODULE is already
# set in the environment (setdefault never overrides an existing value).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'iddrs_api.settings')
# Module-level WSGI callable exposed under the name ``application``.
application = get_wsgi_application()

22
manage.py Executable file
View File

@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
    """Run administrative tasks.

    Selects ``iddrs_api.settings`` as the settings module (unless the
    environment already names one) and hands ``sys.argv`` to Django's
    command-line dispatcher.

    Raises:
        ImportError: If Django cannot be imported; the original error is
            chained as the cause.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'iddrs_api.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as import_error:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from import_error
    execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
[]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
[]

1080
media/usersResults/cvr.json Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
[]

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
[]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
[]

File diff suppressed because one or more lines are too long

0
search_tfidf/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

3
search_tfidf/admin.py Normal file
View File

@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

6
search_tfidf/apps.py Normal file
View File

@ -0,0 +1,6 @@
from django.apps import AppConfig
class SearchTfidfConfig(AppConfig):
    """Application configuration for the ``search_tfidf`` app."""

    name = 'search_tfidf'
    # Primary-key field type used for models that do not declare their own.
    default_auto_field = 'django.db.models.BigAutoField'

View File

@ -0,0 +1,93 @@
# Generated by Django 4.1.3 on 2023-04-28 13:01
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Initial schema for the ``search_tfidf`` app.

    Creates the ``FilteredResults``, ``Level``, ``SearchResults`` and
    ``Standards`` tables. The operations mirror the model definitions in
    ``search_tfidf/models.py`` and must not be edited independently of them.
    """

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='FilteredResults',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('session_key', models.CharField(blank=True, max_length=17, null=True)),
                ('uniqueID', models.CharField(blank=True, max_length=200, null=True)),
                ('session_expiry_date', models.DateField(blank=True, null=True)),
                ('index', models.IntegerField(blank=True, null=True)),
                ('level', models.IntegerField(blank=True, null=True)),
                ('levelName', models.CharField(max_length=200)),
                ('title', models.CharField(max_length=200)),
                ('paragraph', models.TextField(blank=True, null=True)),
                ('color', models.CharField(max_length=10)),
                ('module', models.CharField(max_length=200)),
                ('heading1', models.CharField(max_length=200)),
                ('heading2', models.CharField(max_length=200)),
                ('heading3', models.CharField(max_length=200)),
                ('heading4', models.CharField(max_length=200)),
                ('pageNumber', models.IntegerField(blank=True, null=True)),
                ('sentence', models.CharField(blank=True, max_length=500, null=True)),
                ('shall', models.BooleanField(default=False)),
                ('should', models.BooleanField(default=False)),
                ('may', models.BooleanField(default=False)),
                ('must', models.BooleanField(default=False)),
                ('can', models.BooleanField(default=False)),
            ],
        ),
        migrations.CreateModel(
            name='Level',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('levelNumber', models.IntegerField()),
                ('levelName', models.CharField(max_length=200)),
                ('levelColor', models.CharField(default='#000000', max_length=10)),
            ],
        ),
        migrations.CreateModel(
            name='SearchResults',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('session_key', models.CharField(blank=True, max_length=17, null=True)),
                ('uniqueID', models.CharField(blank=True, max_length=200, null=True)),
                ('session_expiry_date', models.DateField(blank=True, null=True)),
                ('score', models.FloatField(blank=True, null=True)),
                ('index', models.IntegerField(blank=True, null=True)),
                ('level', models.IntegerField(blank=True, null=True)),
                ('levelName', models.CharField(max_length=200)),
                ('title', models.CharField(max_length=200)),
                ('paragraph', models.TextField(blank=True, null=True)),
                ('color', models.CharField(max_length=10)),
                ('module', models.CharField(max_length=200)),
                ('heading1', models.CharField(max_length=200)),
                ('heading2', models.CharField(max_length=200)),
                ('heading3', models.CharField(max_length=200)),
                ('heading4', models.CharField(max_length=200)),
                ('pageNumber', models.IntegerField(blank=True, null=True)),
                ('sentence', models.CharField(blank=True, max_length=500, null=True)),
                ('shall', models.BooleanField(default=False)),
                ('should', models.BooleanField(default=False)),
                ('may', models.BooleanField(default=False)),
                ('must', models.BooleanField(default=False)),
                ('can', models.BooleanField(default=False)),
            ],
        ),
        migrations.CreateModel(
            name='Standards',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('standardFile', models.FileField(blank=True, null=True, upload_to='')),
                ('standardLevel', models.IntegerField(blank=True, null=True)),
                ('standardTitle', models.CharField(max_length=200)),
                ('standardNumber', models.CharField(max_length=10)),
                ('standardPath', models.CharField(blank=True, max_length=200, null=True)),
                ('rev' 'ision', models.BooleanField()),
                ('paragraph', models.TextField(blank=True, null=True)),
                ('levelID', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='search_tfidf.level')),
            ],
        ),
    ]

View File

76
search_tfidf/models.py Normal file
View File

@ -0,0 +1,76 @@
from django.db import models
# Create your models here.
class Level(models.Model):
    """A numbered, named level with an associated display colour."""

    levelNumber = models.IntegerField()
    levelName = models.CharField(max_length=200)
    # Stored as a short string; defaults to black ('#000000').
    levelColor = models.CharField(max_length=10, default='#000000')

    def __str__(self):
        """Return the level number rendered as text."""
        return str(self.levelNumber)
class Standards(models.Model):
    """A standard document: optional file, title, number and owning level."""

    standardFile = models.FileField(blank=True, null=True)  # , upload_to="iddrs2/static/IDDRSStandards"
    standardLevel = models.IntegerField(blank=True, null=True)
    standardTitle = models.CharField(max_length=200)
    standardNumber = models.CharField(max_length=10)
    standardPath = models.CharField(max_length=200, blank=True, null=True)
    revision = models.BooleanField()
    paragraph = models.TextField(blank=True, null=True)
    # Optional link to a Level; deleting the Level deletes its standards too.
    levelID = models.ForeignKey(
        Level,
        on_delete=models.CASCADE,
        blank=True,
        null=True,
    )

    def __str__(self):
        """Return the standard's number."""
        return self.standardNumber
class SearchResults(models.Model):
    """A scored search result row tied to a session key.

    NOTE(review): the field set resembles the result columns built in
    ``tfidfSearch.cosine_similarity``, but nothing in the visible code writes
    to this table - confirm how it is populated.
    """

    # Session bookkeeping.
    session_key = models.CharField(max_length=17, blank=True, null=True)
    uniqueID = models.CharField(max_length=200, blank=True, null=True)
    session_expiry_date = models.DateField(blank=True, null=True)

    # Ranking information.
    score = models.FloatField(blank=True, null=True)
    index = models.IntegerField(blank=True, null=True)

    # Location of the hit inside the document hierarchy.
    level = models.IntegerField(blank=True, null=True)
    levelName = models.CharField(max_length=200)
    title = models.CharField(max_length=200)
    paragraph = models.TextField(blank=True, null=True)
    color = models.CharField(max_length=10)
    module = models.CharField(max_length=200)
    heading1 = models.CharField(max_length=200)
    heading2 = models.CharField(max_length=200)
    heading3 = models.CharField(max_length=200)
    heading4 = models.CharField(max_length=200)
    pageNumber = models.IntegerField(blank=True, null=True)
    sentence = models.CharField(max_length=500, blank=True, null=True)

    # One flag per modal verb; all default to False.
    shall = models.BooleanField(default=False)
    should = models.BooleanField(default=False)
    may = models.BooleanField(default=False)
    must = models.BooleanField(default=False)
    can = models.BooleanField(default=False)

    def __str__(self):
        """Return the first-level heading of the result."""
        return self.heading1
class FilteredResults(models.Model):
    """A result row tied to a session key, without a relevance score.

    Carries the same fields as ``SearchResults`` except ``score``.
    """

    # Session bookkeeping.
    session_key = models.CharField(max_length=17, blank=True, null=True)
    uniqueID = models.CharField(max_length=200, blank=True, null=True)
    session_expiry_date = models.DateField(blank=True, null=True)

    index = models.IntegerField(blank=True, null=True)

    # Location of the hit inside the document hierarchy.
    level = models.IntegerField(blank=True, null=True)
    levelName = models.CharField(max_length=200)
    title = models.CharField(max_length=200)
    paragraph = models.TextField(blank=True, null=True)
    color = models.CharField(max_length=10)
    module = models.CharField(max_length=200)
    heading1 = models.CharField(max_length=200)
    heading2 = models.CharField(max_length=200)
    heading3 = models.CharField(max_length=200)
    heading4 = models.CharField(max_length=200)
    pageNumber = models.IntegerField(blank=True, null=True)
    sentence = models.CharField(max_length=500, blank=True, null=True)

    # One flag per modal verb; all default to False.
    shall = models.BooleanField(default=False)
    should = models.BooleanField(default=False)
    may = models.BooleanField(default=False)
    must = models.BooleanField(default=False)
    can = models.BooleanField(default=False)

    def __str__(self):
        """Return the first-level heading of the result."""
        return self.heading1

View File

@ -0,0 +1,12 @@
from rest_framework import serializers
from .models import Level, Standards
class LevelSerializer(serializers.ModelSerializer):
    """Serializer exposing every field of the ``Level`` model."""

    class Meta:
        fields = '__all__'
        model = Level
class StandardsSerializer(serializers.ModelSerializer):
    """Serializer exposing every field of the ``Standards`` model."""

    class Meta:
        fields = '__all__'
        model = Standards

3
search_tfidf/tests.py Normal file
View File

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

116
search_tfidf/tfidfSearch.py Normal file
View File

@ -0,0 +1,116 @@
'''This is a Python script that reads data from a JSON file, performs cosine similarity
analysis on the data based on a given phrase, and outputs the results as another JSON file. '''
'''The script imports necessary modules including pandas, numpy, sklearn, nltk, itemgetter, json, and os.'''
import pandas as pd
from pathlib import Path
import os
import json
import numpy as np
import numpy.linalg as LA
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from nltk.corpus import stopwords
from operator import itemgetter
# English stop-word list taken from the nltk.corpus.stopwords module.
stopWords = stopwords.words('english')

# Term-count vectorizer (ignoring the stop words above) and a TF-IDF
# transformer, both from sklearn.feature_extraction.text.
# NOTE(review): `transformer` is not referenced by any code visible in this
# module - confirm whether TF-IDF weighting was meant to be applied.
vectorizer = CountVectorizer(stop_words=stopWords)
transformer = TfidfTransformer()

# Absolute path of the directory two levels above this file.
BASE_DIR = Path(__file__).resolve().parent.parent
def readfile():
    """Load ``static/json/data2.json`` (relative to ``BASE_DIR``).

    Returns:
        pandas.DataFrame: The contents of the JSON file.
    """
    corpus_path = os.path.join(BASE_DIR, 'static/json/data2.json')
    return pd.DataFrame(pd.read_json(corpus_path))
# Corpus columns copied verbatim into every result row, in fill order.
_RESULT_SOURCE_COLUMNS = (
    'Level', 'LevelName', 'Title', 'Paragraph', 'Color', 'Module',
    'Heading1', 'Heading2', 'Heading3', 'Heading4', 'PageNum', 'Sentence',
    'Shall', 'Should', 'May', 'Must', 'Can',
)


def _safe_result_name(phrase, max_length=200):
    """Return *phrase* reduced to a single, safe file-name component.

    Path separators and control characters are replaced by ``_`` so a
    user-supplied phrase can never address a file outside the results
    directory, and the name is capped at *max_length* characters.
    """
    cleaned = ''.join(
        '_' if ch in '/\\' or ord(ch) < 32 else ch
        for ch in phrase
    )
    return cleaned[:max_length]


def cosine_similarity(phrase, title):
    """Rank corpus entries by cosine similarity to *phrase*.

    The corpus returned by ``readfile()`` is turned into term-count vectors
    with the module-level ``vectorizer`` (which is refitted on every call).
    Entries scoring above 0.2 are kept, reduced to the best-scoring entry per
    ``ParagraphID``, ordered by level and score, written to
    ``media/usersResults/<phrase>.json`` and returned.

    Args:
        phrase: Search text. It comes from the HTTP request, so it is
            sanitised before being used as a file name.
        title: When truthy, match against the ``Module`` column; otherwise
            against the ``Sentence`` column.

    Returns:
        list[dict]: One record per kept result, as produced by
        ``DataFrame.to_json(orient='records')``.
    """
    data = readfile()
    docs = data['Module' if title else 'Sentence'].tolist()

    doc_counts = vectorizer.fit_transform(docs).toarray()
    query_counts = vectorizer.transform([phrase]).toarray()[0]

    # Cosine similarity of every document against the query in one pass.
    # Where either vector has zero length the score is defined as 0 instead of
    # NaN: NaN values made the former list sort unreliable, and the
    # per-paragraph de-duplication below depends on a correct ordering.
    dot_products = doc_counts @ query_counts
    denominators = LA.norm(doc_counts, axis=1) * LA.norm(query_counts)
    scores = np.zeros(doc_counts.shape[0], dtype=float)
    np.divide(dot_products, denominators, out=scores, where=denominators > 0)
    scores = np.round(scores, 6)

    # Best score first; the stable sort keeps equal scores in corpus order.
    # Scores of 0.2 or less are dropped.
    ranked = np.argsort(-scores, kind='stable')
    hits = [int(position) for position in ranked if scores[position] > 0.2]

    # Keep only the first (highest-scoring) hit of every paragraph.
    dff = pd.DataFrame(columns=['Score', 'Index', 'Paragraph', 'Color', 'Level', 'LevelName',
                                'Title', 'Module', 'PageNum', 'Heading1', 'Heading2', 'Heading3',
                                'Heading4', 'Sentence', 'Shall', 'Should', 'May', 'Must', 'Can'])
    seen_paragraphs = set()
    for position in hits:
        paragraph_id = data['ParagraphID'].iloc[position]
        if paragraph_id in seen_paragraphs:
            continue
        seen_paragraphs.add(paragraph_id)
        dff.loc[position, 'Score'] = scores[position]
        dff.loc[position, 'Index'] = position
        for column in _RESULT_SOURCE_COLUMNS:
            dff.loc[position, column] = data[column].iloc[position]

    # Order by level using the custom sequence 2, 4, 5, 6, 3 (ascending), then
    # by score (descending) within each level.
    dff['Level'] = pd.Categorical(dff.Level, categories=[2, 4, 5, 6, 3])
    dff = dff.sort_values(['Level', 'Score'], ascending=[True, False])

    results = json.loads(dff.reset_index().to_json(orient='records'))

    # Persist the results under a file name derived from the search phrase.
    output_dir = os.path.join(BASE_DIR, 'media/usersResults')
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, _safe_result_name(phrase) + '.json')
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=True, indent=4)

    return results

14
search_tfidf/urls.py Normal file
View File

@ -0,0 +1,14 @@
from django.urls import path, include
from rest_framework import routers
from .views import LevelViewSet, StandardsViewSet
from . import views
# DRF router exposing the two model viewsets under ``levels`` and ``standards``.
router = routers.DefaultRouter()
router.register(r'levels', LevelViewSet)
router.register(r'standards', StandardsViewSet)

urlpatterns = [
    # Router-generated routes, mounted at the app root.
    path('', include(router.urls)),
    # Search endpoint handled by views.get_input.
    path('get_input/', views.get_input, name='get_input'),
]

40
search_tfidf/views.py Normal file
View File

@ -0,0 +1,40 @@
from django.shortcuts import render
from rest_framework import viewsets
from .models import Level, Standards
from .serializer import LevelSerializer, StandardsSerializer
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
from .tfidfSearch import cosine_similarity
# Create your views here.
class LevelViewSet(viewsets.ModelViewSet):
    """Model viewset over all ``Level`` rows, serialised by ``LevelSerializer``."""

    serializer_class = LevelSerializer
    queryset = Level.objects.all()
class StandardsViewSet(viewsets.ModelViewSet):
    """Model viewset over all ``Standards`` rows, serialised by ``StandardsSerializer``."""

    serializer_class = StandardsSerializer
    queryset = Standards.objects.all()
# Receive the search phrase submitted by the front-end and return the matches.
@csrf_exempt
def get_input(request):
    """Run a search for the phrase posted by the front-end.

    Expects a POST whose JSON body has the shape
    ``{"data": {"phrase": "<text>"}}``. A phrase wrapped in double quotes is
    matched against module titles (quotes removed); any other phrase is
    matched against sentences.

    Args:
        request: The incoming Django ``HttpRequest``.

    Returns:
        JsonResponse: ``{"message": "Data received", "results": [...]}`` on
        success. ``{"message": "Invalid request"}`` with status 400 when the
        body is not valid JSON or carries no usable phrase (these cases used
        to raise and produce a 500). For non-POST requests and a JSON
        ``null`` body the original ``{"message": "Invalid request"}``
        response is returned unchanged.
    """
    import logging  # local import: keeps this block self-contained

    if request.method != "POST":
        return JsonResponse({"message": "Invalid request"})

    try:
        data = json.loads(request.body.decode('utf-8'))
    except (UnicodeDecodeError, json.JSONDecodeError):
        return JsonResponse({"message": "Invalid request"}, status=400)

    if data is None:
        return JsonResponse({"message": "Invalid request"})

    # data is a JSON object that holds the searched phrase under
    # data["data"]["phrase"].
    try:
        phrase = data['data']['phrase']
    except (KeyError, TypeError):
        return JsonResponse({"message": "Invalid request"}, status=400)

    if not isinstance(phrase, str) or not phrase:
        return JsonResponse({"message": "Invalid request"}, status=400)

    logging.getLogger(__name__).debug("search phrase: %s", phrase)

    # A quoted phrase requests a title search; the length check stops a lone
    # '"' from being treated as an (empty) quoted phrase.
    is_title_search = len(phrase) >= 2 and phrase[0] == '"' and phrase[-1] == '"'
    if is_title_search:
        phrase = phrase[1:-1]

    searchResults = cosine_similarity(phrase, title=is_title_search)
    return JsonResponse({"message": "Data received", "results": searchResults})

241254
static/json/data2.json Normal file

File diff suppressed because one or more lines are too long