This repository has been archived by the owner on Oct 3, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsection1.py
109 lines (95 loc) · 4.4 KB
/
section1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 9 21:31:51 2023
@author: nivee
"""
# !pip install aiml
# !pip install wikipedia
# !pip install scikit-learn
# !pip install nltk
import wikipedia
import aiml
import sklearn
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
import numpy as np
import json, requests
import nltk
from nltk.stem import WordNetLemmatizer
# Create a Kernel object. No string encoding (all I/O is unicode)
kern = aiml.Kernel()
kern.setTextEncoding(None)
# Load the AIML pattern file; responses prefixed with '#' encode commands
# handled by the main loop below. Requires MakeUpBot.xml in the CWD.
kern.bootstrap(learnFiles="MakeUpBot.xml")
# NOTE(review): lemmatizer is created but never used in the visible code —
# presumably intended for pre-processing the Q&A similarity input; confirm.
lemmatizer = WordNetLemmatizer()
print("Welcome to this Makeup chatbot. Please feel free to ask questions from me!")

# Main REPL loop: read a line, route it through the AIML kernel, and dispatch
# on the command code embedded in responses of the form "#<cmd>$<payload>".
while True:
    # Get user input; Ctrl-C / Ctrl-D ends the session cleanly.
    try:
        userInput = input("> ")
    except (KeyboardInterrupt, EOFError):
        print("Bye!")
        break

    # Pre-process user input and determine response agent (if needed).
    responseAgent = 'aiml'

    if responseAgent == 'aiml':
        answer = kern.respond(userInput)
        # Was `answer[0] == '#'`: that raises IndexError when the kernel
        # returns an empty reply (no pattern matched).
        if answer.startswith('#'):
            params = answer[1:].split('$')
            cmd = int(params[0])

            if cmd == 0:
                # Command 0: farewell message, then quit.
                print(params[1])
                break

            elif cmd == 1:
                # Command 1: look up "<brand> <product_type>" on the makeup API.
                succeeded = False
                api_url = r"https://makeup-api.herokuapp.com/api/v1/products.json?"
                inputBrandProd = params[1].split(" ")
                # Guard: need both a brand and a product type, otherwise the
                # original code raised IndexError on single-word input.
                if len(inputBrandProd) >= 2:
                    response = requests.get(
                        api_url + "brand=" + inputBrandProd[0]
                        + "&product_type=" + inputBrandProd[1])
                    if response.status_code == 200:
                        # response.json() is the idiomatic form of
                        # json.loads(response.content).
                        response_json = response.json()
                        if response_json:
                            name = response_json[0]['name']
                            description = response_json[0]['description']
                            print('\n\bName:\t', name, '\n\n\bDescription:\n\t', description, "\n")
                            succeeded = True
                if not succeeded:
                    print("Sorry, I could not find an example for the brand and product you gave me")

            elif cmd == 99:
                # Command 99: TF-IDF similarity fallback over a local Q&A CSV.
                try:
                    df = pd.read_csv('QA.csv').dropna()
                    vectorizer = TfidfVectorizer()
                    # Fit on the stored questions, transform the raw user input,
                    # and rank stored questions by cosine similarity.
                    similarity_index_list = cosine_similarity(
                        vectorizer.fit_transform(df["Question"]),
                        vectorizer.transform([userInput])).flatten()
                    # Only answer when at least one stored question is similar
                    # enough (equivalent to the original "count how many are
                    # <= 0.1 and compare with len" loop, without the O(n) pass).
                    if similarity_index_list.size == 0 or similarity_index_list.max() <= 0.1:
                        print("I'm sorry, I don't have an answer for that.")
                    else:
                        print(df.loc[similarity_index_list.argmax(), "Answer"])
                except Exception:
                    # Narrowed from a bare `except:`, which also swallowed
                    # KeyboardInterrupt and defeated the loop's exit handler.
                    print("I did not get that, please try again.")

            else:
                print(answer)
        else:
            print(answer)
    else:
        print("aiml not needed")