-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupdatekb.py
87 lines (76 loc) · 2.42 KB
/
updatekb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""
@author : Himanshu Mittal
@contact : https://www.linkedin.com/in/himanshumittal13/
@github : https://github.com/HimanshuMittal01
@created on: 07-10-2023 17:46:25
"""
import weaviate
import json
WEAVIATE_URL = "http://0.0.0.0:8080"
VECTORIZER = "text2vec-transformers"
BATCH_SIZE = 100


def class_config(class_name: str) -> dict:
    """Return the Weaviate class definition used for every class in this KB.

    The vectorizer-specific entry in ``moduleConfig`` is keyed by the
    vectorizer that is actually configured, so the two can never drift apart.
    """
    return {
        "class": class_name,
        "vectorizer": VECTORIZER,
        "moduleConfig": {
            VECTORIZER: {},
            "generative-openai": {},
        },
    }


def circular_properties(record: dict) -> dict:
    """Map one entry of ``active_circulars`` to ``Circular`` object properties."""
    return {
        "circular_id": record["id"],
        "description": record["description"],
        "date": record["date"],
    }


def article_properties(record: dict) -> dict:
    """Map one entry of ``constitution_of_india`` to ``Article`` object properties."""
    return {
        "article_id": record["id"],
        "name": record["name"],
        "part": record["part"],
        "description": record["description"],
    }


def load_json(path: str):
    """Parse the JSON document stored at *path*.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's default encoding.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def import_circulars(client, path: str = "input/circulars.json") -> None:
    """Batch-import every entry under ``active_circulars`` as a ``Circular``."""
    circulars = load_json(path)["active_circulars"]
    with client.batch as batch:
        for position, record in enumerate(circulars, start=1):
            print(f"importing circular: {position}")
            batch.add_data_object(
                data_object=circular_properties(record),
                class_name="Circular",
            )


def import_articles(client, path: str = "input/articles.json") -> None:
    """Batch-import every entry under ``constitution_of_india`` as an ``Article``."""
    articles = load_json(path)["constitution_of_india"]
    with client.batch as batch:
        for record in articles:
            print(f"importing article: {record['id']}")
            batch.add_data_object(
                data_object=article_properties(record),
                class_name="Article",
            )


def main() -> None:
    """Drop, recreate and repopulate the ``Circular`` and ``Article`` classes."""
    # Create client
    client = weaviate.Client(
        url=WEAVIATE_URL,
        additional_headers={},
    )

    class_names = ("Circular", "Article")

    # Recreate full database from scratch: drop both classes first,
    # then create them again with the shared configuration.
    for class_name in class_names:
        client.schema.delete_class(class_name)
    for class_name in class_names:
        client.schema.create_class(class_config(class_name))

    # The batch settings are configured once and used for both imports.
    client.batch.configure(batch_size=BATCH_SIZE)

    import_circulars(client)
    import_articles(client)


if __name__ == "__main__":
    main()