-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQuery-WD (1).py
126 lines (100 loc) · 3.89 KB
/
Query-WD (1).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import json
import os

from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson import DiscoveryV2
# Credentials, endpoint and project for the Watson Discovery instance.
# Each value is read from the environment first so that secrets can stay out
# of source control; the literals are only fallbacks.
# NOTE(review): the original literals contained "\6" and "\d", which Python
# parses as an octal escape (a control character) and an invalid escape
# sequence respectively. They are kept here verbatim as raw strings and look
# like redacted placeholders -- replace them with real values or set the
# environment variables below.
api_key = os.environ.get('DISCOVERY_APIKEY', r'0IipvUSqpZb-IiZS13\6gfC')
service_url = os.environ.get(
    'DISCOVERY_URL',
    r'https://api.us-south.discovery.watson.cloud.ibm.com/instances/04a74a82-53cb-\d195459',
)
# Specify your project ID
project_id = os.environ.get('DISCOVERY_PROJECT_ID', 'b499c3a3-53e9-43e0')
# Initialize the Watson Discovery service
authenticator = IAMAuthenticator(api_key)
discovery = DiscoveryV2(
    version='2021-04-30',
    authenticator=authenticator,
)
discovery.set_service_url(service_url)
# Function to retrieve the collection ID within a project
def get_collection_id(project_id):
    """Return the ID of the first collection in a project.

    Calls ``discovery.list_collections`` and reads ``collection_id`` from the
    first entry of the ``collections`` list in the result. Any further
    collections are ignored.

    Args:
        project_id: ID of the Discovery project to inspect.

    Returns:
        The first collection's ID (which is also printed), or ``None`` when
        the project has no collections or the lookup fails.
    """
    try:
        response = discovery.list_collections(
            project_id=project_id
        ).get_result()
        collections = response.get('collections') or []
        if not collections:
            # Report an empty project explicitly instead of letting it show
            # up as an opaque "list index out of range" error.
            print(f"No collections found in project: {project_id}")
            return None
        # Only the first collection is used.
        collection_id = collections[0]['collection_id']
        print(collection_id)
        return collection_id
    except Exception as e:
        # Best-effort helper: report the failure and signal it with None so
        # the calling script can skip the rest of its work.
        print(f"An error occurred: {str(e)}")
        return None
# Function to retrieve a list of document IDs in a collection
def list_documents_in_collection(collection_id):
    """Collect the IDs of the documents listed for a collection.

    Uses the module-level ``project_id`` together with ``collection_id`` to
    call ``discovery.list_documents``. The raw result, the ``documents`` list
    and each document ID are printed along the way.

    Args:
        collection_id: ID of the collection whose documents are listed.

    Returns:
        A list of document IDs, or an empty list when any step fails.
    """
    try:
        listing = discovery.list_documents(
            project_id=project_id,
            collection_id=collection_id,
        ).get_result()
        print(listing)
        entries = listing.get('documents', [])
        print(entries)
        found_ids = []
        for entry in entries:
            print(entry['document_id'])
            found_ids.append(entry['document_id'])
        return found_ids
    except Exception as e:
        # Any failure discards the partial list and yields an empty one.
        print(f"An error occurred: {str(e)}")
        return []
# Function to check document content by ID
def check_document_content(project_id, collection_id, document_id):
    """Print what ``discovery.get_document`` reports for a document.

    When the returned ``status`` is ``'available'`` the whole result is
    printed as indented JSON; otherwise only the status is printed.

    Args:
        project_id: ID of the Discovery project.
        collection_id: ID of the collection holding the document.
        document_id: ID of the document to look up.

    Returns:
        None. Output goes to stdout; errors are printed, not raised.
    """
    try:
        document = discovery.get_document(
            project_id=project_id,
            collection_id=collection_id,
            document_id=document_id,
        ).get_result()
        status = document.get('status')
        if status == 'available':
            # The response fetched above already holds everything that gets
            # printed, so it is reused instead of issuing a second identical
            # request.
            print(json.dumps(document, indent=2))
        else:
            print(f"Document status: {status}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
def retrieve_document_by_id(project_id, document_id):
    """Return the ``text`` field of the document with the given ID.

    Runs ``discovery.query`` against the project with a filter on
    ``document_id`` and reads ``text`` from the first result.

    Args:
        project_id: ID of the Discovery project to search.
        document_id: ID of the document to fetch.

    Returns:
        The value of the first result's ``text`` field, or ``None`` when no
        document matches, the result has no ``text`` field, or the request
        fails. Failures are printed rather than raised.
    """
    try:
        # Select the document with an exact-match filter (``::``) instead of
        # a relevance query using the "includes" operator (``:``), so that
        # only the document with this ID can match. The ID is quoted so that
        # characters such as "-" are taken literally; embedded double quotes
        # are escaped.
        escaped_id = str(document_id).replace('"', '\\"')
        response = discovery.query(
            project_id=project_id,
            filter=f'document_id::"{escaped_id}"',
        ).get_result()
        results = response.get('results') or []
        # Check if any documents were found
        if response.get('matching_results', 0) > 0 and results:
            document_content = results[0].get('text')
            if document_content is None:
                print(f"Document {document_id} has no 'text' field")
            return document_content
        print(f"No document found with ID: {document_id}")
        return None
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None
## Get the collection ID within the project
collection_id = get_collection_id(project_id)
if collection_id:
    # Fetch every document ID reported for the collection.
    document_ids = list_documents_in_collection(collection_id)
    # For each document: print its details, then look up and print its text.
    # NOTE(review): the retrieval is assumed to run once per document (inside
    # the loop) -- confirm against the original layout.
    for doc_id in document_ids:
        print(f"Checking document with ID: {doc_id}")
        print(collection_id)
        check_document_content(project_id, collection_id, doc_id)
        document_content = retrieve_document_by_id(project_id, doc_id)
        if not document_content:
            # Nothing to show for this document.
            continue
        # Print or use the document content as needed
        print(document_content)