-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathnltk_utils.py
36 lines (32 loc) · 1.14 KB
/
nltk_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import nltk
from typing import List
import os
import ssl
def ensure_nltk_resources() -> None:
    """Ensure all required NLTK resources are downloaded.

    For each required resource, look it up in the local NLTK data path and
    download it only when the lookup fails. Progress is reported on stdout.

    Side effects:
        Replaces ``ssl._create_default_https_context`` with the unverified
        context factory (when the ``ssl`` module provides one), which turns
        off HTTPS certificate verification for code relying on that default.

    Raises:
        RuntimeError: If any unexpected error occurs while checking or
            downloading resources; the original exception is chained as
            ``__cause__``.
    """
    try:
        # SECURITY NOTE(review): this disables TLS certificate verification
        # for the default HTTPS context, not just for the NLTK download.
        # Kept as-is because it works around certificate errors on some
        # installations; prefer fixing the local certificate store instead.
        unverified_context = getattr(ssl, "_create_unverified_context", None)
        if unverified_context is not None:
            ssl._create_default_https_context = unverified_context

        # Map each downloadable package id to the path it is installed under
        # inside nltk_data. Packages live in different categories, so looking
        # everything up under 'tokenizers/' would never find the corpus or
        # the tagger and would trigger a re-download on every call.
        resource_paths = {
            'punkt': 'tokenizers/punkt',
            'stopwords': 'corpora/stopwords',
            'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
            'punkt_tab': 'tokenizers/punkt_tab',
        }

        for resource, data_path in resource_paths.items():
            try:
                nltk.data.find(data_path)
            except LookupError:
                print(f"Downloading {resource}...")
                # nltk.download reports failure through its boolean return
                # value rather than by raising, so check it before claiming
                # success. A failed download is reported but not fatal,
                # matching the original best-effort behaviour.
                if nltk.download(resource, quiet=True):
                    print(f"Successfully downloaded {resource}")
                else:
                    print(f"Failed to download {resource}")
            else:
                print(f"Found {resource}")
    except Exception as e:
        # Chain the cause so the underlying traceback is preserved.
        raise RuntimeError(f"Failed to initialize NLTK resources: {e}") from e