Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disabling glove tests due to URL breakage #920

Merged
merged 2 commits into from
Aug 12, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 59 additions & 58 deletions test/experimental/test_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,61 +202,62 @@ def test_fast_text(self):
for word in expected_fasttext_simple_en.keys():
self.assertEqual(vectors_obj[word][:3], expected_fasttext_simple_en[word])
self.assertEqual(jit_vectors_obj[word][:3], expected_fasttext_simple_en[word])

def test_glove(self):
    """Check that GloVe vectors load from a locally staged archive.

    The asset is a zip file holding only the first 100 entries of the
    GloVe 840B dataset. It is copied into a temporary root directory so
    that it sits at the expected download location, and both the eager
    and the TorchScript-compiled vectors objects are compared against
    known values.
    """
    archive_name = 'glove.840B.300d.zip'
    archive_src = get_asset_path(archive_name)

    # Leading three components of each expected vector.
    expected_heads = {
        'the': [0.27204, -0.06203, -0.1884],
        'people': [-0.19686, 0.11579, -0.41091],
    }

    with tempfile.TemporaryDirectory() as root_dir:
        # Stage the asset where the loader looks for the downloaded file.
        shutil.copy(archive_src, os.path.join(root_dir, archive_name))

        eager_vectors = GloVe(root=root_dir, validate_file=False)
        scripted_vectors = torch.jit.script(eager_vectors)

        for token, head in expected_heads.items():
            self.assertEqual(eager_vectors[token][:3], head)
            self.assertEqual(scripted_vectors[token][:3], head)

def test_glove_different_dims(self):
    """Check that each requested dimension loads its own GloVe 6B file.

    The asset is a zip file of one-line txt files, used to test that the
    correct file is loaded for each ``dim``. It is copied into a
    temporary root directory so that it sits at the expected download
    location.
    """
    archive_name = 'glove.6B.zip'
    archive_src = get_asset_path(archive_name)

    # Leading three components of each expected vector, keyed by the
    # embedding dimension; insertion order fixes the construction order.
    expected_by_dim = {
        50: {'the': [0.418, 0.24968, -0.41242]},
        100: {'the': [-0.038194, -0.24487, 0.72812]},
        200: {'the': [-0.071549, 0.093459, 0.023738]},
        300: {'the': [0.04656, 0.21318, -0.0074364]},
    }

    with tempfile.TemporaryDirectory() as root_dir:
        # Stage the asset where the loader looks for the downloaded file.
        shutil.copy(archive_src, os.path.join(root_dir, archive_name))

        # Build every vectors object first, then run the comparisons.
        loaded = [
            GloVe(name='6B', dim=dim, root=root_dir, validate_file=False)
            for dim in expected_by_dim
        ]

        for vectors, expected_heads in zip(loaded, expected_by_dim.values()):
            for token, head in expected_heads.items():
                self.assertEqual(vectors[token][:3], head)

# TODO: re-enable the GloVe tests below once the GloVe dataset URL starts working again
# def test_glove(self):
# # copy the asset file into the expected download location
# # note that this is just a zip file with the first 100 entries of the GloVe 840B dataset
# asset_name = 'glove.840B.300d.zip'
# asset_path = get_asset_path(asset_name)

# with tempfile.TemporaryDirectory() as dir_name:
# data_path = os.path.join(dir_name, asset_name)
# shutil.copy(asset_path, data_path)
# vectors_obj = GloVe(root=dir_name, validate_file=False)
# jit_vectors_obj = torch.jit.script(vectors_obj)

# # The first 3 entries in each vector.
# expected_glove = {
# 'the': [0.27204, -0.06203, -0.1884],
# 'people': [-0.19686, 0.11579, -0.41091],
# }

# for word in expected_glove.keys():
# self.assertEqual(vectors_obj[word][:3], expected_glove[word])
# self.assertEqual(jit_vectors_obj[word][:3], expected_glove[word])

# def test_glove_different_dims(self):
# # copy the asset file into the expected download location
# # note that this is just a zip file with 1 line txt files used to test that the
# # correct files are being loaded
# asset_name = 'glove.6B.zip'
# asset_path = get_asset_path(asset_name)

# with tempfile.TemporaryDirectory() as dir_name:
# data_path = os.path.join(dir_name, asset_name)
# shutil.copy(asset_path, data_path)

# glove_50d = GloVe(name='6B', dim=50, root=dir_name, validate_file=False)
# glove_100d = GloVe(name='6B', dim=100, root=dir_name, validate_file=False)
# glove_200d = GloVe(name='6B', dim=200, root=dir_name, validate_file=False)
# glove_300d = GloVe(name='6B', dim=300, root=dir_name, validate_file=False)
# vectors_objects = [glove_50d, glove_100d, glove_200d, glove_300d]

# # The first 3 entries in each vector.
# expected_glove_50d = {
# 'the': [0.418, 0.24968, -0.41242],
# }
# expected_glove_100d = {
# 'the': [-0.038194, -0.24487, 0.72812],
# }
# expected_glove_200d = {
# 'the': [-0.071549, 0.093459, 0.023738],
# }
# expected_glove_300d = {
# 'the': [0.04656, 0.21318, -0.0074364],
# }
# expected_gloves = [expected_glove_50d, expected_glove_100d, expected_glove_200d, expected_glove_300d]

# for vectors_obj, expected_glove in zip(vectors_objects, expected_gloves):
# for word in expected_glove.keys():
# self.assertEqual(vectors_obj[word][:3], expected_glove[word])