Dataset_reduction.py
# -*- coding: utf-8 -*-
"""
This script reduces the size of a dataset and
saves the reduced dataset to a new file. The
subset is chosen at random on every run.
-----------------------------------------------
Created on Wed Sep 4 01:34:45 2024
-----------------------------------------------
@author: Andrew Francey
"""
import pickle
import os
import random as rand
## The datapath of the dataset to reduce.
datapath = os.path.join(os.path.dirname(__file__), 'Dataset', '64images.pickle')
## Loads in the pickled dataset.
with open(datapath, 'rb') as datafile:
    data = pickle.load(datafile)
## Percentage to reduce the dataset by.
reduction = int(input("Percentage to reduce dataset by: "))/100
## Size of the dataset.
data_size = len(data)
## Size of the reduced dataset
new_size = int(data_size*(1-reduction))
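## For example (illustrative numbers, not from the original script): entering
## 25 keeps 75% of the points, so a 10,000-point dataset is reduced to 7,500.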
print("Reduced dataset size:", new_size)
## Initialize the new data list and a list to store the used indices.
new_data = []
used_index = []
## Pick random unused data points from the original dataset.
while len(new_data) < new_size:
    i = rand.randrange(data_size)
    if i not in used_index:
        new_data.append(data[i])
        used_index.append(i)
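## Note (editor's addition): the loop above is a hand-rolled sampling without
## replacement; random.sample(range(data_size), new_size) from the standard
## library would return an equivalent set of unique indices in a single call.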
## Determine what to name this new dataset.
i = 0
DatasetExists = True
root, ext = os.path.splitext(datapath)
while DatasetExists:
    DatasetPath = root + 'Reduced' + str(i) + ext
    DatasetExists = os.path.exists(DatasetPath)
    i += 1
## Save the new dataset.
with open(DatasetPath, 'wb') as newfile:
    pickle.dump(new_data, newfile)
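## Optional sanity check (editor's sketch, not part of the original script):
## reload the file that was just written and confirm the reduced dataset has
## the expected number of points.
with open(DatasetPath, 'rb') as checkfile:
    reduced = pickle.load(checkfile)
assert len(reduced) == new_size
print('Saved {} of {} points to {}'.format(len(reduced), data_size, DatasetPath))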