# goes_helper.py: helper utilities for cleaning, normalising, and splitting GOES .npy datasets
import os
import numpy as np


def check_npy_files_for_nan(folder_path, output_txt_path):
    """Scan a folder of .npy files and record which ones contain NaN values."""
    nan_file_count = 0
    not_nan_file_count = 0
    nan_file_names = []
    # Loop through files in the folder
    for file_name in os.listdir(folder_path):
        if file_name.endswith('.npy'):
            file_path = os.path.join(folder_path, file_name)
            # Load .npy file
            try:
                data = np.load(file_path)
                # Check for NaN values
                if np.isnan(data).any():
                    nan_file_count += 1
                    nan_file_names.append(file_name)
                else:
                    not_nan_file_count += 1
            except Exception as e:
                print(f"Could not load {file_name}: {e}")
    # Write names of files with NaNs to a text file
    with open(output_txt_path, 'w') as f:
        for name in nan_file_names:
            f.write(name + '\n')
    # Print counts of files
    print(f"Files with NaN values: {nan_file_count}")
    print(f"Files without NaN values: {not_nan_file_count}")
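
# Example usage (a sketch; the folder path and output filename below are
# placeholders, not taken from this repo):
#   check_npy_files_for_nan('/data/goes/npy', 'nan_files.txt')
# The resulting nan_files.txt can then be fed to delete_files_from_list below
# to purge the corrupted samples.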


def delete_files_from_list(folder_path, txt_file_path):
    """Delete the files named (one per line) in a text file from a folder."""
    with open(txt_file_path, 'r') as file:
        file_names = file.readlines()
    deleted_count = 0
    not_found_count = 0
    for file_name in file_names:
        # Strip whitespace and skip blank lines: os.path.join(folder, '')
        # resolves to the folder itself, and os.remove on it would raise
        file_name = file_name.strip()
        if not file_name:
            continue
        file_path = os.path.join(folder_path, file_name)
        # Check if file exists, then delete
        if os.path.exists(file_path):
            os.remove(file_path)
            deleted_count += 1
            print(f"Deleted: {file_path}")
        else:
            not_found_count += 1
            print(f"File not found: {file_path}")
    # Summary
    print(f"\nTotal files deleted: {deleted_count}")
    print(f"Total files not found: {not_found_count}")
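
# Example usage (a sketch; paths are placeholders):
#   delete_files_from_list('/data/goes/npy', 'nan_files.txt')
# Deletion is irreversible, so it is worth reviewing the list file first.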


def calculate_mean_std(directory, output_file):
    """Compute per-channel mean and std over all 17-channel .npy files in a directory."""
    # Running per-channel sums of x and x**2, for mean and std via
    # Var[x] = E[x**2] - E[x]**2
    channel_sums = np.zeros(17)
    channel_sums_squared = np.zeros(17)
    total_count = 0
    # Loop through all .npy files in the specified directory
    for filename in os.listdir(directory):
        if filename.endswith('.npy'):
            # Load the .npy file
            data = np.load(os.path.join(directory, filename))
            # Ensure the data has the expected shape (C, H, W) for 17 channels
            if data.shape[0] != 17:
                print(f"Warning: {filename} does not have 17 channels, skipping.")
                continue
            # Accumulate per-channel sums for this file
            channel_sums += data.sum(axis=(1, 2))  # Sum across height and width
            channel_sums_squared += (data ** 2).sum(axis=(1, 2))
            total_count += data.shape[1] * data.shape[2]  # H * W pixels per channel
    # Calculate mean for each channel
    means = channel_sums / total_count
    # Calculate std for each channel
    stds = np.sqrt((channel_sums_squared / total_count) - (means ** 2))
    # Write per-channel statistics as a tab-separated table
    with open(output_file, 'w') as f:
        f.write("Channel\tMean\tStd\n")
        for i in range(17):
            f.write(f"{i + 1}\t{means[i]:.6f}\t{stds[i]:.6f}\n")
    print(f"Mean and standard deviation calculated over {total_count} pixels per channel.")
    print(f"Results saved to {output_file}")
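
# Example usage (a sketch; paths are placeholders):
#   calculate_mean_std('/data/goes/npy', 'channel_stats.txt')
# The single-pass identity Var[x] = E[x**2] - E[x]**2 can lose precision when
# the mean is large relative to the spread; if that becomes an issue, a
# two-pass or Welford-style computation is the safer alternative.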


def normalise_data(folder_path):
    """Min-max normalise selected bands of (17, 500, 500) .npy files in place."""
    # [min, max] value range used to scale each band
    min_max = [
        [187, 260],  # Band 8
        [181, 270],  # Band 9
        [171, 277],  # Band 10
        [181, 323],  # Band 11
        [181, 330],  # Band 13
        [172, 330],  # Band 14
    ]
    bands = [7, 8, 9, 10, 12, 13]  # 0-based channel indices for Bands 8, 9, 10, 11, 13, 14
    for file_name in os.listdir(folder_path):
        if file_name.endswith('.npy'):
            file_path = os.path.join(folder_path, file_name)
            try:
                # Load the .npy file
                data = np.load(file_path)
                # Ensure data shape is (17, 500, 500)
                if data.shape != (17, 500, 500):
                    print(f"Skipping {file_name}: Unexpected shape {data.shape}.")
                    continue
                # Normalise the specified bands
                for j, i in enumerate(bands):
                    # Apply min-max normalisation
                    data[i] = (data[i] - min_max[j][0]) / (min_max[j][1] - min_max[j][0])
                    # Clip values to keep them within [0, 1]
                    data[i] = np.clip(data[i], 0, 1)
                # Sanity check before overwriting the file (the shape should be unchanged)
                if data.shape != (17, 500, 500):
                    print('Error: shape changed unexpectedly, stopping before saving.')
                    break
                # Save the normalised data back to the .npy file
                np.save(file_path, data)
                print(f"Normalised {file_name}.")
            except Exception as e:
                print(f"Could not process {file_name}: {e}")
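
# Example usage (a sketch; the path is a placeholder):
#   normalise_data('/data/goes/npy')
# Because the files are rewritten in place, this should only be run once per
# dataset: values already scaled into [0, 1] would land below each band's
# minimum on a second pass and be clipped to 0.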


def split_npy_files(directory, train_file='train_goes.txt', val_file='val_goes.txt', val_split=0.1):
    """Split every .npy file under a directory into train/val list files."""
    # Find all .npy files in the directory and subdirectories
    npy_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.npy'):
                npy_files.append(os.path.join(root, file))
    # Sort the list so the split is deterministic across runs
    npy_files.sort()
    # Determine the split point
    split_index = int(len(npy_files) * (1 - val_split))
    # Split the files into training and validation sets
    train_files = npy_files[:split_index]
    val_files = npy_files[split_index:]
    # Write training filenames to train_goes.txt
    with open(train_file, 'w') as f:
        for file in train_files:
            f.write(file + '\n')
    # Write validation filenames to val_goes.txt
    with open(val_file, 'w') as f:
        for file in val_files:
            f.write(file + '\n')
    print(f"Training files written to {train_file}")
    print(f"Validation files written to {val_file}")
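
# Example usage (a sketch; the path is a placeholder):
#   split_npy_files('/data/goes/npy', val_split=0.1)
# Note the validation set is the lexicographically last 10% of paths rather
# than a random sample; shuffling npy_files with a fixed seed before slicing
# would give a random yet reproducible split if that is preferred.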


if __name__ == '__main__':
    directory = '/u/mickellals/Datasets/vis_data_subset'
    split_npy_files(directory)
    print("Done")