Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

diff: support time-series files with different resolutions #1268

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions src/mintpy/cli/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
diff.py timeseries_ERA5_ramp_demErr.h5 ../GIANT/Stack/LS-PARAMS.h5 -o mintpy_giant.h5
diff.py reconUnwrapIfgram.h5 ./inputs/ifgramStack.h5 -o diffUnwrapIfgram.h5

# different resolutions
diff.py timeseries.h5 inputs/SET.h5 -o timeseries_SET.h5
diff.py timeseries_SET.h5 ion.h5 -o timeseries_SET_ion.h5

# different file types
diff.py filt_20220905_20230220.unw ./inputs/ERA5.h5 -o filt_20220905_20230220_ERA5.unw
diff.py timeseries.h5 ./inputs/ITRF14.h5 -o timeseries_ITRF14.h5
Expand Down
41 changes: 38 additions & 3 deletions src/mintpy/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import time

import numpy as np
from skimage.transform import resize

from mintpy.objects import (
IFGRAM_DSET_NAMES,
Expand Down Expand Up @@ -57,6 +58,7 @@ def check_reference(atr1, atr2):

def diff_timeseries(file1, file2, out_file, force_diff=False, max_num_pixel=2e8):
"""Calculate the difference between two time-series files.
file1.shape and file2.shape are different.

Parameters: file1 - str, path of file1
file2 - str, path of file2
Expand All @@ -71,6 +73,8 @@ def diff_timeseries(file1, file2, out_file, force_diff=False, max_num_pixel=2e8)
atr2 = readfile.read_attribute(file2)
k1 = atr1['FILE_TYPE']
k2 = atr2['FILE_TYPE']
length1, width1 = int(atr1['LENGTH']), int(atr1['WIDTH'])
length2, width2 = int(atr2['LENGTH']), int(atr2['WIDTH'])
date_list1 = timeseries(file1).get_date_list()
if k2 == 'timeseries':
date_list2 = timeseries(file2).get_date_list()
Expand All @@ -79,8 +83,29 @@ def diff_timeseries(file1, file2, out_file, force_diff=False, max_num_pixel=2e8)
date_list2 = giantTimeseries(file2).get_date_list()
unit_fac = 0.001

# check reference point
# check file size
different_size = False
if length1 != length2 or width1 != width2:
different_size = True
kwargs = dict(
output_shape=(length1, width1),
order=1,
mode='constant',
anti_aliasing=True,
preserve_range=True,
)
print('WARNING: file 1/2 have different sizes:')
print(f' file 1: ({atr1["LENGTH"]}, {atr1["WIDTH"]})')
print(f' file 2: ({atr2["LENGTH"]}, {atr2["WIDTH"]})')
if different_size and not force_diff:
raise Exception('To enforce the differencing anyway, use --force option.')

# check reference date / point
ref_date, ref_y, ref_x = check_reference(atr1, atr2)
if ref_date:
ref_data = readfile.read(file2, datasetName=ref_date, resize2shape=(length1, width1))[0]
if different_size:
ref_data = resize(ref_data, **kwargs)

# check dates shared by two timeseries files
date_list_shared = [i for i in date_list1 if i in date_list2]
Expand All @@ -95,12 +120,22 @@ def diff_timeseries(file1, file2, out_file, force_diff=False, max_num_pixel=2e8)
else:
raise Exception('To enforce the differencing anyway, use --force option.')

# get reference matrix
if ref_y and ref_x:
ref_box = (ref_x, ref_y, ref_x + 1, ref_y + 1)
ref_val = readfile.read(file2, datasetName=date_list_shared, box=ref_box)[0] * unit_fac
ref_val = []
for date in date_list_shared:
ref_val.append(readfile.read(file2, datasetName=date, box=ref_box,resize2shape=(length1, width1))[0])
ref_val = np.array(ref_val)* unit_fac
else:
ref_val = None

# resample data2
data2_resample = []
for date in date_list_shared:
data2_resample.append(readfile.read(file2, datasetName=date, resize2shape=(length1, width1))[0])
data2_resample = np.array(data2_resample)* unit_fac

# instantiate the output file
writefile.layout_hdf5(out_file, ref_file=file1)

Expand All @@ -121,7 +156,7 @@ def diff_timeseries(file1, file2, out_file, force_diff=False, max_num_pixel=2e8)

# read data2 (consider different reference_date/pixel)
print(f'read from file: {file2}')
data2 = readfile.read(file2, datasetName=date_list_shared, box=box)[0] * unit_fac
data2 = data2_resample[:,box[1]:box[3],box[0]:box[2]]

if ref_val is not None:
print(f'* referencing data from {os.path.basename(file2)} to y/x: {ref_y}/{ref_x}')
Expand Down
41 changes: 33 additions & 8 deletions src/mintpy/utils/readfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import h5py
import numpy as np
from numpy.typing import DTypeLike
from skimage.transform import resize

from mintpy.objects import (
DSET_UNIT_DICT,
Expand Down Expand Up @@ -317,14 +318,17 @@ def gdal_to_numpy_dtype(gdal_dtype: Union[str, int]) -> np.dtype:

#########################################################################
def read(fname, box=None, datasetName=None, print_msg=True, xstep=1, ystep=1, data_type=None,
no_data_values=None):
resize2shape=None, no_data_values=None):
"""Read one dataset and its attributes from input file.

Parameters: fname - str, path of file to read
datasetName - str or list of str, slice names
box - 4-tuple of int area to read, defined in (x0, y0, x1, y1) in pixel coordinate
as defined in the (resized) shape.
x/ystep - int, number of pixels to pick/multilook for each output pixel
data_type - numpy data type, e.g. np.float32, np.bool_, etc. Change the output data type
resize2shape - tuple of 2 int, resize the native matrix to the given shape,
set to None for not resizing
no_data_values - list of 2 numbers, change the no-data-value in the output
Returns: data - 2/3/4D matrix in numpy.array format, return None if failed
atr - dictionary, attributes of data, return None if failed
Expand All @@ -340,6 +344,7 @@ def read(fname, box=None, datasetName=None, print_msg=True, xstep=1, ystep=1, da
data, atr = readfile.read('geometryRadar.h5', datasetName='bperp')
data, atr = readfile.read('100120-110214.unw', box=(100,1100, 500, 2500))
"""
vprint = print if print_msg else lambda *args, **kwargs: None
fname = os.fspath(fname) # Convert from possible pathlib.Path
# metadata
dsname4atr = None #used to determine UNIT
Expand All @@ -349,36 +354,56 @@ def read(fname, box=None, datasetName=None, print_msg=True, xstep=1, ystep=1, da
dsname4atr = datasetName.split('-')[0]
atr = read_attribute(fname, datasetName=dsname4atr)

# box
# check: box & resize2shape
length, width = int(atr['LENGTH']), int(atr['WIDTH'])
# ignore resize arg if it is the same as the original shape
if resize2shape and resize2shape == (length, width):
resize2shape = None
if not box:
box = (0, 0, width, length)
box2read = (0, 0, width, length) if resize2shape else box

# read data
kwargs = dict(
datasetName=datasetName,
box=box,
box=box2read,
xstep=xstep,
ystep=ystep,
)

fext = os.path.splitext(os.path.basename(fname))[1].lower()
if fext in ['.h5', '.he5']:
data = read_hdf5_file(fname, print_msg=print_msg, **kwargs)

else:
data, atr = read_binary_file(fname, **kwargs)

# resize
if resize2shape:
# link: https://scikit-image.org/docs/dev/api/skimage.transform.html#skimage.transform.resize
data = resize(
data,
output_shape=resize2shape,
order=1,
mode='constant',
anti_aliasing=True,
preserve_range=True,
)

# subset by box
if tuple(box) != (0, 0, width, length):
data = data[
box[1]:box[3],
box[0]:box[2],
]

# customized output data type
if data_type is not None and data_type != data.dtype:
if print_msg:
print(f'convert numpy array from {data.dtype} to {data_type}')
vprint(f'convert numpy array from {data.dtype} to {data_type}')
data = np.array(data, dtype=data_type)

# convert no-data-value
if isinstance(no_data_values, list):
if print_msg:
print(f'convert no-data-value from {no_data_values[0]} to {no_data_values[1]}')
vprint(f'convert no-data-value from {no_data_values[0]} to {no_data_values[1]}')
data[data == no_data_values[0]] = no_data_values[1]

return data, atr
Expand Down