Skip to content

Commit

Permalink
Bug/sc 458619/allow negative values on most common approx (#159)
Browse files Browse the repository at this point in the history
Co-authored-by: cayetanobv <[email protected]>
  • Loading branch information
rantolin and cayetanobv authored Jan 14, 2025
1 parent 4db2d59 commit e3bb2b1
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 5 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

<!-- insertion marker -->

## [0.10.2] 2025-01-14

<small>[Compare with latest](https://github.com/CartoDB/raster-loader/compare/v0.10.1...HEAD)</small>

### Fixed

- Fix: OverflowError when casting approx sum to integer ([46cab53](https://github.com/CartoDB/raster-loader/commit/46cab53bbf71a86a7df784922956eb03f9dbb327) by Roberto Antolín).
- Fix: Compute approximate most common negative values ([f9f5ff5](https://github.com/CartoDB/raster-loader/commit/f9f5ff5010b1aea0d13afbea6d1869d4094fa7d7) by Roberto Antolín).

## [0.10.1] 2025-01-13

<small>[Compare with latest](https://github.com/CartoDB/raster-loader/compare/57d55999704fb003da2947db65d5617e27c5c104...HEAD)</small>
Expand Down
33 changes: 28 additions & 5 deletions raster_loader/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,12 @@ def not_enough_samples():
)
if not raster_is_masked:
for band in bands:
not_masked_samples[band].append(sample[band - 1])
band_sample = sample[band - 1]
is_valid_sample = not (
np.isinf(band_sample) or np.isnan(band_sample)
)
if is_valid_sample:
not_masked_samples[band].append(band_sample)

iterations += 1

Expand All @@ -474,10 +479,22 @@ def not_enough_samples():

def most_common_approx(samples: List[Union[int, float]]) -> Dict[int, int]:
    """Compute the most common values in a list of samples.

    Float samples are floored to the integer below them before counting, so
    every value ``v`` contributes to the key ``floor(v)``. Negative values
    are supported. Non-finite float samples (NaN, +/-inf) are ignored.

    Args:
        samples: Sampled band values (ints or floats). May be empty.

    Returns:
        A mapping ``{value: count}`` holding at most
        ``DEFAULT_MAX_MOST_COMMON`` entries, the ones with the highest
        counts. When several values tie for the last slot, which of them is
        kept is unspecified. An empty mapping is returned when there are no
        usable samples.
    """
    print("Computing most common values...")

    samples_array = np.asarray(samples)

    if samples_array.dtype.kind == "f":
        # NaN/inf cannot be converted to an integer key; drop them, then
        # snap every remaining value to the integer at or below it.
        samples_array = np.floor(samples_array[np.isfinite(samples_array)])

    if samples_array.size == 0:
        return {}

    # Counting distinct values keeps memory proportional to the number of
    # different values present, not to the (max - min) span of the data,
    # and, unlike np.bincount, it accepts negative values.
    values, counts = np.unique(samples_array, return_counts=True)

    nth = min(DEFAULT_MAX_MOST_COMMON, len(counts))
    # argpartition moves the nth largest counts to the tail without the
    # cost of a full sort.
    idx = np.argpartition(counts, -nth)[-nth:]

    return {int(values[i]): int(counts[i]) for i in idx}


def compute_quantiles(data: List[Union[int, float]], cast_function: Callable) -> dict:
Expand Down Expand Up @@ -518,8 +535,14 @@ def raster_band_approx_stats(
_sum = 0
sum_squares = 0
if count > 0:
_sum = int(np.sum(samples_band))
sum_squares = int(np.sum(np.array(samples_band) ** 2))
try:
_sum = int(np.sum(samples_band))
except (OverflowError, ValueError):
_sum = 0
try:
sum_squares = int(np.sum(np.array(samples_band) ** 2))
except (OverflowError, ValueError):
sum_squares = 0

if basic_stats:
quantiles = None
Expand Down

0 comments on commit e3bb2b1

Please sign in to comment.