From cd67c05b5a698311598225708b9140ee5c17f129 Mon Sep 17 00:00:00 2001 From: Jorge Tarrero Date: Wed, 13 Mar 2024 15:20:22 +0100 Subject: [PATCH] Add labels to BQ uploaded tables (#131) --- raster_loader/io/bigquery.py | 15 +++++++++++++++ raster_loader/tests/bigquery/test_io.py | 22 ++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/raster_loader/io/bigquery.py b/raster_loader/io/bigquery.py index a938cd5..37bf8f8 100644 --- a/raster_loader/io/bigquery.py +++ b/raster_loader/io/bigquery.py @@ -2,7 +2,9 @@ import json import pandas as pd import rasterio +import re +from raster_loader import __version__ from raster_loader.errors import import_error_bigquery, IncompatibleRasterException from raster_loader.utils import ask_yes_no_question, batched from raster_loader.io.common import ( @@ -161,6 +163,9 @@ def done_callback(job): print("Writing metadata to BigQuery...") self.write_metadata(metadata, append_records, fqn) + print("Updating labels...") + self.update_labels(fqn, self.get_labels(__version__)) + except IncompatibleRasterException as e: raise IOError("Error uploading to BigQuery: {}".format(e.message)) @@ -224,6 +229,16 @@ def get_metadata(self, fqn): return json.loads(rows[0]["metadata"]) + def get_labels(self, version: str): + return { + "raster_loader": re.sub(r"[^a-z0-9_-]", "_", version.lower()), + } + + def update_labels(self, fqn, labels): + table = self.client.get_table(fqn) + table.labels = labels + table = self.client.update_table(table, ["labels"]) + def write_metadata( self, metadata, diff --git a/raster_loader/tests/bigquery/test_io.py b/raster_loader/tests/bigquery/test_io.py index 351e322..daaf89c 100644 --- a/raster_loader/tests/bigquery/test_io.py +++ b/raster_loader/tests/bigquery/test_io.py @@ -107,6 +107,9 @@ def test_rasterio_to_bigquery_with_raster_default_band_name(): list(expected_dataframe.band_1), key=lambda x: x if x is not None else b"" ) + table = connector.client.get_table(fqn) + assert table.labels.get("raster_loader") is not None + @pytest.mark.integration_test def test_rasterio_to_bigquery_appending_rows(): @@ -364,6 +367,7 @@ def test_rasterio_to_table_wrong_band_name_block(*args, **kwargs): "raster_loader.io.bigquery.BigQueryConnection.check_if_table_exists", return_value=False, ) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) @patch("raster_loader.io.bigquery.ask_yes_no_question", return_value=False) def test_rasterio_to_table(*args, **kwargs): table_name = "test_mosaic_custom_band_column_1" @@ -389,6 +393,7 @@ def test_rasterio_to_table(*args, **kwargs): @patch("raster_loader.io.common.rasterio_metadata", return_value={}) @patch("raster_loader.io.common.get_number_of_blocks", return_value=1) @patch("raster_loader.io.bigquery.BigQueryConnection.write_metadata", return_value=None) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) def test_rasterio_to_table_overwrite(*args, **kwargs): table_name = "test_mosaic_custom_band_column_1" connector = mocks.MockBigQueryConnection() @@ -424,6 +429,7 @@ def test_rasterio_to_table_overwrite(*args, **kwargs): "num_pixels": 1, }, ) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) def test_rasterio_to_table_is_not_empty_append(*args, **kwargs): table_name = "test_mosaic_custom_band_column_1" connector = mocks.MockBigQueryConnection() @@ -498,6 +504,7 @@ def test_rasterio_to_table_keyboard_interrupt(*args, **kwargs): "raster_loader.io.bigquery.BigQueryConnection.check_if_table_exists", return_value=False, ) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) def test_rasterio_to_table_with_chunk_size(*args, **kwargs): table_name = "test_mosaic_custom_band_column_1" connector = mocks.MockBigQueryConnection() @@ -515,6 +522,7 @@ def test_rasterio_to_table_with_chunk_size(*args, **kwargs): "raster_loader.io.bigquery.BigQueryConnection.check_if_table_exists", return_value=False, ) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) def test_rasterio_to_table_with_one_chunk_size(*args, **kwargs): table_name = "test_mosaic_custom_band_column_1" connector = mocks.MockBigQueryConnection() @@ -567,6 +575,7 @@ def test_rasterio_to_table_invalid_raster(*args, **kwargs): "num_pixels": 1, }, ) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) def test_rasterio_to_bigquery_valid_raster(*args, **kwargs): table_name = "test_mosaic_valid_raster".upper() connector = mocks.MockBigQueryConnection() @@ -599,3 +608,16 @@ def test_append_with_different_resolution(*args, **kwargs): os.path.join(fixtures_dir, "mosaic_cog.tif"), f"{BQ_PROJECT_ID}.{BQ_DATASET_ID}.{table_name}", ) + + +def test_get_labels(*args, **kwargs): + connector = mocks.MockBigQueryConnection() + + cases = { + "": {"raster_loader": ""}, + "0.1.0": {"raster_loader": "0_1_0"}, + "0.1.0 something": {"raster_loader": "0_1_0_something"}, + "0.1.0+17$g1d1f3a3H": {"raster_loader": "0_1_0_17_g1d1f3a3h"}, + } + for version, expected_labels in cases.items(): + assert connector.get_labels(version) == expected_labels