diff --git a/CHANGELOG.md b/CHANGELOG.md index e148d2f3..b7091244 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,15 @@ Formatted as described on [https://keepachangelog.com](https://keepachangelog.co - `ewatercycle.esmvaltool.search.search_esgf` can now be used to find climate model ensembles on ESGF that have the required input variables for generating forcing data ([#422](https://github.com/eWaterCycle/ewatercycle/pull/422)). - `ewatercycle.observation.caravan.get_caravan_data()` ([#432](https://github.com/eWaterCycle/ewatercycle/issues/432)) +### Fixed + +- `get_usgs_data()` throws error ([#414](https://github.com/eWaterCycle/ewatercycle/issues/414)) +- `get_usgs_data()` and `get_grdc_data()` both return xarray.Dataset ([#253](https://github.com/eWaterCycle/ewatercycle/issues/253)) + +### Removed + +- Caching mechanism from `get_usgs_data()` ([#240](https://github.com/eWaterCycle/ewatercycle/issues/240)) + ## [2.1.1] (2024-06-03) ### Added diff --git a/README.md b/README.md index df3ed8f5..d286c86a 100644 --- a/README.md +++ b/README.md @@ -93,12 +93,12 @@ cfg_file, cfg_dir = model.setup( model.initialize(cfg_file) -observations_df, station_info = ewatercycle.observation.grdc.get_grdc_data( +observations_df = ewatercycle.observation.grdc.get_grdc_data( station_id=4147380, start_time=model.start_time_as_isostr, end_time=model.end_time_as_isostr, column='observation', -) +).observation.to_dataframe() simulated_discharge = [] timestamps = [] diff --git a/docs/observations.rst b/docs/observations.rst index 430fb555..6177d315 100644 --- a/docs/observations.rst +++ b/docs/observations.rst @@ -6,7 +6,7 @@ The eWaterCycle platform supports observations relevant for calibrating and vali USGS ---- -The `U.S. Geological Survey Water Services <https://waterservices.usgs.gov/>`_ provides public discharge data for a large number of US based stations. In eWaterCycle we make use of the `USGS web service `_ to automatically retrieve this data. +The `U.S. Geological Survey Water Services <https://waterservices.usgs.gov/>`_ provides public discharge data for a large number of US based stations. In eWaterCycle (:py:func:`ewatercycle.observation.usgs.get_usgs_data`) we make use of the `USGS web service `_ to automatically retrieve this data. The Discharge timestamp is corrected to the UTC timezone. Units are converted from cubic feet per second to cubic meter per second. 
GRDC diff --git a/docs/user_guide/03_models_obs_analysis.ipynb b/docs/user_guide/03_models_obs_analysis.ipynb index d0a5472d..38ac64e6 100644 --- a/docs/user_guide/03_models_obs_analysis.ipynb +++ b/docs/user_guide/03_models_obs_analysis.ipynb @@ -605,14 +605,14 @@ "source": [ "grdc_station_id = \"6335020\"\n", "\n", - "observations, metadata = ewatercycle.observation.grdc.get_grdc_data(\n", + "observations = ewatercycle.observation.grdc.get_grdc_data(\n", " station_id=grdc_station_id,\n", " start_time=\"1990-01-01T00:00:00Z\", # or: model_instance.start_time_as_isostr\n", " end_time=\"1990-12-15T00:00:00Z\",\n", " column=\"GRDC\",\n", ")\n", "\n", - "observations.head()" + "observations.GRDC.to_dataframe().head()" ] }, { @@ -639,7 +639,7 @@ } ], "source": [ - "print(metadata)" + "print(observations.attrs)" ] }, { diff --git a/src/ewatercycle/observation/grdc.py b/src/ewatercycle/observation/grdc.py index 534a19e0..a649f923 100644 --- a/src/ewatercycle/observation/grdc.py +++ b/src/ewatercycle/observation/grdc.py @@ -1,9 +1,12 @@ """Global Runoff Data Centre module.""" + import logging import os -from typing import Dict, Optional, Tuple, Union +from typing import Dict, Optional, Union import pandas as pd +import xarray as xr +from numpy import nan from ewatercycle import CFG from ewatercycle.util import get_time, to_absolute_path @@ -17,15 +20,18 @@ def get_grdc_data( station_id: str, start_time: str, end_time: str, - parameter: str = "Q", data_home: Optional[str] = None, column: str = "streamflow", -) -> Tuple[pd.core.frame.DataFrame, MetaDataType]: +) -> xr.Dataset: """Get river discharge data from Global Runoff Data Centre (GRDC). Requires the GRDC daily data files in a local directory. The GRDC daily data - files can be ordered at - https://www.bafg.de/GRDC/EN/02_srvcs/21_tmsrs/riverdischarge_node.html + NetCDF file can be downloaded at + https://www.bafg.de/GRDC/EN/02_srvcs/21_tmsrs/riverdischarge_node.html . + The downloaded zip file contains a file named GRDC-Daily.nc. + + This function will first try to read data from the GRDC-Daily.nc file in the ``data_home`` directory. + If that fails it will look for the GRDC Export (ASCII text) formatted file, for example ``6435060_Q_Day.Cmd.txt``. Args: station_id: The station id to get. The station id can be found in the @@ -35,55 +41,50 @@ 'YYYY-MM-DDTHH:MM:SSZ'. end_time: End time of model in UTC and ISO format string e.g. 'YYYY-MM-DDTHH:MM:SSZ'. - parameter: optional. The parameter code to get, e.g. ('Q') discharge, - cubic meters per second. data_home : optional. The directory where the daily grdc data is located. If left out will use the grdc_location in the eWaterCycle configuration file. column: optional. Name of column in dataframe. Default: "streamflow". Returns: - grdc data in a dataframe and metadata. + grdc data in an xarray dataset. Shaped like a filtered version of the GRDC daily NetCDF file. + + Raises: + ValueError: If data for the requested station id and period could not be found. Examples: + .. 
code-block:: python from ewatercycle.observation.grdc import get_grdc_data - df, meta = get_grdc_data('6335020', - '2000-01-01T00:00Z', - '2001-01-01T00:00Z') - df.describe() - streamflow - count 4382.000000 - mean 2328.992469 - std 1190.181058 - min 881.000000 - 25% 1550.000000 - 50% 2000.000000 - 75% 2730.000000 - max 11300.000000 - - meta - {'grdc_file_name': '/home/myusername/git/eWaterCycle/ewatercycle/6335020_Q_Day.Cmd.txt', - 'id_from_grdc': 6335020, - 'file_generation_date': '2019-03-27', - 'river_name': 'RHINE RIVER', - 'station_name': 'REES', - 'country_code': 'DE', - 'grdc_latitude_in_arc_degree': 51.756918, - 'grdc_longitude_in_arc_degree': 6.395395, - 'grdc_catchment_area_in_km2': 159300.0, - 'altitude_masl': 8.0, - 'dataSetContent': 'MEAN DAILY DISCHARGE (Q)', - 'units': 'm³/s', - 'time_series': '1814-11 - 2016-12', - 'no_of_years': 203, - 'last_update': '2018-05-24', - 'nrMeasurements': 'NA', - 'UserStartTime': '2000-01-01T00:00Z', - 'UserEndTime': '2001-01-01T00:00Z', - 'nrMissingData': 0} + ds = get_grdc_data('6435060', + '2000-01-01T00:00Z', + '2001-01-01T00:00Z') + ds + Size: 5kB + Dimensions: (time: 367) + Coordinates: + * time (time) datetime64[ns] 3kB 2000-01-01 ... 2001-01-01 + id int32 4B 6435060 + Data variables: + streamflow (time) float32 1kB ... + area float32 4B ... + country xr.Dataset: + # Convert the raw data to an xarray + data = WaterML11ToPaegan(waterml_data).feature + + # We expect only 1 station + if len(data.elements) == 0: + raise ValueError("Data does not contain any station data") + + station = data.elements[0] + + # Unit conversion from cubic feet per second to cubic meter per second + values = np.array( + [float(point.members[0]["value"]) / 35.315 for point in station.elements], + dtype=np.float32, + ) + # Convert the time to a numpy array of datetime64 without timezone + times = pd.to_datetime([point.time for point in station.elements]).to_numpy( + dtype="datetime64[ns]" + ) + attrs = {"units": "m3/s"} + + # Create the xarray dataset + ds = xr.Dataset({"streamflow": (["time"], values, attrs)}, coords={"time": times}) + + # Set some nice attributes + ds.attrs["title"] = "USGS Data from streamflow data" + ds.attrs["station"] = station.name + ds.attrs["stationid"] = station.get_uid() + ds.attrs["location"] = (station.location.y, station.location.x) + + return ds + + +def _download_usgs_data( + station_id: str, + start_time: str, + end_time: str, +): + discharge_parameter = "00060" + collector = UsgsRest() + collector.filter( + start=get_time(start_time), + end=get_time(end_time), + variables=[discharge_parameter], + features=[station_id], + ) + return collector.raw() + -def get_usgs_data(station_id, start_date, end_date, parameter="00060", cache_dir=None): +def get_usgs_data( + station_id: str, + start_time: str, + end_time: str, +) -> xr.Dataset: """Get river discharge data from the USGS REST web service. See `U.S. Geological Survey Water Services `_ (USGS) - Parameters - ---------- - station_id : str - The station id to get - start_date : str - String for start date in the format: 'YYYY-MM-dd', e.g. '1980-01-01' - end_date : str - String for start date in the format: 'YYYY-MM-dd', e.g. '2018-12-31' - parameter : str - The parameter code to get, e.g. ('00060') discharge, cubic feet per second - cache_dir : str - Directory where files retrieved from the web service are cached. - If set to None then USGS_DATA_HOME env var will be used as cache directory. 
- - Examples - -------- - >>> from ewatercycle.observation.usgs import get_usgs_data - >>> data = get_usgs_data('03109500', '2000-01-01', '2000-12-31', cache_dir='.') - >>> data - + Args: + station_id: The station id to get + start_time: Start time of model in UTC and ISO format string e.g. + 'YYYY-MM-DDTHH:MM:SSZ'. + end_time: End time of model in UTC and ISO format string e.g. + 'YYYY-MM-DDTHH:MM:SSZ'. + + Returns: + Xarray dataset with the streamflow data + with unit and other metadata in the variable and global attributes. + + Examples: + + To get observations from the Little Beaver Creek. + + >>> from ewatercycle.observation.usgs import get_usgs_data + >>> data = get_usgs_data('03109500', '2000-01-01T00:00:00Z', '2000-12-31T00:00:00Z') + >>> data + Size: 96kB Dimensions: (time: 8032) Coordinates: - * time (time) datetime64[ns] 2000-01-04T05:00:00 ... 2000-12-23T04:00:00 + * time (time) datetime64[ns] 64kB 2000-01-04T05:00:00 ... 2000-12-23... Data variables: - Streamflow (time) float32 8.296758 10.420501 ... 10.647034 11.694747 + streamflow (time) float32 32kB 8.297 10.42 17.58 ... 8.552 10.65 11.69 Attributes: title: USGS Data from streamflow data station: Little Beaver Creek near East Liverpool OH stationid: 03109500 - location: (40.6758974, -80.5406244) + location: (np.float64(40.6758974), np.float64(-80.5406244)) """ # noqa: E501 - if cache_dir is None: - cache_dir = os.environ["USGS_DATA_HOME"] - - # Check if we have the netcdf data - netcdf = os.path.join( - cache_dir, - "USGS_" - + station_id - + "_" - + parameter - + "_" - + start_date - + "_" - + end_date - + ".nc", - ) - if os.path.exists(netcdf): - return xr.open_dataset(netcdf) - - # Download the data if needed - out = os.path.join( - cache_dir, - "USGS_" - + station_id - + "_" - + parameter - + "_" - + start_date - + "_" - + end_date - + ".wml", - ) - if not os.path.exists(out): - collector = UsgsRest() - collector.filter( - start=datetime.strptime(start_date, "%Y-%m-%d"), - end=datetime.strptime(end_date, "%Y-%m-%d"), - variables=[parameter], - features=[station_id], - ) - data = collector.raw() - with open(out, "w") as file: - file.write(data) - collector.clear() - else: - with open(out, "r") as file: - data = file.read() - - # Convert the raw data to an xarray - data = WaterML11ToPaegan(data).feature - - # We expect only 1 station - if len(data.elements) == 0: - raise ValueError("Data does not contain any station data") - else: - station = data.elements[0] - - # Unit conversion from cubic feet to cubic meter per second - values = np.array( - [float(point.members[0]["value"]) / 35.315 for point in station.elements], - dtype=np.float32, - ) - times = [point.time for point in station.elements] - - attrs = { - "units": "cubic meters per second", - } - - # Create the xarray dataset - ds = xr.Dataset( - {"streamflow": (["time"], values, attrs)}, coords={"time": times} - ) - - # Set some nice attributes - ds.attrs["title"] = "USGS Data from streamflow data" - ds.attrs["station"] = station.name - ds.attrs["stationid"] = station.get_uid() - ds.attrs["location"] = (station.location.y, station.location.x) - - ds.to_netcdf(netcdf) - - return ds + wml_data = _download_usgs_data(station_id, start_time, end_time) + return _xml_to_xarray(wml_data) diff --git a/tests/src/observation/test_grdc.py b/tests/src/observation/test_grdc.py index 1f2e5488..7922e2c9 100644 --- a/tests/src/observation/test_grdc.py +++ b/tests/src/observation/test_grdc.py @@ -1,9 +1,10 @@ from datetime import datetime +from pathlib import Path import numpy as np 
-import pandas as pd import pytest -from pandas.testing import assert_frame_equal +import xarray as xr +from xarray.testing import assert_allclose from ewatercycle import CFG from ewatercycle.observation.grdc import get_grdc_data @@ -58,55 +59,123 @@ def sample_grdc_file(tmp_path): @pytest.fixture -def expected_results(tmp_path, sample_grdc_file): - data = pd.DataFrame( - {"streamflow": [123.0, 456.0, np.nan]}, - index=[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], +def expected_results(): + return xr.Dataset.from_dict( + { + "coords": { + "time": { + "dims": ("time",), + "attrs": {"long_name": "time"}, + "data": [ + datetime(2000, 1, 1, 0, 0), + datetime(2000, 1, 2, 0, 0), + datetime(2000, 1, 3, 0, 0), + ], + }, + "id": { + "dims": (), + "attrs": {"long_name": "grdc number"}, + "data": 42424242, + }, + }, + "attrs": { + "title": "MEAN DAILY DISCHARGE (Q)", + "Conventions": "CF-1.7", + "references": "grdc.bafg.de", + "institution": "GRDC", + "history": "Converted from 42424242_Q_Day.Cmd.txt of 2000-02-02 to netcdf by eWaterCycle Python package", + "missing_value": "-999.000", + }, + "dims": {"time": 3}, + "data_vars": { + "streamflow": { + "dims": ("time",), + "attrs": {"units": "m3/s", "long_name": "Mean daily discharge (Q)"}, + "data": [123.0, 456.0, np.nan], + }, + "area": { + "dims": (), + "attrs": {"units": "km2", "long_name": "catchment area"}, + "data": 4242.0, + }, + "country": { + "dims": (), + "attrs": { + "long_name": "country name", + "iso2": "ISO 3166-1 alpha-2 - two-letter country code", + }, + "data": "NA", + }, + "geo_x": { + "dims": (), + "attrs": { + "units": "degree_east", + "long_name": "station longitude (WGS84)", + }, + "data": 4.955153, + }, + "geo_y": { + "dims": (), + "attrs": { + "units": "degree_north", + "long_name": "station latitude (WGS84)", + }, + "data": 52.356154, + }, + "geo_z": { + "dims": (), + "attrs": { + "units": "m", + "long_name": "station altitude (m above sea level)", + }, + "data": 8.0, + }, + "owneroforiginaldata": { + "dims": (), + "attrs": {"long_name": "Owner of original data"}, + "data": "Unknown", + }, + "river_name": { + "dims": (), + "attrs": {"long_name": "river name"}, + "data": "SOME RIVER", + }, + "station_name": { + "dims": (), + "attrs": {"long_name": "station name"}, + "data": "SOME", + }, + "timezone": { + "dims": (), + "attrs": { + "units": "00:00", + "long_name": "utc offset, in relation to the national capital", + }, + "data": np.nan, + }, + }, + } ) - data.index.rename("time", inplace=True) - metadata = { - "altitude_masl": 8.0, - "country_code": "NA", - "dataSetContent": "MEAN DAILY DISCHARGE (Q)", - "file_generation_date": "2000-02-02", - "grdc_catchment_area_in_km2": 4242.0, - "grdc_file_name": str(tmp_path / sample_grdc_file), - "grdc_latitude_in_arc_degree": 52.356154, - "grdc_longitude_in_arc_degree": 4.955153, - "id_from_grdc": 42424242, - "last_update": "2000-02-01", - "no_of_years": 1, - "nrMeasurements": 3, - "river_name": "SOME RIVER", - "station_name": "SOME", - "time_series": "2000-01 - 2000-01", - "units": "m³/s", - "UserEndTime": "2000-02-01T00:00Z", - "UserStartTime": "2000-01-01T00:00Z", - "nrMissingData": 1, - } - return data, metadata - - -def test_get_grdc_data_with_datahome(tmp_path, expected_results): - expected_data, expected_metadata = expected_results - result_data, result_metadata = get_grdc_data( + + +def test_get_grdc_data_with_datahome( + tmp_path, expected_results: xr.Dataset, sample_grdc_file +): + result_data = get_grdc_data( "42424242", "2000-01-01T00:00Z", 
"2000-02-01T00:00Z", data_home=str(tmp_path) ) - assert_frame_equal(result_data, expected_data) - assert result_metadata == expected_metadata + print(result_data.to_dict()) + assert_allclose(result_data, expected_results) -def test_get_grdc_data_with_cfg(expected_results, tmp_path): +def test_get_grdc_data_with_cfg( + expected_results: xr.Dataset, tmp_path, sample_grdc_file +): CFG.grdc_location = tmp_path - expected_data, expected_metadata = expected_results - result_data, result_metadata = get_grdc_data( - "42424242", "2000-01-01T00:00Z", "2000-02-01T00:00Z" - ) + result_data = get_grdc_data("42424242", "2000-01-01T00:00Z", "2000-02-01T00:00Z") - assert_frame_equal(result_data, expected_data) - assert result_metadata == expected_metadata + assert_allclose(result_data, expected_results) def test_get_grdc_data_without_file(tmp_path): @@ -119,13 +188,137 @@ def test_get_grdc_data_without_file(tmp_path): ) -def test_get_grdc_dat_custom_column_name(expected_results, tmp_path): - CFG.grdc_location = str(tmp_path) - result_data, result_metadata = get_grdc_data( +def test_get_grdc_data_custom_column_name( + expected_results: xr.Dataset, tmp_path: Path, sample_grdc_file +): + CFG.grdc_location = tmp_path + result_data = get_grdc_data( "42424242", "2000-01-01T00:00Z", "2000-02-01T00:00Z", column="observation" ) - expected_default_data, expected_metadata = expected_results - expected_data = expected_default_data.rename(columns={"streamflow": "observation"}) - assert_frame_equal(result_data, expected_data) - assert result_metadata == expected_metadata + expected_data = expected_results.rename({"streamflow": "observation"}) + + assert_allclose(result_data, expected_data) + + +@pytest.fixture +def sample_nc_file(tmp_path): + fn = tmp_path / "GRDC-Daily.nc" + ds = xr.Dataset.from_dict( + { + "coords": { + "time": { + "dims": ("time",), + "attrs": {"long_name": "time"}, + "data": [ + datetime(2000, 1, 1, 0, 0), + datetime(2000, 1, 2, 0, 0), + datetime(2000, 1, 3, 0, 0), + ], + }, + "id": { + "dims": ("id",), + "attrs": {"long_name": "grdc number"}, + "data": [42424242], + }, + }, + "attrs": { + "title": "MEAN DAILY DISCHARGE (Q)", + "Conventions": "CF-1.7", + "references": "grdc.bafg.de", + "institution": "GRDC", + "history": "Converted from 42424242_Q_Day.Cmd.txt of 2000-02-02 to netcdf by eWaterCycle Python package", + "missing_value": "-999.000", + }, + "dims": {"time": 3, "id": 1}, + "data_vars": { + "runoff_mean": { + "dims": ("time", "id"), + "attrs": {"units": "m3/s", "long_name": "Mean daily discharge (Q)"}, + "data": [[123.0], [456.0], [np.nan]], + }, + "area": { + "dims": ("id",), + "attrs": {"units": "km2", "long_name": "catchment area"}, + "data": [4242.0], + }, + "country": { + "dims": ("id",), + "attrs": { + "long_name": "country name", + "iso2": "ISO 3166-1 alpha-2 - two-letter country code", + }, + "data": ["NA"], + }, + "geo_x": { + "dims": ("id",), + "attrs": { + "units": "degree_east", + "long_name": "station longitude (WGS84)", + }, + "data": [4.955153], + }, + "geo_y": { + "dims": ("id",), + "attrs": { + "units": "degree_north", + "long_name": "station latitude (WGS84)", + }, + "data": [52.356154], + }, + "geo_z": { + "dims": ("id",), + "attrs": { + "units": "m", + "long_name": "station altitude (m above sea level)", + }, + "data": [8.0], + }, + "owneroforiginaldata": { + "dims": ("id",), + "attrs": {"long_name": "Owner of original data"}, + "data": ["Unknown"], + }, + "river_name": { + "dims": ("id",), + "attrs": {"long_name": "river name"}, + "data": ["SOME RIVER"], + }, + 
"station_name": { + "dims": ("id",), + "attrs": {"long_name": "station name"}, + "data": ["SOME"], + }, + "timezone": { + "dims": ("id",), + "attrs": { + "units": "00:00", + "long_name": "utc offset, in relation to the national capital", + }, + "data": [np.nan], + }, + }, + } + ) + ds.to_netcdf(fn) + return str(tmp_path) + + +def test_get_grdc_data_from_nc(sample_nc_file, expected_results: xr.Dataset): + result_data = get_grdc_data( + "42424242", "2000-01-01T00:00Z", "2000-02-01T00:00Z", data_home=sample_nc_file + ) + assert_allclose(result_data, expected_results) + + +def test_get_grdc_data_from_nc_missing_and_no_txtfile(tmp_path, sample_nc_file): + with pytest.raises( + ValueError, + match="The grdc station 42424243 is not in the .*/GRDC-Daily.nc file and .*/42424243_Q_Day.Cmd.txt does not exist!", + ): + get_grdc_data( + "42424243", + "2000-01-01T00:00Z", + "2000-02-01T00:00Z", + data_home=str(tmp_path), + ) diff --git a/tests/src/observation/test_usgs.py b/tests/src/observation/test_usgs.py new file mode 100644 index 00000000..6f47b9a0 --- /dev/null +++ b/tests/src/observation/test_usgs.py @@ -0,0 +1,207 @@ +import datetime +from textwrap import dedent + +import numpy as np +import pytest +import xarray as xr + +from ewatercycle.observation.usgs import _xml_to_xarray + + +@pytest.fixture +def waterml_data(): + """This was generated by running + + ```python + from ewatercycle.observation.usgs import _download_usgs_data + print(_download_usgs_data("03109500", "2000-01-06T00:00:00", "2000-01-07T00:00:00")) + ``` + """ + return dedent( + """\ + + + + + http://nwis.waterservices.usgs.gov/nwis/iv/startDT=2000-01-06T00%3A00&endDT=2000-01-07T00%3A00&parameterCd=00060&sites=03109500&format=waterml%2C1.1 + + [ALL:03109500] + [00060] + + 2000-01-06T00:00:00.000 + 2000-01-07T00:00:00.000 + + + [ALL:03109500] + [mode=RANGE, modifiedSince=null] + interval={INTERVAL[2000-01-06T00:00:00.000Z/2000-01-07T00:00:00.000Z]} + methodIds=[ALL] + 2024-07-05T08:06:53.782Z + 8a673850-3aa5-11ef-9e48-4cd98f8df011 + Provisional data are subject to revision. Go to + http://waterdata.usgs.gov/nwis/help/?provisional for more information. + nadww01 + + + + Little Beaver Creek near East Liverpool OH + 03109500 + + + + + + + 40.6758974 + -80.5406244 + + + ST + 05030101 + 39 + 39029 + + + 00060 + Streamflow, ft³/s + Discharge, cubic feet per second + Derived Value + + ft3/s + + + + + -999999.0 + + + 1570 + 1510 + 1430 + 1370 + 1320 + 1260 + 1220 + 1180 + 1140 + 1110 + 1070 + 1040 + 1020 + 995 + 967 + 947 + 920 + 901 + 888 + 868 + 849 + 831 + 818 + 806 + 788 + + [91] + Returned when there is no matching qualifier. + + + A + Approved for publication -- Processing and review + completed. 
+ + + + + + + + """ + ) + + +def test_xml_to_xarray(waterml_data: str): + result = _xml_to_xarray(waterml_data) + expected = xr.Dataset.from_dict( + { + "coords": { + "time": { + "dims": ("time",), + "attrs": {}, + "data": [ + datetime.datetime(2000, 1, 6, 5, 0), + datetime.datetime(2000, 1, 6, 6, 0), + datetime.datetime(2000, 1, 6, 7, 0), + datetime.datetime(2000, 1, 6, 8, 0), + datetime.datetime(2000, 1, 6, 9, 0), + datetime.datetime(2000, 1, 6, 10, 0), + datetime.datetime(2000, 1, 6, 11, 0), + datetime.datetime(2000, 1, 6, 12, 0), + datetime.datetime(2000, 1, 6, 13, 0), + datetime.datetime(2000, 1, 6, 14, 0), + datetime.datetime(2000, 1, 6, 15, 0), + datetime.datetime(2000, 1, 6, 16, 0), + datetime.datetime(2000, 1, 6, 17, 0), + datetime.datetime(2000, 1, 6, 18, 0), + datetime.datetime(2000, 1, 6, 19, 0), + datetime.datetime(2000, 1, 6, 20, 0), + datetime.datetime(2000, 1, 6, 21, 0), + datetime.datetime(2000, 1, 6, 22, 0), + datetime.datetime(2000, 1, 6, 23, 0), + datetime.datetime(2000, 1, 7, 0, 0), + datetime.datetime(2000, 1, 7, 1, 0), + datetime.datetime(2000, 1, 7, 2, 0), + datetime.datetime(2000, 1, 7, 3, 0), + datetime.datetime(2000, 1, 7, 4, 0), + datetime.datetime(2000, 1, 7, 5, 0), + ], + } + }, + "attrs": { + "title": "USGS Data from streamflow data", + "station": "Little Beaver Creek near East Liverpool OH", + "stationid": "03109500", + "location": (np.float64(40.6758974), np.float64(-80.5406244)), + }, + "dims": {"time": 25}, + "data_vars": { + "streamflow": { + "dims": ("time",), + "attrs": {"units": "m3/s"}, + "data": [ + 44.45703125, + 42.758033752441406, + 40.49271011352539, + 38.7937126159668, + 37.37788391113281, + 35.678890228271484, + 34.546226501464844, + 33.4135627746582, + 32.28089904785156, + 31.4314022064209, + 30.29874038696289, + 29.449241638183594, + 28.882911682128906, + 28.174997329711914, + 27.382131576538086, + 26.815799713134766, + 26.051252365112305, + 25.51323890686035, + 25.145122528076172, + 24.57879066467285, + 24.040775299072266, + 23.531078338623047, + 23.162961959838867, + 22.823162078857422, + 22.313465118408203, + ], + } + }, + } + ) + + xr.testing.assert_identical(result, expected)
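With this change both `get_grdc_data()` and `get_usgs_data()` return an `xarray.Dataset` instead of a `(DataFrame, metadata)` tuple. As a quick orientation, below is a minimal usage sketch of how callers would consume the new return values once the diff is applied; it only reuses calls and arguments that appear above (the notebook cell and the new docstrings) and assumes GRDC data is available locally in the configured `grdc_location`/`data_home`, plus network access for the USGS web service.

```python
# Minimal usage sketch of the new xarray-based observation API (assumes the diff
# above is applied, local GRDC data, and network access for USGS).
import ewatercycle.observation.grdc
import ewatercycle.observation.usgs

# GRDC: station metadata now lives in the dataset attributes instead of a
# separate metadata dict; the discharge variable is named after `column`.
grdc_ds = ewatercycle.observation.grdc.get_grdc_data(
    station_id="6335020",
    start_time="1990-01-01T00:00:00Z",
    end_time="1990-12-15T00:00:00Z",
    column="GRDC",
)
print(grdc_ds.attrs)  # replaces the old `metadata` return value
observations_df = grdc_ds.GRDC.to_dataframe()  # replaces the old DataFrame return value

# USGS: no cache_dir argument anymore, ISO timestamps instead of plain dates,
# and the result is an xarray.Dataset with streamflow converted to m3/s.
usgs_ds = ewatercycle.observation.usgs.get_usgs_data(
    station_id="03109500",
    start_time="2000-01-01T00:00:00Z",
    end_time="2000-12-31T00:00:00Z",
)
print(usgs_ds["streamflow"].attrs["units"])  # "m3/s"
```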