Skip to content

Commit

Permalink
Tweak metadata and pyarrow schema methods to work for all tables.
Browse files Browse the repository at this point in the history
  • Loading branch information
zaneselvans committed Jan 8, 2024
1 parent eadf625 commit 5982442
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 3 deletions.
9 changes: 6 additions & 3 deletions src/pudl/metadata/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,9 @@ def to_pyarrow(self) -> pa.Field:
name=self.name,
type=self.to_pyarrow_dtype(),
nullable=(not self.constraints.required),
metadata={"description": self.description},
metadata={
"description": self.description if self.description is not None else ""
},
)

def to_sql( # noqa: C901
Expand Down Expand Up @@ -1318,9 +1320,10 @@ def to_pyarrow(self) -> pa.Schema:
"""Construct a PyArrow schema for the resource."""
fields = [field.to_pyarrow() for field in self.schema.fields]
metadata = {
"description": self.description,
"primary_key": ",".join(self.schema.primary_key),
"description": self.description if self.description is not None else ""
}
if self.schema.primary_key is not None:
metadata |= {"primary_key": ",".join(self.schema.primary_key)}
return pa.schema(fields=fields, metadata=metadata)

def to_pandas_dtypes(self, **kwargs: Any) -> dict[str, str | pd.CategoricalDtype]:
Expand Down
2 changes: 2 additions & 0 deletions src/pudl/metadata/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@
"city": {
"type": "string",
# TODO: Disambiguate column. City means different things in different tables.
"description": "Name of the city.",
},
"co2_mass_measurement_code": {
"type": "string",
Expand Down Expand Up @@ -2581,6 +2582,7 @@
"street_address": {
"type": "string",
# TODO: Disambiguate as this means different things in different tables.
"description": "Physical street address.",
},
"subcritical_tech": {
"type": "boolean",
Expand Down
6 changes: 6 additions & 0 deletions src/pudl/metadata/resources/pudl.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@
"sources": ["pudl"],
},
"core_pudl__assn_utilities_plants": {
"title": "PUDL Utility-Plant Associations",
"description": "Associations between PUDL utility IDs and PUDL plant IDs. This table is read in from a spreadsheet stored in the PUDL repository: src/pudl/package_data/glue/pudl_id_mapping.xlsx",
"schema": {
"fields": ["utility_id_pudl", "plant_id_pudl"],
"primary_key": ["utility_id_pudl", "plant_id_pudl"],
Expand All @@ -65,6 +67,8 @@
"sources": ["pudl"],
},
"core_pudl__codes_datasources": {
"title": "PUDL Data Sources",
"description": "Static table defining codes associated with the data sources that PUDL integrates.",
"schema": {
"fields": [
"datasource",
Expand All @@ -79,6 +83,8 @@
"sources": ["pudl"],
},
"out_ferc714__hourly_predicted_state_demand": {
"title": "Estimated Hourly State Electricity Demand",
"description": "Estimated hourly electricity demand for each state, scaled such that it matches the total electricity sales by state reported in EIA 861.",
"schema": {
"fields": [
"state_id_fips",
Expand Down

0 comments on commit 5982442

Please sign in to comment.