diff --git a/docs/release_notes.rst b/docs/release_notes.rst index a2b05da514..19c19aa4c6 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -303,13 +303,13 @@ Analysis complete estimates of generator heat rates and thus fuel costs and emissions. Thanks to :user:`grgmiller` for his contribution, which was integrated by :user:`cmgosnell`! See PRs :pr:`1096,1608` and issues :issue:`1468,1478`. -* Integrated :mod:`pudl.analysis.ferc1_eia` from our RMI collaboration repo, which uses - logistic regression to match FERC1 plants data to EIA 860 records. While far from - perfect, this baseline model utilizes the manually created training data and plant IDs - to perform record linkage on the FERC1 data and EIA plant parts list created in - :mod:`pudl.analysis.plant_parts_eia`. See issue :issue:`1064` & PR :pr:`2224`. To - account for 1:m matches in the manual data, we added ``plant_match_ferc1`` as a plant - part in :mod:`pudl.analysis.plant_parts_eia`. +* Integrated :mod:`pudl.analysis.eia_ferc1_record_linkage` from our RMI collaboration + repo, which uses logistic regression to match FERC1 plants data to EIA 860 records. + While far from perfect, this baseline model utilizes the manually created training + data and plant IDs to perform record linkage on the FERC1 data and EIA plant parts + list created in :mod:`pudl.analysis.plant_parts_eia`. See issue :issue:`1064` & PR + :pr:`2224`. To account for 1:m matches in the manual data, we added + ``plant_match_ferc1`` as a plant part in :mod:`pudl.analysis.plant_parts_eia`. * Refined how we are associating generation and fuel data in :mod:`pudl.analysis.allocate_gen_fuel`, which was renamed from ``allocate_net_gen``. Energy source codes that show up in the :ref:`core_eia923__monthly_generation_fuel` or diff --git a/migrations/versions/773df9b1eb8c_apply_naming_convention_to_all_assets.py b/migrations/versions/4b08158ae952_apply_naming_convention_to_all_assets.py similarity index 97% rename from migrations/versions/773df9b1eb8c_apply_naming_convention_to_all_assets.py rename to migrations/versions/4b08158ae952_apply_naming_convention_to_all_assets.py index cb1575258a..e623f2a241 100644 --- a/migrations/versions/773df9b1eb8c_apply_naming_convention_to_all_assets.py +++ b/migrations/versions/4b08158ae952_apply_naming_convention_to_all_assets.py @@ -1,8 +1,8 @@ """Apply naming convention to all assets -Revision ID: 773df9b1eb8c +Revision ID: 4b08158ae952 Revises: -Create Date: 2023-11-30 16:23:04.422005 +Create Date: 2023-12-13 11:50:58.385316 """ import sqlalchemy as sa @@ -10,7 +10,7 @@ from sqlalchemy.dialects import sqlite # revision identifiers, used by Alembic. -revision = '773df9b1eb8c' +revision = '4b08158ae952' down_revision = None branch_labels = None depends_on = None @@ -262,9 +262,9 @@ def upgrade() -> None: sa.PrimaryKeyConstraint('code', name=op.f('pk_core_ferc1__codes_power_purchase_types')) ) op.create_table('core_ferc714__respondent_id', - sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False), - sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), - sa.Column('eia_code', sa.Integer(), nullable=True), + sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False, comment='FERC Form 714 respondent ID. Note that this ID does not correspond to FERC respondent IDs from other forms.'), + sa.Column('respondent_name_ferc714', sa.Text(), nullable=True, comment='Name of the utility, balancing area authority, or planning authority responding to FERC Form 714.'), + sa.Column('eia_code', sa.Integer(), nullable=True, comment='EIA utility or balancing area authority ID associated with this FERC Form 714 respondent. Note that many utilities are also balancing authorities and in many cases EIA uses the same integer ID to identify a utility in its role as a balancing authority AND as a utility, but there is no requirement that these IDs be the same, and in a number of cases they are different.'), sa.PrimaryKeyConstraint('respondent_id_ferc714', name=op.f('pk_core_ferc714__respondent_id')) ) op.create_table('core_ferc__codes_accounts', @@ -287,13 +287,13 @@ def upgrade() -> None: op.create_table('core_pudl__codes_subdivisions', sa.Column('country_code', sa.Enum('USA', 'CAN'), nullable=False, comment='Three letter ISO-3166 country code (e.g. USA or CAN).'), sa.Column('country_name', sa.Text(), nullable=True, comment='Full country name (e.g. United States of America).'), - sa.Column('subdivision_code', sa.Enum('NE', 'HI', 'OK', 'MA', 'QC', 'GA', 'SK', 'SD', 'WI', 'ID', 'KS', 'IA', 'NU', 'ON', 'AZ', 'IL', 'UT', 'FL', 'KY', 'MB', 'MD', 'CO', 'MO', 'AL', 'AB', 'NB', 'MT', 'NH', 'BC', 'MI', 'NS', 'OH', 'NJ', 'MS', 'GU', 'NT', 'WA', 'AS', 'OR', 'VT', 'SC', 'ND', 'NY', 'PR', 'NM', 'PE', 'VI', 'DC', 'WY', 'LA', 'IN', 'NV', 'AR', 'PA', 'ME', 'TX', 'YT', 'CT', 'MN', 'WV', 'AK', 'NC', 'RI', 'CA', 'MP', 'DE', 'NL', 'VA', 'TN'), nullable=False, comment='Two-letter ISO-3166 political subdivision code (e.g. US state or Canadian provice abbreviations like CA or AB).'), + sa.Column('subdivision_code', sa.Enum('NJ', 'KY', 'PE', 'NT', 'DE', 'MP', 'ON', 'MB', 'OK', 'SD', 'GU', 'CA', 'CT', 'NE', 'AS', 'UT', 'KS', 'MO', 'NC', 'QC', 'NB', 'AL', 'AR', 'BC', 'NU', 'OR', 'VT', 'NM', 'PR', 'WI', 'WA', 'PA', 'RI', 'TX', 'GA', 'MA', 'AB', 'AZ', 'YT', 'ME', 'VI', 'SC', 'IN', 'WV', 'FL', 'NY', 'MS', 'ID', 'DC', 'IL', 'MI', 'CO', 'LA', 'NL', 'MD', 'OH', 'NS', 'AK', 'SK', 'VA', 'MT', 'ND', 'NH', 'MN', 'TN', 'IA', 'HI', 'WY', 'NV'), nullable=False, comment='Two-letter ISO-3166 political subdivision code (e.g. US state or Canadian provice abbreviations like CA or AB).'), sa.Column('subdivision_name', sa.Text(), nullable=True, comment='Full name of political subdivision (e.g. US state or Canadian province names like California or Alberta.'), sa.Column('subdivision_type', sa.Text(), nullable=True, comment='ISO-3166 political subdivision type. E.g. state, province, outlying_area.'), sa.Column('timezone_approx', sa.Enum('Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers', 'Africa/Asmara', 'Africa/Asmera', 'Africa/Bamako', 'Africa/Bangui', 'Africa/Banjul', 'Africa/Bissau', 'Africa/Blantyre', 'Africa/Brazzaville', 'Africa/Bujumbura', 'Africa/Cairo', 'Africa/Casablanca', 'Africa/Ceuta', 'Africa/Conakry', 'Africa/Dakar', 'Africa/Dar_es_Salaam', 'Africa/Djibouti', 'Africa/Douala', 'Africa/El_Aaiun', 'Africa/Freetown', 'Africa/Gaborone', 'Africa/Harare', 'Africa/Johannesburg', 'Africa/Juba', 'Africa/Kampala', 'Africa/Khartoum', 'Africa/Kigali', 'Africa/Kinshasa', 'Africa/Lagos', 'Africa/Libreville', 'Africa/Lome', 'Africa/Luanda', 'Africa/Lubumbashi', 'Africa/Lusaka', 'Africa/Malabo', 'Africa/Maputo', 'Africa/Maseru', 'Africa/Mbabane', 'Africa/Mogadishu', 'Africa/Monrovia', 'Africa/Nairobi', 'Africa/Ndjamena', 'Africa/Niamey', 'Africa/Nouakchott', 'Africa/Ouagadougou', 'Africa/Porto-Novo', 'Africa/Sao_Tome', 'Africa/Timbuktu', 'Africa/Tripoli', 'Africa/Tunis', 'Africa/Windhoek', 'America/Adak', 'America/Anchorage', 'America/Anguilla', 'America/Antigua', 'America/Araguaina', 'America/Argentina/Buenos_Aires', 'America/Argentina/Catamarca', 'America/Argentina/ComodRivadavia', 'America/Argentina/Cordoba', 'America/Argentina/Jujuy', 'America/Argentina/La_Rioja', 'America/Argentina/Mendoza', 'America/Argentina/Rio_Gallegos', 'America/Argentina/Salta', 'America/Argentina/San_Juan', 'America/Argentina/San_Luis', 'America/Argentina/Tucuman', 'America/Argentina/Ushuaia', 'America/Aruba', 'America/Asuncion', 'America/Atikokan', 'America/Atka', 'America/Bahia', 'America/Bahia_Banderas', 'America/Barbados', 'America/Belem', 'America/Belize', 'America/Blanc-Sablon', 'America/Boa_Vista', 'America/Bogota', 'America/Boise', 'America/Buenos_Aires', 'America/Cambridge_Bay', 'America/Campo_Grande', 'America/Cancun', 'America/Caracas', 'America/Catamarca', 'America/Cayenne', 'America/Cayman', 'America/Chicago', 'America/Chihuahua', 'America/Ciudad_Juarez', 'America/Coral_Harbour', 'America/Cordoba', 'America/Costa_Rica', 'America/Creston', 'America/Cuiaba', 'America/Curacao', 'America/Danmarkshavn', 'America/Dawson', 'America/Dawson_Creek', 'America/Denver', 'America/Detroit', 'America/Dominica', 'America/Edmonton', 'America/Eirunepe', 'America/El_Salvador', 'America/Ensenada', 'America/Fort_Nelson', 'America/Fort_Wayne', 'America/Fortaleza', 'America/Glace_Bay', 'America/Godthab', 'America/Goose_Bay', 'America/Grand_Turk', 'America/Grenada', 'America/Guadeloupe', 'America/Guatemala', 'America/Guayaquil', 'America/Guyana', 'America/Halifax', 'America/Havana', 'America/Hermosillo', 'America/Indiana/Indianapolis', 'America/Indiana/Knox', 'America/Indiana/Marengo', 'America/Indiana/Petersburg', 'America/Indiana/Tell_City', 'America/Indiana/Vevay', 'America/Indiana/Vincennes', 'America/Indiana/Winamac', 'America/Indianapolis', 'America/Inuvik', 'America/Iqaluit', 'America/Jamaica', 'America/Jujuy', 'America/Juneau', 'America/Kentucky/Louisville', 'America/Kentucky/Monticello', 'America/Knox_IN', 'America/Kralendijk', 'America/La_Paz', 'America/Lima', 'America/Los_Angeles', 'America/Louisville', 'America/Lower_Princes', 'America/Maceio', 'America/Managua', 'America/Manaus', 'America/Marigot', 'America/Martinique', 'America/Matamoros', 'America/Mazatlan', 'America/Mendoza', 'America/Menominee', 'America/Merida', 'America/Metlakatla', 'America/Mexico_City', 'America/Miquelon', 'America/Moncton', 'America/Monterrey', 'America/Montevideo', 'America/Montreal', 'America/Montserrat', 'America/Nassau', 'America/New_York', 'America/Nipigon', 'America/Nome', 'America/Noronha', 'America/North_Dakota/Beulah', 'America/North_Dakota/Center', 'America/North_Dakota/New_Salem', 'America/Nuuk', 'America/Ojinaga', 'America/Panama', 'America/Pangnirtung', 'America/Paramaribo', 'America/Phoenix', 'America/Port-au-Prince', 'America/Port_of_Spain', 'America/Porto_Acre', 'America/Porto_Velho', 'America/Puerto_Rico', 'America/Punta_Arenas', 'America/Rainy_River', 'America/Rankin_Inlet', 'America/Recife', 'America/Regina', 'America/Resolute', 'America/Rio_Branco', 'America/Rosario', 'America/Santa_Isabel', 'America/Santarem', 'America/Santiago', 'America/Santo_Domingo', 'America/Sao_Paulo', 'America/Scoresbysund', 'America/Shiprock', 'America/Sitka', 'America/St_Barthelemy', 'America/St_Johns', 'America/St_Kitts', 'America/St_Lucia', 'America/St_Thomas', 'America/St_Vincent', 'America/Swift_Current', 'America/Tegucigalpa', 'America/Thule', 'America/Thunder_Bay', 'America/Tijuana', 'America/Toronto', 'America/Tortola', 'America/Vancouver', 'America/Virgin', 'America/Whitehorse', 'America/Winnipeg', 'America/Yakutat', 'America/Yellowknife', 'Antarctica/Casey', 'Antarctica/Davis', 'Antarctica/DumontDUrville', 'Antarctica/Macquarie', 'Antarctica/Mawson', 'Antarctica/McMurdo', 'Antarctica/Palmer', 'Antarctica/Rothera', 'Antarctica/South_Pole', 'Antarctica/Syowa', 'Antarctica/Troll', 'Antarctica/Vostok', 'Arctic/Longyearbyen', 'Asia/Aden', 'Asia/Almaty', 'Asia/Amman', 'Asia/Anadyr', 'Asia/Aqtau', 'Asia/Aqtobe', 'Asia/Ashgabat', 'Asia/Ashkhabad', 'Asia/Atyrau', 'Asia/Baghdad', 'Asia/Bahrain', 'Asia/Baku', 'Asia/Bangkok', 'Asia/Barnaul', 'Asia/Beirut', 'Asia/Bishkek', 'Asia/Brunei', 'Asia/Calcutta', 'Asia/Chita', 'Asia/Choibalsan', 'Asia/Chongqing', 'Asia/Chungking', 'Asia/Colombo', 'Asia/Dacca', 'Asia/Damascus', 'Asia/Dhaka', 'Asia/Dili', 'Asia/Dubai', 'Asia/Dushanbe', 'Asia/Famagusta', 'Asia/Gaza', 'Asia/Harbin', 'Asia/Hebron', 'Asia/Ho_Chi_Minh', 'Asia/Hong_Kong', 'Asia/Hovd', 'Asia/Irkutsk', 'Asia/Istanbul', 'Asia/Jakarta', 'Asia/Jayapura', 'Asia/Jerusalem', 'Asia/Kabul', 'Asia/Kamchatka', 'Asia/Karachi', 'Asia/Kashgar', 'Asia/Kathmandu', 'Asia/Katmandu', 'Asia/Khandyga', 'Asia/Kolkata', 'Asia/Krasnoyarsk', 'Asia/Kuala_Lumpur', 'Asia/Kuching', 'Asia/Kuwait', 'Asia/Macao', 'Asia/Macau', 'Asia/Magadan', 'Asia/Makassar', 'Asia/Manila', 'Asia/Muscat', 'Asia/Nicosia', 'Asia/Novokuznetsk', 'Asia/Novosibirsk', 'Asia/Omsk', 'Asia/Oral', 'Asia/Phnom_Penh', 'Asia/Pontianak', 'Asia/Pyongyang', 'Asia/Qatar', 'Asia/Qostanay', 'Asia/Qyzylorda', 'Asia/Rangoon', 'Asia/Riyadh', 'Asia/Saigon', 'Asia/Sakhalin', 'Asia/Samarkand', 'Asia/Seoul', 'Asia/Shanghai', 'Asia/Singapore', 'Asia/Srednekolymsk', 'Asia/Taipei', 'Asia/Tashkent', 'Asia/Tbilisi', 'Asia/Tehran', 'Asia/Tel_Aviv', 'Asia/Thimbu', 'Asia/Thimphu', 'Asia/Tokyo', 'Asia/Tomsk', 'Asia/Ujung_Pandang', 'Asia/Ulaanbaatar', 'Asia/Ulan_Bator', 'Asia/Urumqi', 'Asia/Ust-Nera', 'Asia/Vientiane', 'Asia/Vladivostok', 'Asia/Yakutsk', 'Asia/Yangon', 'Asia/Yekaterinburg', 'Asia/Yerevan', 'Atlantic/Azores', 'Atlantic/Bermuda', 'Atlantic/Canary', 'Atlantic/Cape_Verde', 'Atlantic/Faeroe', 'Atlantic/Faroe', 'Atlantic/Jan_Mayen', 'Atlantic/Madeira', 'Atlantic/Reykjavik', 'Atlantic/South_Georgia', 'Atlantic/St_Helena', 'Atlantic/Stanley', 'Australia/ACT', 'Australia/Adelaide', 'Australia/Brisbane', 'Australia/Broken_Hill', 'Australia/Canberra', 'Australia/Currie', 'Australia/Darwin', 'Australia/Eucla', 'Australia/Hobart', 'Australia/LHI', 'Australia/Lindeman', 'Australia/Lord_Howe', 'Australia/Melbourne', 'Australia/NSW', 'Australia/North', 'Australia/Perth', 'Australia/Queensland', 'Australia/South', 'Australia/Sydney', 'Australia/Tasmania', 'Australia/Victoria', 'Australia/West', 'Australia/Yancowinna', 'Brazil/Acre', 'Brazil/DeNoronha', 'Brazil/East', 'Brazil/West', 'CET', 'CST6CDT', 'Canada/Atlantic', 'Canada/Central', 'Canada/Eastern', 'Canada/Mountain', 'Canada/Newfoundland', 'Canada/Pacific', 'Canada/Saskatchewan', 'Canada/Yukon', 'Chile/Continental', 'Chile/EasterIsland', 'Cuba', 'EET', 'EST', 'EST5EDT', 'Egypt', 'Eire', 'Etc/GMT', 'Etc/GMT+0', 'Etc/GMT+1', 'Etc/GMT+10', 'Etc/GMT+11', 'Etc/GMT+12', 'Etc/GMT+2', 'Etc/GMT+3', 'Etc/GMT+4', 'Etc/GMT+5', 'Etc/GMT+6', 'Etc/GMT+7', 'Etc/GMT+8', 'Etc/GMT+9', 'Etc/GMT-0', 'Etc/GMT-1', 'Etc/GMT-10', 'Etc/GMT-11', 'Etc/GMT-12', 'Etc/GMT-13', 'Etc/GMT-14', 'Etc/GMT-2', 'Etc/GMT-3', 'Etc/GMT-4', 'Etc/GMT-5', 'Etc/GMT-6', 'Etc/GMT-7', 'Etc/GMT-8', 'Etc/GMT-9', 'Etc/GMT0', 'Etc/Greenwich', 'Etc/UCT', 'Etc/UTC', 'Etc/Universal', 'Etc/Zulu', 'Europe/Amsterdam', 'Europe/Andorra', 'Europe/Astrakhan', 'Europe/Athens', 'Europe/Belfast', 'Europe/Belgrade', 'Europe/Berlin', 'Europe/Bratislava', 'Europe/Brussels', 'Europe/Bucharest', 'Europe/Budapest', 'Europe/Busingen', 'Europe/Chisinau', 'Europe/Copenhagen', 'Europe/Dublin', 'Europe/Gibraltar', 'Europe/Guernsey', 'Europe/Helsinki', 'Europe/Isle_of_Man', 'Europe/Istanbul', 'Europe/Jersey', 'Europe/Kaliningrad', 'Europe/Kiev', 'Europe/Kirov', 'Europe/Kyiv', 'Europe/Lisbon', 'Europe/Ljubljana', 'Europe/London', 'Europe/Luxembourg', 'Europe/Madrid', 'Europe/Malta', 'Europe/Mariehamn', 'Europe/Minsk', 'Europe/Monaco', 'Europe/Moscow', 'Europe/Nicosia', 'Europe/Oslo', 'Europe/Paris', 'Europe/Podgorica', 'Europe/Prague', 'Europe/Riga', 'Europe/Rome', 'Europe/Samara', 'Europe/San_Marino', 'Europe/Sarajevo', 'Europe/Saratov', 'Europe/Simferopol', 'Europe/Skopje', 'Europe/Sofia', 'Europe/Stockholm', 'Europe/Tallinn', 'Europe/Tirane', 'Europe/Tiraspol', 'Europe/Ulyanovsk', 'Europe/Uzhgorod', 'Europe/Vaduz', 'Europe/Vatican', 'Europe/Vienna', 'Europe/Vilnius', 'Europe/Volgograd', 'Europe/Warsaw', 'Europe/Zagreb', 'Europe/Zaporozhye', 'Europe/Zurich', 'GB', 'GB-Eire', 'GMT', 'GMT+0', 'GMT-0', 'GMT0', 'Greenwich', 'HST', 'Hongkong', 'Iceland', 'Indian/Antananarivo', 'Indian/Chagos', 'Indian/Christmas', 'Indian/Cocos', 'Indian/Comoro', 'Indian/Kerguelen', 'Indian/Mahe', 'Indian/Maldives', 'Indian/Mauritius', 'Indian/Mayotte', 'Indian/Reunion', 'Iran', 'Israel', 'Jamaica', 'Japan', 'Kwajalein', 'Libya', 'MET', 'MST', 'MST7MDT', 'Mexico/BajaNorte', 'Mexico/BajaSur', 'Mexico/General', 'NZ', 'NZ-CHAT', 'Navajo', 'PRC', 'PST8PDT', 'Pacific/Apia', 'Pacific/Auckland', 'Pacific/Bougainville', 'Pacific/Chatham', 'Pacific/Chuuk', 'Pacific/Easter', 'Pacific/Efate', 'Pacific/Enderbury', 'Pacific/Fakaofo', 'Pacific/Fiji', 'Pacific/Funafuti', 'Pacific/Galapagos', 'Pacific/Gambier', 'Pacific/Guadalcanal', 'Pacific/Guam', 'Pacific/Honolulu', 'Pacific/Johnston', 'Pacific/Kanton', 'Pacific/Kiritimati', 'Pacific/Kosrae', 'Pacific/Kwajalein', 'Pacific/Majuro', 'Pacific/Marquesas', 'Pacific/Midway', 'Pacific/Nauru', 'Pacific/Niue', 'Pacific/Norfolk', 'Pacific/Noumea', 'Pacific/Pago_Pago', 'Pacific/Palau', 'Pacific/Pitcairn', 'Pacific/Pohnpei', 'Pacific/Ponape', 'Pacific/Port_Moresby', 'Pacific/Rarotonga', 'Pacific/Saipan', 'Pacific/Samoa', 'Pacific/Tahiti', 'Pacific/Tarawa', 'Pacific/Tongatapu', 'Pacific/Truk', 'Pacific/Wake', 'Pacific/Wallis', 'Pacific/Yap', 'Poland', 'Portugal', 'ROC', 'ROK', 'Singapore', 'Turkey', 'UCT', 'US/Alaska', 'US/Aleutian', 'US/Arizona', 'US/Central', 'US/East-Indiana', 'US/Eastern', 'US/Hawaii', 'US/Indiana-Starke', 'US/Michigan', 'US/Mountain', 'US/Pacific', 'US/Samoa', 'UTC', 'Universal', 'W-SU', 'WET', 'Zulu'), nullable=True, comment='IANA timezone name of the timezone which encompasses the largest portion of the population in the associated geographic area.'), sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), sa.Column('division_name_us_census', sa.Text(), nullable=True, comment='Longer human readable name describing the US Census division.'), - sa.Column('division_code_us_census', sa.Enum('WNC', 'NEW', 'PCN', 'WSC', 'SAT', 'PCC', 'ESC', 'ENC', 'MAT', 'MTN'), nullable=True, comment='Three-letter US Census division code as it appears in the bulk electricity data published by the EIA. Note that EIA splits the Pacific division into distinct contiguous (CA, OR, WA) and non-contiguous (AK, HI) states. For reference see this US Census region and division map: https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf'), + sa.Column('division_code_us_census', sa.Enum('SAT', 'PCC', 'WNC', 'WSC', 'MTN', 'NEW', 'ENC', 'MAT', 'ESC', 'PCN'), nullable=True, comment='Three-letter US Census division code as it appears in the bulk electricity data published by the EIA. Note that EIA splits the Pacific division into distinct contiguous (CA, OR, WA) and non-contiguous (AK, HI) states. For reference see this US Census region and division map: https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf'), sa.Column('region_name_us_census', sa.Text(), nullable=True, comment='Human-readable name of a US Census region.'), sa.Column('is_epacems_state', sa.Boolean(), nullable=True, comment="Indicates whether the associated state reports data within the EPA's Continuous Emissions Monitoring System."), sa.PrimaryKeyConstraint('country_code', 'subdivision_code', name=op.f('pk_core_pudl__codes_subdivisions')) @@ -892,7 +892,7 @@ def upgrade() -> None: sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', name=op.f('pk_core_eia__entity_generators')) ) op.create_table('core_ferc714__hourly_demand_pa', - sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False), + sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False, comment='FERC Form 714 respondent ID. Note that this ID does not correspond to FERC respondent IDs from other forms.'), sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('utc_datetime', sqlite.DATETIME(), nullable=False), sa.Column('timezone', sa.Enum('America/New_York', 'America/Chicago', 'America/Denver', 'America/Los_Angeles', 'America/Anchorage', 'Pacific/Honolulu'), nullable=True, comment='IANA timezone name'), @@ -1091,10 +1091,10 @@ def upgrade() -> None: sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code', name=op.f('pk_out_eia923__monthly_generation_fuel_combined')) ) op.create_table('out_ferc714__respondents_with_fips', - sa.Column('eia_code', sa.Integer(), nullable=True), + sa.Column('eia_code', sa.Integer(), nullable=True, comment='EIA utility or balancing area authority ID associated with this FERC Form 714 respondent. Note that many utilities are also balancing authorities and in many cases EIA uses the same integer ID to identify a utility in its role as a balancing authority AND as a utility, but there is no requirement that these IDs be the same, and in a number of cases they are different.'), sa.Column('respondent_type', sa.Enum('utility', 'balancing_authority'), nullable=True), - sa.Column('respondent_id_ferc714', sa.Integer(), nullable=True), - sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), + sa.Column('respondent_id_ferc714', sa.Integer(), nullable=True, comment='FERC Form 714 respondent ID. Note that this ID does not correspond to FERC respondent IDs from other forms.'), + sa.Column('respondent_name_ferc714', sa.Text(), nullable=True, comment='Name of the utility, balancing area authority, or planning authority responding to FERC Form 714.'), sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=True, comment='EIA balancing authority ID. This is often (but not always!) the same as the utility ID associated with the same legal entity.'), sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), @@ -1109,16 +1109,16 @@ def upgrade() -> None: ) op.create_table('out_ferc714__summarized_demand', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False), + sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False, comment='FERC Form 714 respondent ID. Note that this ID does not correspond to FERC respondent IDs from other forms.'), sa.Column('demand_annual_mwh', sa.Float(), nullable=True), sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), sa.Column('population_density_km2', sa.Float(), nullable=True, comment='Average population per sq. km area of a service territory.'), sa.Column('demand_annual_per_capita_mwh', sa.Float(), nullable=True, comment='Per-capita annual demand, averaged using Census county-level population estimates.'), sa.Column('demand_density_mwh_km2', sa.Float(), nullable=True, comment='Annual demand per km2 of a given service territory.'), - sa.Column('eia_code', sa.Integer(), nullable=True), + sa.Column('eia_code', sa.Integer(), nullable=True, comment='EIA utility or balancing area authority ID associated with this FERC Form 714 respondent. Note that many utilities are also balancing authorities and in many cases EIA uses the same integer ID to identify a utility in its role as a balancing authority AND as a utility, but there is no requirement that these IDs be the same, and in a number of cases they are different.'), sa.Column('respondent_type', sa.Enum('utility', 'balancing_authority'), nullable=True), - sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), + sa.Column('respondent_name_ferc714', sa.Text(), nullable=True, comment='Name of the utility, balancing area authority, or planning authority responding to FERC Form 714.'), sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=True, comment='EIA balancing authority ID. This is often (but not always!) the same as the utility ID associated with the same legal entity.'), sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), sa.Column('balancing_authority_name_eia', sa.Text(), nullable=True, comment='Name of the balancing authority.'), @@ -2658,7 +2658,7 @@ def upgrade() -> None: sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), sa.Column('project_num', sa.Integer(), nullable=True, comment='FERC Licensed Project Number.'), - sa.Column('plant_type', sa.Enum('na_category', 'run_of_river', 'hydro', 'storage', 'run_of_river_with_storage'), nullable=True), + sa.Column('plant_type', sa.Enum('run_of_river', 'na_category', 'run_of_river_with_storage', 'storage', 'hydro'), nullable=True), sa.Column('construction_type', sa.Enum('conventional', 'outdoor', 'semioutdoor'), nullable=True, comment="Type of plant construction ('outdoor', 'semioutdoor', or 'conventional'). Categorized by PUDL based on our best guess of intended value in FERC1 freeform strings."), sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), @@ -2776,7 +2776,7 @@ def upgrade() -> None: sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), sa.Column('plant_id_ferc1', sa.Integer(), nullable=True, comment='Algorithmically assigned PUDL FERC Plant ID. WARNING: NOT STABLE BETWEEN PUDL DB INITIALIZATIONS.'), sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), - sa.Column('plant_type', sa.Enum('na_category', 'steam', 'combustion_turbine', 'geothermal', 'combined_cycle', 'internal_combustion', 'nuclear', 'solar_thermal', 'wind', 'photovoltaic'), nullable=True), + sa.Column('plant_type', sa.Enum('solar_thermal', 'internal_combustion', 'wind', 'combustion_turbine', 'combined_cycle', 'steam', 'na_category', 'photovoltaic', 'nuclear', 'geothermal'), nullable=True), sa.Column('construction_type', sa.Enum('conventional', 'outdoor', 'semioutdoor'), nullable=True, comment="Type of plant construction ('outdoor', 'semioutdoor', or 'conventional'). Categorized by PUDL based on our best guess of intended value in FERC1 freeform strings."), sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), @@ -3312,7 +3312,7 @@ def upgrade() -> None: sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), sa.Column('owner_utility_name_eia', sa.Text(), nullable=True, comment='The name of the EIA owner utility.'), - sa.Column('owner_state', sa.Enum('NE', 'HI', 'OK', 'MA', 'QC', 'GA', 'SK', 'SD', 'WI', 'ID', 'KS', 'IA', 'NU', 'ON', 'AZ', 'IL', 'UT', 'FL', 'KY', 'MB', 'MD', 'CO', 'MO', 'AL', 'AB', 'NB', 'MT', 'NH', 'BC', 'MI', 'NS', 'OH', 'NJ', 'MS', 'GU', 'NT', 'WA', 'AS', 'OR', 'VT', 'SC', 'ND', 'NY', 'PR', 'NM', 'PE', 'VI', 'DC', 'WY', 'LA', 'IN', 'NV', 'AR', 'PA', 'ME', 'TX', 'YT', 'CT', 'MN', 'WV', 'AK', 'NC', 'RI', 'CA', 'MP', 'DE', 'NL', 'VA', 'TN'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), + sa.Column('owner_state', sa.Enum('NJ', 'KY', 'PE', 'NT', 'DE', 'MP', 'ON', 'MB', 'OK', 'SD', 'GU', 'CA', 'CT', 'NE', 'AS', 'UT', 'KS', 'MO', 'NC', 'QC', 'NB', 'AL', 'AR', 'BC', 'NU', 'OR', 'VT', 'NM', 'PR', 'WI', 'WA', 'PA', 'RI', 'TX', 'GA', 'MA', 'AB', 'AZ', 'YT', 'ME', 'VI', 'SC', 'IN', 'WV', 'FL', 'NY', 'MS', 'ID', 'DC', 'IL', 'MI', 'CO', 'LA', 'NL', 'MD', 'OH', 'NS', 'AK', 'SK', 'VA', 'MT', 'ND', 'NH', 'MN', 'TN', 'IA', 'HI', 'WY', 'NV'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), sa.Column('owner_city', sa.Text(), nullable=True, comment='City of owner.'), sa.Column('owner_country', sa.Enum('USA', 'CAN'), nullable=True, comment='Three letter ISO-3166 country code.'), sa.Column('owner_street_address', sa.Text(), nullable=True, comment='Steet address of owner.'), @@ -3324,46 +3324,6 @@ def upgrade() -> None: sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['core_eia860__scd_generators.plant_id_eia', 'core_eia860__scd_generators.generator_id', 'core_eia860__scd_generators.report_date'], name=op.f('fk_core_eia860__scd_ownership_plant_id_eia_core_eia860__scd_generators')), sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'owner_utility_id_eia', name=op.f('pk_core_eia860__scd_ownership')) ) - op.create_table('mega_generators_eia', - sa.Column('plant_id_eia', sa.Integer(), nullable=True, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=True, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('technology_description', sa.Text(), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'), - sa.Column('energy_source_code_1', sa.Text(), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'), - sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), - sa.Column('generator_operating_date', sa.Date(), nullable=True, comment='Date the generator began commercial operation.'), - sa.Column('generator_retirement_date', sa.Date(), nullable=True, comment='Date of the scheduled or effected retirement of the generator.'), - sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. For generators this is based on which tab the generator was listed in in EIA 860.'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('planned_generator_retirement_date', sa.Date(), nullable=True, comment='Planned effective date of the scheduled retirement of the generator.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), - sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. Includes costs from all fuels.'), - sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), - sa.Column('ferc_acct_name', sa.Enum('Hydraulic', 'Nuclear', 'Steam', 'Other'), nullable=True, comment='Name of FERC account, derived from technology description and prime mover code.'), - sa.Column('generator_operating_year', sa.Integer(), nullable=True, comment='Year a generator went into service.'), - sa.Column('operational_status_pudl', sa.Enum('operating', 'retired', 'proposed'), nullable=True, comment='The operating status of the asset using PUDL categories.'), - sa.Column('capacity_eoy_mw', sa.Float(), nullable=True, comment='Total end of year installed (nameplate) capacity for a plant part, in megawatts.'), - sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), - sa.Column('ownership_record_type', sa.Enum('owned', 'total'), nullable=True, comment='Whether each generator record is for one owner or represents a total of all ownerships.'), - sa.ForeignKeyConstraint(['energy_source_code_1'], ['core_eia__codes_energy_sources.code'], name=op.f('fk_mega_generators_eia_energy_source_code_1_core_eia__codes_energy_sources')), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['core_eia860__scd_generators.plant_id_eia', 'core_eia860__scd_generators.generator_id', 'core_eia860__scd_generators.report_date'], name=op.f('fk_mega_generators_eia_plant_id_eia_core_eia860__scd_generators')), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['core_pudl__entity_plants_pudl.plant_id_pudl'], name=op.f('fk_mega_generators_eia_plant_id_pudl_core_pudl__entity_plants_pudl')), - sa.ForeignKeyConstraint(['prime_mover_code'], ['core_eia__codes_prime_movers.code'], name=op.f('fk_mega_generators_eia_prime_mover_code_core_eia__codes_prime_movers')), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['core_eia860__scd_utilities.utility_id_eia', 'core_eia860__scd_utilities.report_date'], name=op.f('fk_mega_generators_eia_utility_id_eia_core_eia860__scd_utilities')), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['core_pudl__entity_utilities_pudl.utility_id_pudl'], name=op.f('fk_mega_generators_eia_utility_id_pudl_core_pudl__entity_utilities_pudl')) - ) op.create_table('out_eia860__yearly_ownership', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -3373,7 +3333,7 @@ def upgrade() -> None: sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('owner_utility_name_eia', sa.Text(), nullable=True, comment='The name of the EIA owner utility.'), sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('owner_state', sa.Enum('NE', 'HI', 'OK', 'MA', 'QC', 'GA', 'SK', 'SD', 'WI', 'ID', 'KS', 'IA', 'NU', 'ON', 'AZ', 'IL', 'UT', 'FL', 'KY', 'MB', 'MD', 'CO', 'MO', 'AL', 'AB', 'NB', 'MT', 'NH', 'BC', 'MI', 'NS', 'OH', 'NJ', 'MS', 'GU', 'NT', 'WA', 'AS', 'OR', 'VT', 'SC', 'ND', 'NY', 'PR', 'NM', 'PE', 'VI', 'DC', 'WY', 'LA', 'IN', 'NV', 'AR', 'PA', 'ME', 'TX', 'YT', 'CT', 'MN', 'WV', 'AK', 'NC', 'RI', 'CA', 'MP', 'DE', 'NL', 'VA', 'TN'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), + sa.Column('owner_state', sa.Enum('NJ', 'KY', 'PE', 'NT', 'DE', 'MP', 'ON', 'MB', 'OK', 'SD', 'GU', 'CA', 'CT', 'NE', 'AS', 'UT', 'KS', 'MO', 'NC', 'QC', 'NB', 'AL', 'AR', 'BC', 'NU', 'OR', 'VT', 'NM', 'PR', 'WI', 'WA', 'PA', 'RI', 'TX', 'GA', 'MA', 'AB', 'AZ', 'YT', 'ME', 'VI', 'SC', 'IN', 'WV', 'FL', 'NY', 'MS', 'ID', 'DC', 'IL', 'MI', 'CO', 'LA', 'NL', 'MD', 'OH', 'NS', 'AK', 'SK', 'VA', 'MT', 'ND', 'NH', 'MN', 'TN', 'IA', 'HI', 'WY', 'NV'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), sa.Column('owner_city', sa.Text(), nullable=True, comment='City of owner.'), sa.Column('owner_country', sa.Enum('USA', 'CAN'), nullable=True, comment='Three letter ISO-3166 country code.'), sa.Column('owner_street_address', sa.Text(), nullable=True, comment='Steet address of owner.'), @@ -3834,7 +3794,7 @@ def upgrade() -> None: sa.Column('record_id_eia', sa.Text(), nullable=False, comment='Identifier for EIA plant parts analysis records.'), sa.Column('plant_id_eia', sa.Integer(), nullable=True, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), - sa.Column('plant_part', sa.Enum('plant_ferc_acct', 'plant_operating_year', 'plant_technology', 'plant_prime_mover', 'plant_prime_fuel', 'plant_gen', 'plant', 'plant_match_ferc1', 'plant_unit'), nullable=True, comment='The part of the plant a record corresponds to.'), + sa.Column('plant_part', sa.Enum('plant_unit', 'plant_gen', 'plant', 'plant_operating_year', 'plant_prime_fuel', 'plant_prime_mover', 'plant_technology', 'plant_ferc_acct', 'plant_match_ferc1'), nullable=True, comment='The part of the plant a record corresponds to.'), sa.Column('generator_id', sa.Text(), nullable=True, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), @@ -3843,7 +3803,7 @@ def upgrade() -> None: sa.Column('ferc_acct_name', sa.Enum('Hydraulic', 'Nuclear', 'Steam', 'Other'), nullable=True, comment='Name of FERC account, derived from technology description and prime mover code.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('true_gran', sa.Boolean(), nullable=True, comment='Indicates whether a plant part list record is associated with the highest priority plant part for all identical records.'), - sa.Column('appro_part_label', sa.Enum('plant_ferc_acct', 'plant_operating_year', 'plant_technology', 'plant_prime_mover', 'plant_prime_fuel', 'plant_gen', 'plant', 'plant_match_ferc1', 'plant_unit'), nullable=True, comment='Plant part of the associated true granularity record.'), + sa.Column('appro_part_label', sa.Enum('plant_unit', 'plant_gen', 'plant', 'plant_operating_year', 'plant_prime_fuel', 'plant_prime_mover', 'plant_technology', 'plant_ferc_acct', 'plant_match_ferc1'), nullable=True, comment='Plant part of the associated true granularity record.'), sa.Column('appro_record_id_eia', sa.Text(), nullable=True, comment='EIA record ID of the associated true granularity record.'), sa.Column('ferc1_generator_agg_id', sa.Integer(), nullable=True, comment='ID dynamically assigned by PUDL to EIA records with multiple matches to a single FERC ID in the FERC-EIA manual matching process.'), sa.Column('capacity_eoy_mw', sa.Float(), nullable=True, comment='Total end of year installed (nameplate) capacity for a plant part, in megawatts.'), @@ -3887,7 +3847,7 @@ def upgrade() -> None: sa.Column('record_id_eia', sa.Text(), nullable=True, comment='Identifier for EIA plant parts analysis records.'), sa.Column('match_type', sa.Text(), nullable=True, comment='Indicates the source and validation of the match between EIA and FERC. Match types include matches was generated from the model, verified by the training data, overridden by the training data, etc.'), sa.Column('plant_name_ppe', sa.Text(), nullable=True, comment='Derived plant name that includes EIA plant name and other strings associated with ID and PK columns of the plant part.'), - sa.Column('plant_part', sa.Enum('plant_ferc_acct', 'plant_operating_year', 'plant_technology', 'plant_prime_mover', 'plant_prime_fuel', 'plant_gen', 'plant', 'plant_match_ferc1', 'plant_unit'), nullable=True, comment='The part of the plant a record corresponds to.'), + sa.Column('plant_part', sa.Enum('plant_unit', 'plant_gen', 'plant', 'plant_operating_year', 'plant_prime_fuel', 'plant_prime_mover', 'plant_technology', 'plant_ferc_acct', 'plant_match_ferc1'), nullable=True, comment='The part of the plant a record corresponds to.'), sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), sa.Column('ownership_record_type', sa.Enum('owned', 'total'), nullable=True, comment='Whether each generator record is for one owner or represents a total of all ownerships.'), @@ -3903,7 +3863,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('true_gran', sa.Boolean(), nullable=True, comment='Indicates whether a plant part list record is associated with the highest priority plant part for all identical records.'), - sa.Column('appro_part_label', sa.Enum('plant_ferc_acct', 'plant_operating_year', 'plant_technology', 'plant_prime_mover', 'plant_prime_fuel', 'plant_gen', 'plant', 'plant_match_ferc1', 'plant_unit'), nullable=True, comment='Plant part of the associated true granularity record.'), + sa.Column('appro_part_label', sa.Enum('plant_unit', 'plant_gen', 'plant', 'plant_operating_year', 'plant_prime_fuel', 'plant_prime_mover', 'plant_technology', 'plant_ferc_acct', 'plant_match_ferc1'), nullable=True, comment='Plant part of the associated true granularity record.'), sa.Column('appro_record_id_eia', sa.Text(), nullable=True, comment='EIA record ID of the associated true granularity record.'), sa.Column('record_count', sa.Integer(), nullable=True, comment='Number of distinct generator IDs that partcipated in the aggregation for a plant part list record.'), sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), @@ -4010,64 +3970,11 @@ def upgrade() -> None: sa.ForeignKeyConstraint(['utility_id_pudl'], ['core_pudl__entity_utilities_pudl.utility_id_pudl'], name=op.f('fk_out_pudl__yearly_assn_eia_ferc1_plant_parts_utility_id_pudl_core_pudl__entity_utilities_pudl')), sa.PrimaryKeyConstraint('record_id_ferc1', name=op.f('pk_out_pudl__yearly_assn_eia_ferc1_plant_parts')) ) - op.create_table('plant_parts_eia', - sa.Column('record_id_eia', sa.Text(), nullable=False, comment='Identifier for EIA plant parts analysis records.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=True, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), - sa.Column('plant_part', sa.Enum('plant_ferc_acct', 'plant_operating_year', 'plant_technology', 'plant_prime_mover', 'plant_prime_fuel', 'plant_gen', 'plant', 'plant_match_ferc1', 'plant_unit'), nullable=True, comment='The part of the plant a record corresponds to.'), - sa.Column('generator_id', sa.Text(), nullable=True, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), - sa.Column('energy_source_code_1', sa.Enum('WO', 'GEO', 'TDF', 'SG', 'SUN', 'NUC', 'DFO', 'MSN', 'SUB', 'WC', 'OTH', 'AB', 'RC', 'LFG', 'SGC', 'SLW', 'SGP', 'WDL', 'BFG', 'WH', 'MSB', 'ANT', 'BLQ', 'LIG', 'OBS', 'OG', 'SC', 'MWH', 'NG', 'PC', 'RFO', 'BIT', 'WAT', 'OBG', 'PG', 'JF', 'WDS', 'KER', 'WND', 'MSW', 'OBL', 'PUR'), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'), - sa.Column('technology_description', sa.Enum('All Other', 'Natural Gas Fired Combustion Turbine', 'Wood/Wood Waste Biomass', 'Conventional Steam Coal', 'Coal Integrated Gasification Combined Cycle', 'Other Gases', 'Petroleum Coke', 'Landfill Gas', 'Geothermal', 'Hydrokinetic', 'Natural Gas Internal Combustion Engine', 'Natural Gas Fired Combined Cycle', 'Natural Gas with Compressed Air Storage', 'Batteries', 'Natural Gas Steam Turbine', 'Onshore Wind Turbine', 'Petroleum Liquids', 'Solar Thermal with Energy Storage', 'Other Waste Biomass', 'Conventional Hydroelectric', 'Municipal Solid Waste', 'Solar Photovoltaic', 'Solar Thermal without Energy Storage', 'Hydroelectric Pumped Storage', 'Nuclear', 'Flywheels', 'Other Natural Gas', 'Offshore Wind Turbine'), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'), - sa.Column('ferc_acct_name', sa.Enum('Hydraulic', 'Nuclear', 'Steam', 'Other'), nullable=True, comment='Name of FERC account, derived from technology description and prime mover code.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('true_gran', sa.Boolean(), nullable=True, comment='Indicates whether a plant part list record is associated with the highest priority plant part for all identical records.'), - sa.Column('appro_part_label', sa.Enum('plant_ferc_acct', 'plant_operating_year', 'plant_technology', 'plant_prime_mover', 'plant_prime_fuel', 'plant_gen', 'plant', 'plant_match_ferc1', 'plant_unit'), nullable=True, comment='Plant part of the associated true granularity record.'), - sa.Column('appro_record_id_eia', sa.Text(), nullable=True, comment='EIA record ID of the associated true granularity record.'), - sa.Column('ferc1_generator_agg_id', sa.Integer(), nullable=True, comment='ID dynamically assigned by PUDL to EIA records with multiple matches to a single FERC ID in the FERC-EIA manual matching process.'), - sa.Column('capacity_eoy_mw', sa.Float(), nullable=True, comment='Total end of year installed (nameplate) capacity for a plant part, in megawatts.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), - sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('generator_retirement_date', sa.Date(), nullable=True, comment='Date of the scheduled or effected retirement of the generator.'), - sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('generator_operating_year', sa.Integer(), nullable=True, comment='Year a generator went into service.'), - sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. For generators this is based on which tab the generator was listed in in EIA 860.'), - sa.Column('operational_status_pudl', sa.Enum('operating', 'retired', 'proposed'), nullable=True, comment='The operating status of the asset using PUDL categories.'), - sa.Column('ownership_record_type', sa.Enum('owned', 'total'), nullable=True, comment='Whether each generator record is for one owner or represents a total of all ownerships.'), - sa.Column('ownership_dupe', sa.Boolean(), nullable=True, comment='Whether a plant part record has a duplicate record with different ownership status.'), - sa.Column('planned_generator_retirement_date', sa.Date(), nullable=True, comment='Planned effective date of the scheduled retirement of the generator.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('plant_name_ppe', sa.Text(), nullable=True, comment='Derived plant name that includes EIA plant name and other strings associated with ID and PK columns of the plant part.'), - sa.Column('plant_part_id_eia', sa.Text(), nullable=True, comment='Contains EIA plant ID, plant part, ownership, and EIA utility id'), - sa.Column('record_count', sa.Integer(), nullable=True, comment='Number of distinct generator IDs that partcipated in the aggregation for a plant part list record.'), - sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. Includes costs from all fuels.'), - sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), - sa.Column('plant_id_report_year', sa.Text(), nullable=True, comment='PUDL plant ID and report year of the record.'), - sa.ForeignKeyConstraint(['energy_source_code_1'], ['core_eia__codes_energy_sources.code'], name=op.f('fk_plant_parts_eia_energy_source_code_1_core_eia__codes_energy_sources')), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['core_eia860__scd_generators.plant_id_eia', 'core_eia860__scd_generators.generator_id', 'core_eia860__scd_generators.report_date'], name=op.f('fk_plant_parts_eia_plant_id_eia_core_eia860__scd_generators')), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['core_pudl__entity_plants_pudl.plant_id_pudl'], name=op.f('fk_plant_parts_eia_plant_id_pudl_core_pudl__entity_plants_pudl')), - sa.ForeignKeyConstraint(['prime_mover_code'], ['core_eia__codes_prime_movers.code'], name=op.f('fk_plant_parts_eia_prime_mover_code_core_eia__codes_prime_movers')), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['core_eia860__scd_utilities.utility_id_eia', 'core_eia860__scd_utilities.report_date'], name=op.f('fk_plant_parts_eia_utility_id_eia_core_eia860__scd_utilities')), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['core_pudl__entity_utilities_pudl.utility_id_pudl'], name=op.f('fk_plant_parts_eia_utility_id_pudl_core_pudl__entity_utilities_pudl')), - sa.PrimaryKeyConstraint('record_id_eia', name=op.f('pk_plant_parts_eia')) - ) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('plant_parts_eia') op.drop_table('out_pudl__yearly_assn_eia_ferc1_plant_parts') op.drop_table('out_eia__yearly_plant_parts') op.drop_table('out_eia__yearly_generators_by_ownership') @@ -4079,7 +3986,6 @@ def downgrade() -> None: op.drop_table('out_eia923__yearly_generation') op.drop_table('out_eia923__yearly_boiler_fuel') op.drop_table('out_eia860__yearly_ownership') - op.drop_table('mega_generators_eia') op.drop_table('core_eia860__scd_ownership') op.drop_table('core_eia860__assn_yearly_boiler_emissions_control_equipment') op.drop_table('core_eia860__assn_boiler_stack_flue') diff --git a/src/pudl/analysis/eia_ferc1_record_linkage.py b/src/pudl/analysis/eia_ferc1_record_linkage.py index 36347b1e0f..27844f65ec 100644 --- a/src/pudl/analysis/eia_ferc1_record_linkage.py +++ b/src/pudl/analysis/eia_ferc1_record_linkage.py @@ -1051,7 +1051,7 @@ def add_null_overrides(connects_ferc1_eia): logger.debug(f"Found {len(null_overrides)} null overrides") # List of EIA columns to null. Ideally would like to get this from elsewhere, but # compiling this here for now... - eia_cols_to_null = Resource.from_id("plant_parts_eia").get_field_names() + eia_cols_to_null = Resource.from_id("out_eia__yearly_plant_parts").get_field_names() # Make all EIA values NA for record_id_ferc1 values in the Null overrides list and # make the match_type column say "overriden" connects_ferc1_eia.loc[ diff --git a/src/pudl/metadata/resources/eia.py b/src/pudl/metadata/resources/eia.py index bebf089c51..72efd02d5f 100644 --- a/src/pudl/metadata/resources/eia.py +++ b/src/pudl/metadata/resources/eia.py @@ -525,48 +525,6 @@ "etl_group": "entity_eia", "field_namespace": "eia", }, - "out_eia__yearly_generators_by_ownership": { - "description": "A mega table of all EIA generators with ownership integrated.", - "schema": { - "fields": [ - "plant_id_eia", - "generator_id", - "report_date", - "unit_id_pudl", - "plant_id_pudl", - "plant_name_eia", - "utility_id_eia", - "utility_id_pudl", - "utility_name_eia", - "technology_description", - "energy_source_code_1", - "prime_mover_code", - "generator_operating_date", - "generator_retirement_date", - "operational_status", - "capacity_mw", - "fuel_type_code_pudl", - "planned_generator_retirement_date", - "capacity_factor", - "fuel_cost_from_eiaapi", - "fuel_cost_per_mmbtu", - "fuel_cost_per_mwh", - "unit_heat_rate_mmbtu_per_mwh", - "net_generation_mwh", - "total_fuel_cost", - "total_mmbtu", - "ferc_acct_name", - "generator_operating_year", - "operational_status_pudl", - "capacity_eoy_mw", - "fraction_owned", - "ownership_record_type", - ], - }, - "sources": ["eia860", "eia923"], - "etl_group": "outputs", - "field_namespace": "eia", - }, "core_eia__codes_momentary_interruptions": { "description": "A coding table for utility definitions of momentary service interruptions.", "schema": { @@ -623,60 +581,6 @@ "etl_group": "entity_eia", "field_namespace": "eia", }, - "out_eia__yearly_plant_parts": { - "description": "Output table with the aggregation of all EIA plant parts. For use with matching to FERC 1.", - "schema": { - "fields": [ - "record_id_eia", - "plant_id_eia", - "report_date", - "plant_part", - "generator_id", - "unit_id_pudl", - "prime_mover_code", - "energy_source_code_1", - "technology_description", - "ferc_acct_name", - "utility_id_eia", - "true_gran", - "appro_part_label", - "appro_record_id_eia", - "ferc1_generator_agg_id", - "capacity_eoy_mw", - "capacity_factor", - "capacity_mw", - "construction_year", - "fraction_owned", - "fuel_cost_per_mmbtu", - "fuel_cost_per_mwh", - "fuel_type_code_pudl", - "generator_retirement_date", - "unit_heat_rate_mmbtu_per_mwh", - "installation_year", - "net_generation_mwh", - "generator_operating_year", - "operational_status", - "operational_status_pudl", - "ownership_record_type", - "ownership_dupe", - "planned_generator_retirement_date", - "plant_id_pudl", - "plant_name_eia", - "plant_name_ppe", - "plant_part_id_eia", - "record_count", - "total_fuel_cost", - "total_mmbtu", - "utility_id_pudl", - "report_year", - "plant_id_report_year", - ], - "primary_key": ["record_id_eia"], - }, - "sources": ["eia860", "eia923"], - "etl_group": "outputs", - "field_namespace": "eia", - }, "core_eia__codes_prime_movers": { "description": "Long descriptions explaining the short prime mover codes reported in the EIA-860 and EIA-923.", "schema": { diff --git a/src/pudl/metadata/resources/ferc1.py b/src/pudl/metadata/resources/ferc1.py index b76bd418c3..6f64e0ccf9 100644 --- a/src/pudl/metadata/resources/ferc1.py +++ b/src/pudl/metadata/resources/ferc1.py @@ -802,16 +802,6 @@ "etl_group": "ferc1", "field_namespace": "ferc1", }, - "retained_earnings_appropriations_ferc1": { - "description": "Retained Earnings - some of the unstructed part of schedule 118.", - "schema": { - "fields": ["utility_id_ferc1", "report_year", "utility_type", "record_id"], - }, - "sources": ["ferc1"], - "etl_group": "ferc1_disabled", - "field_namespace": "ferc1", - "create_database_schema": False, - }, "core_ferc1__yearly_operating_revenues_sched300": { "description": ( "Electric operating revenues - The structed part of schedule 300." diff --git a/src/pudl/metadata/resources/ferc1_eia_record_linkage.py b/src/pudl/metadata/resources/ferc1_eia_record_linkage.py index 5592735261..2163307820 100644 --- a/src/pudl/metadata/resources/ferc1_eia_record_linkage.py +++ b/src/pudl/metadata/resources/ferc1_eia_record_linkage.py @@ -2,7 +2,7 @@ from typing import Any RESOURCE_METADATA: dict[str, dict[str, Any]] = { - "plant_parts_eia": { + "out_eia__yearly_plant_parts": { "description": """Output table with the aggregation of all EIA plant parts. For use with matching to FERC 1. Practically speaking, a plant is a collection of generator(s). There are many @@ -80,7 +80,7 @@ "etl_group": "outputs", "field_namespace": "eia", }, - "mega_generators_eia": { + "out_eia__yearly_generators_by_ownership": { "description": "A mega table of all EIA generators with ownership integrated.", "schema": { "fields": [