diff --git a/docker/Dockerfile b/docker/Dockerfile index cadabad1f..4ee50f739 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM mambaorg/micromamba:2.0.3-ubuntu24.04 +FROM mambaorg/micromamba:2.0.5-ubuntu24.04 ENV CONTAINER_HOME=/home/$MAMBA_USER ENV PGDATA=${CONTAINER_HOME}/pgdata diff --git a/environments/conda-linux-64.lock.yml b/environments/conda-linux-64.lock.yml index 030a6cf6f..97663c14b 100644 --- a/environments/conda-linux-64.lock.yml +++ b/environments/conda-linux-64.lock.yml @@ -1,6 +1,6 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: 881fda227b1ccfa653b8df9a535ee504519e3051b410e102a354414c22f66ce6 +# input_hash: 17868cb060258ffba68e6ec8a027e690aeaaf903491bc3cbdf948787c99c390b channels: - conda-forge @@ -21,7 +21,7 @@ dependencies: - anyio=4.8.0=pyhd8ed1ab_0 - aom=3.9.1=hac33072_0 - appdirs=1.4.4=pyhd8ed1ab_1 - - arelle-release=2.36.7=pyhd8ed1ab_0 + - arelle-release=2.36.9=pyhd8ed1ab_0 - argon2-cffi=23.1.0=pyhd8ed1ab_1 - argon2-cffi-bindings=21.2.0=py312h66e93f0_5 - arrow=1.3.0=pyhd8ed1ab_1 @@ -57,13 +57,13 @@ dependencies: - backports=1.0=pyhd8ed1ab_5 - backports.tarfile=1.2.0=pyhd8ed1ab_1 - bcrypt=4.2.1=py312h12e396e_0 - - beautifulsoup4=4.13.0=pyha770c72_0 + - beautifulsoup4=4.13.2=pyha770c72_0 - bleach=6.2.0=pyh29332c3_4 - bleach-with-css=6.2.0=h82add2a_4 - blinker=1.9.0=pyhff2d567_0 - blosc=1.21.6=he440d0b_1 - - boto3=1.36.11=pyhd8ed1ab_0 - - botocore=1.36.11=pyge310_1234567_0 + - boto3=1.36.12=pyhd8ed1ab_0 + - botocore=1.36.12=pyge310_1234567_0 - bottleneck=1.4.2=py312hc0a28a1_0 - branca=0.8.1=pyhd8ed1ab_0 - brotli=1.1.0=hb9d3cd8_2 @@ -109,14 +109,13 @@ dependencies: - dagster-webserver=1.9.11=pyhc5d1ea7_0 - dask-core=2025.1.0=pyhd8ed1ab_0 - dask-expr=2.0.0=pyhd8ed1ab_0 - - databricks-sdk=0.42.0=pyhd8ed1ab_0 + - databricks-sdk=0.43.0=pyhd8ed1ab_0 - datasette=0.65.1=pyhd8ed1ab_0 - dav1d=1.2.1=hd590300_0 - dbus=1.13.6=h5008d03_3 - debugpy=1.8.12=py312h2ec8cdc_0 - decorator=5.1.1=pyhd8ed1ab_1 - 
defusedxml=0.7.1=pyhd8ed1ab_0 - - deltalake=0.24.0=py312h07cb367_0 - deprecated=1.2.18=pyhd8ed1ab_0 - distlib=0.3.9=pyhd8ed1ab_1 - dnspython=2.7.0=pyhff2d567_1 @@ -182,7 +181,7 @@ dependencies: - graphite2=1.3.13=h59595ed_1003 - graphql-core=3.2.6=pyh29332c3_0 - graphql-relay=3.2.0=pyhd8ed1ab_1 - - graphviz=12.2.1=h618ab57_0 + - graphviz=12.2.1=h5ae0cbf_1 - greenlet=3.1.1=py312h2ec8cdc_1 - grpcio=1.67.1=py312hacea422_1 - grpcio-health-checking=1.67.1=pyhd8ed1ab_1 @@ -384,7 +383,7 @@ dependencies: - pandoc=3.6.2=ha770c72_0 - pandocfilters=1.5.0=pyhd8ed1ab_0 - pango=1.56.1=h861ebed_0 - - paramiko=3.5.0=pyhd8ed1ab_1 + - paramiko=3.5.1=pyhd8ed1ab_0 - parso=0.8.4=pyhd8ed1ab_1 - partd=1.4.2=pyhd8ed1ab_0 - pastel=0.2.1=pyhd8ed1ab_0 @@ -418,7 +417,6 @@ dependencies: - pure_eval=0.2.3=pyhd8ed1ab_1 - pyarrow=18.1.0=py312h7900ff3_0 - pyarrow-core=18.1.0=py312h01725c0_0_cpu - - pyarrow-hotfix=0.6=pyhd8ed1ab_1 - pyasn1=0.6.1=pyhd8ed1ab_2 - pyasn1-modules=0.4.1=pyhd8ed1ab_1 - pybtex=0.24.0=pyhd8ed1ab_3 @@ -582,7 +580,7 @@ dependencies: - xlsxwriter=3.2.2=pyhd8ed1ab_0 - xorg-libice=1.1.2=hb9d3cd8_0 - xorg-libsm=1.2.5=he73a12e_0 - - xorg-libx11=1.8.10=h4f16b4b_1 + - xorg-libx11=1.8.11=h4f16b4b_0 - xorg-libxau=1.0.12=hb9d3cd8_0 - xorg-libxcomposite=0.4.6=hb9d3cd8_2 - xorg-libxcursor=1.2.3=hb9d3cd8_0 diff --git a/environments/conda-lock.yml b/environments/conda-lock.yml index 8489cb989..b72ba02a8 100644 --- a/environments/conda-lock.yml +++ b/environments/conda-lock.yml @@ -15,9 +15,9 @@ version: 1 metadata: content_hash: - linux-64: 881fda227b1ccfa653b8df9a535ee504519e3051b410e102a354414c22f66ce6 - osx-64: 0e66c2c5134fd7dcc98c7ea75aa4c0a3b06f62bf45fbb2ede2f6c4252e8b4e3c - osx-arm64: fedcc3f67e42150aed97e57c4e65edd4dfd8085c958ca43b4a841e6c2bc98245 + linux-64: 17868cb060258ffba68e6ec8a027e690aeaaf903491bc3cbdf948787c99c390b + osx-64: 3ba3060f95f7cca7b0b57dcc26b27cf7362f666479bfecb76c998d5ed4b74e2f + osx-arm64: 73cdf399b1dae90c8f5b420504c02e42883d416d1790e0d8390943326d33eea8 channels: 
- url: conda-forge used_env_vars: [] @@ -666,7 +666,7 @@ package: category: main optional: false - name: arelle-release - version: 2.36.7 + version: 2.36.9 manager: conda platform: linux-64 dependencies: @@ -681,14 +681,14 @@ package: python: ">=3.9,<3.13" python-dateutil: 2.* regex: "" - url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.36.7-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.36.9-pyhd8ed1ab_0.conda hash: - md5: 00cf32010e58fbec8e6c0b9f80107697 - sha256: a28ae1b201eedeef52b46ff9a284b1c91df05ff7fdbecba96adaa85582c9d53d + md5: 9ece791f1f00560d5b3a5ceb24b28bca + sha256: a0b12e726303a2497074001b184eec96e807a388ea84a38e98ccb1fd7053cde7 category: main optional: false - name: arelle-release - version: 2.36.7 + version: 2.36.9 manager: conda platform: osx-64 dependencies: @@ -703,14 +703,14 @@ package: lxml: ">=4,<6" numpy: ">=1,<3" pillow: ">=10,<12" - url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.36.7-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.36.9-pyhd8ed1ab_0.conda hash: - md5: 00cf32010e58fbec8e6c0b9f80107697 - sha256: a28ae1b201eedeef52b46ff9a284b1c91df05ff7fdbecba96adaa85582c9d53d + md5: 9ece791f1f00560d5b3a5ceb24b28bca + sha256: a0b12e726303a2497074001b184eec96e807a388ea84a38e98ccb1fd7053cde7 category: main optional: false - name: arelle-release - version: 2.36.7 + version: 2.36.9 manager: conda platform: osx-arm64 dependencies: @@ -725,10 +725,10 @@ package: lxml: ">=4,<6" numpy: ">=1,<3" pillow: ">=10,<12" - url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.36.7-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.36.9-pyhd8ed1ab_0.conda hash: - md5: 00cf32010e58fbec8e6c0b9f80107697 - sha256: a28ae1b201eedeef52b46ff9a284b1c91df05ff7fdbecba96adaa85582c9d53d + md5: 9ece791f1f00560d5b3a5ceb24b28bca + sha256: a0b12e726303a2497074001b184eec96e807a388ea84a38e98ccb1fd7053cde7 
category: main optional: false - name: argon2-cffi @@ -2223,45 +2223,45 @@ package: category: main optional: false - name: beautifulsoup4 - version: 4.13.0 + version: 4.13.2 manager: conda platform: linux-64 dependencies: python: ">=3.9" soupsieve: ">=1.2" typing-extensions: "" - url: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.13.0-pyha770c72_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.13.2-pyha770c72_0.conda hash: - md5: ad3754a495d170cb598f93f05c651adf - sha256: 089dcb5c91dab1423856147f604b389c55b3edede63e2367066d6ba27c6adef8 + md5: 22b08b8f283909afee4cfff36b79f083 + sha256: edc85562d1302c2f29005e23a71e1541b1c20647f47f9a1f690767e297f57b7b category: main optional: false - name: beautifulsoup4 - version: 4.13.0 + version: 4.13.2 manager: conda platform: osx-64 dependencies: typing-extensions: "" python: ">=3.9" soupsieve: ">=1.2" - url: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.13.0-pyha770c72_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.13.2-pyha770c72_0.conda hash: - md5: ad3754a495d170cb598f93f05c651adf - sha256: 089dcb5c91dab1423856147f604b389c55b3edede63e2367066d6ba27c6adef8 + md5: 22b08b8f283909afee4cfff36b79f083 + sha256: edc85562d1302c2f29005e23a71e1541b1c20647f47f9a1f690767e297f57b7b category: main optional: false - name: beautifulsoup4 - version: 4.13.0 + version: 4.13.2 manager: conda platform: osx-arm64 dependencies: typing-extensions: "" python: ">=3.9" soupsieve: ">=1.2" - url: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.13.0-pyha770c72_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.13.2-pyha770c72_0.conda hash: - md5: ad3754a495d170cb598f93f05c651adf - sha256: 089dcb5c91dab1423856147f604b389c55b3edede63e2367066d6ba27c6adef8 + md5: 22b08b8f283909afee4cfff36b79f083 + sha256: edc85562d1302c2f29005e23a71e1541b1c20647f47f9a1f690767e297f57b7b category: main optional: false - name: bleach @@ -2431,52 
+2431,52 @@ package: category: main optional: false - name: boto3 - version: 1.36.11 + version: 1.36.12 manager: conda platform: linux-64 dependencies: - botocore: ">=1.36.11,<1.37.0" + botocore: ">=1.36.12,<1.37.0" jmespath: ">=0.7.1,<2.0.0" python: ">=3.9" s3transfer: ">=0.11.0,<0.12.0" - url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.36.11-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.36.12-pyhd8ed1ab_0.conda hash: - md5: 292aba4c704f45c687c36588cbbb7c91 - sha256: 87f2a93711739d0344aee7743655b23c563a2f605a331c293d311189310252a3 + md5: 1d7b8b58f9ca9b03d57edcc8798ae0ea + sha256: 870944d2d1d7f5d2a5af03a20446287417bef168b522c9a1fbd408b4e48329f4 category: main optional: false - name: boto3 - version: 1.36.11 + version: 1.36.12 manager: conda platform: osx-64 dependencies: python: ">=3.9" jmespath: ">=0.7.1,<2.0.0" s3transfer: ">=0.11.0,<0.12.0" - botocore: ">=1.36.11,<1.37.0" - url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.36.11-pyhd8ed1ab_0.conda + botocore: ">=1.36.12,<1.37.0" + url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.36.12-pyhd8ed1ab_0.conda hash: - md5: 292aba4c704f45c687c36588cbbb7c91 - sha256: 87f2a93711739d0344aee7743655b23c563a2f605a331c293d311189310252a3 + md5: 1d7b8b58f9ca9b03d57edcc8798ae0ea + sha256: 870944d2d1d7f5d2a5af03a20446287417bef168b522c9a1fbd408b4e48329f4 category: main optional: false - name: boto3 - version: 1.36.11 + version: 1.36.12 manager: conda platform: osx-arm64 dependencies: python: ">=3.9" jmespath: ">=0.7.1,<2.0.0" s3transfer: ">=0.11.0,<0.12.0" - botocore: ">=1.36.11,<1.37.0" - url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.36.11-pyhd8ed1ab_0.conda + botocore: ">=1.36.12,<1.37.0" + url: https://conda.anaconda.org/conda-forge/noarch/boto3-1.36.12-pyhd8ed1ab_0.conda hash: - md5: 292aba4c704f45c687c36588cbbb7c91 - sha256: 87f2a93711739d0344aee7743655b23c563a2f605a331c293d311189310252a3 + md5: 1d7b8b58f9ca9b03d57edcc8798ae0ea + sha256: 
870944d2d1d7f5d2a5af03a20446287417bef168b522c9a1fbd408b4e48329f4 category: main optional: false - name: botocore - version: 1.36.11 + version: 1.36.12 manager: conda platform: linux-64 dependencies: @@ -2484,14 +2484,14 @@ package: python: ">=3.10" python-dateutil: ">=2.1,<3.0.0" urllib3: ">=1.25.4,!=2.2.0,<3" - url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.36.11-pyge310_1234567_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.36.12-pyge310_1234567_0.conda hash: - md5: 95f1ce38f87d17b91f19169f0f118540 - sha256: dbeee67c71e6db1d0facc4530635ff3ea905d0ac7bc48e789bdd91ccd91289bf + md5: 1fd48a6a5125e273d46cb1398da645fe + sha256: e547fbb24893229722b61123277a5f90abb7f61197623cdb04e46ee5f5cbff1b category: main optional: false - name: botocore - version: 1.36.11 + version: 1.36.12 manager: conda platform: osx-64 dependencies: @@ -2499,14 +2499,14 @@ package: python-dateutil: ">=2.1,<3.0.0" jmespath: ">=0.7.1,<2.0.0" urllib3: ">=1.25.4,!=2.2.0,<3" - url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.36.11-pyge310_1234567_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.36.12-pyge310_1234567_0.conda hash: - md5: 95f1ce38f87d17b91f19169f0f118540 - sha256: dbeee67c71e6db1d0facc4530635ff3ea905d0ac7bc48e789bdd91ccd91289bf + md5: 1fd48a6a5125e273d46cb1398da645fe + sha256: e547fbb24893229722b61123277a5f90abb7f61197623cdb04e46ee5f5cbff1b category: main optional: false - name: botocore - version: 1.36.11 + version: 1.36.12 manager: conda platform: osx-arm64 dependencies: @@ -2514,10 +2514,10 @@ package: python-dateutil: ">=2.1,<3.0.0" jmespath: ">=0.7.1,<2.0.0" urllib3: ">=1.25.4,!=2.2.0,<3" - url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.36.11-pyge310_1234567_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/botocore-1.36.12-pyge310_1234567_0.conda hash: - md5: 95f1ce38f87d17b91f19169f0f118540 - sha256: dbeee67c71e6db1d0facc4530635ff3ea905d0ac7bc48e789bdd91ccd91289bf + md5: 
1fd48a6a5125e273d46cb1398da645fe + sha256: e547fbb24893229722b61123277a5f90abb7f61197623cdb04e46ee5f5cbff1b category: main optional: false - name: bottleneck @@ -4598,45 +4598,45 @@ package: category: main optional: false - name: databricks-sdk - version: 0.42.0 + version: 0.43.0 manager: conda platform: linux-64 dependencies: google-auth: ">=2.0,<3" python: ">=3.9" requests: ">=2.28.1,<3" - url: https://conda.anaconda.org/conda-forge/noarch/databricks-sdk-0.42.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/databricks-sdk-0.43.0-pyhd8ed1ab_0.conda hash: - md5: ab734df729200eabfb8b31e8c99e7ce9 - sha256: 8c5c2326c401f5d64e851c7e6a40419f7d59ad145909e129f5a5cf1edc45fc82 + md5: b56db2df08e71054541f172008a0ce50 + sha256: 512c1b314c51017450c8baa56b112611106bbffe3e4e08b496e2ae09c50294c8 category: main optional: false - name: databricks-sdk - version: 0.42.0 + version: 0.43.0 manager: conda platform: osx-64 dependencies: python: ">=3.9" requests: ">=2.28.1,<3" google-auth: ">=2.0,<3" - url: https://conda.anaconda.org/conda-forge/noarch/databricks-sdk-0.42.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/databricks-sdk-0.43.0-pyhd8ed1ab_0.conda hash: - md5: ab734df729200eabfb8b31e8c99e7ce9 - sha256: 8c5c2326c401f5d64e851c7e6a40419f7d59ad145909e129f5a5cf1edc45fc82 + md5: b56db2df08e71054541f172008a0ce50 + sha256: 512c1b314c51017450c8baa56b112611106bbffe3e4e08b496e2ae09c50294c8 category: main optional: false - name: databricks-sdk - version: 0.42.0 + version: 0.43.0 manager: conda platform: osx-arm64 dependencies: python: ">=3.9" requests: ">=2.28.1,<3" google-auth: ">=2.0,<3" - url: https://conda.anaconda.org/conda-forge/noarch/databricks-sdk-0.42.0-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/databricks-sdk-0.43.0-pyhd8ed1ab_0.conda hash: - md5: ab734df729200eabfb8b31e8c99e7ce9 - sha256: 8c5c2326c401f5d64e851c7e6a40419f7d59ad145909e129f5a5cf1edc45fc82 + md5: b56db2df08e71054541f172008a0ce50 + 
sha256: 512c1b314c51017450c8baa56b112611106bbffe3e4e08b496e2ae09c50294c8 category: main optional: false - name: datasette @@ -4688,8 +4688,8 @@ package: asgi-csrf: ">=0.9" itsdangerous: ">=1.1" pluggy: ">=1.0" - hupper: ">=1.9" platformdirs: ">=2.1.0" + hupper: ">=1.9" uvicorn: ">=0.11" flexcache: ">=0.3" aiofiles: ">=0.4" @@ -4720,8 +4720,8 @@ package: asgi-csrf: ">=0.9" itsdangerous: ">=1.1" pluggy: ">=1.0" - hupper: ">=1.9" platformdirs: ">=2.1.0" + hupper: ">=1.9" uvicorn: ">=0.11" flexcache: ">=0.3" aiofiles: ">=0.4" @@ -4901,54 +4901,6 @@ package: sha256: 9717a059677553562a8f38ff07f3b9f61727bd614f505658b0a5ecbcf8df89be category: main optional: false - - name: deltalake - version: 0.24.0 - manager: conda - platform: linux-64 - dependencies: - __glibc: ">=2.17,<3.0.a0" - libgcc: ">=13" - liblzma: ">=5.6.3,<6.0a0" - pyarrow: ">=16" - pyarrow-hotfix: "" - python: ">=3.12,<3.13.0a0" - python_abi: 3.12.* - url: https://conda.anaconda.org/conda-forge/linux-64/deltalake-0.24.0-py312h07cb367_0.conda - hash: - md5: 8c0b7066e75926407bbb0307bc63e2b5 - sha256: 7ab302d46564e707ecdaf09d06239cb46dcb1dedd55247885f68f551fd62ae05 - category: main - optional: false - - name: deltalake - version: 0.24.0 - manager: conda - platform: osx-64 - dependencies: - pyarrow: ">=16" - pyarrow-hotfix: "" - python: ">=3.12,<3.13.0a0" - python_abi: 3.12.* - url: https://conda.anaconda.org/conda-forge/osx-64/deltalake-0.24.0-py312h16aec2d_0.conda - hash: - md5: 2cbeb7bb52d6f593c5e91721d052406b - sha256: 185fc067ace39a524e599619ef51ccbc1d3bb737ae4ed574059f2e911dc5524a - category: main - optional: false - - name: deltalake - version: 0.24.0 - manager: conda - platform: osx-arm64 - dependencies: - pyarrow: ">=16" - pyarrow-hotfix: "" - python: ">=3.12,<3.13.0a0" - python_abi: 3.12.* - url: https://conda.anaconda.org/conda-forge/osx-arm64/deltalake-0.24.0-py312h30a1e44_0.conda - hash: - md5: 449b7fd8a970fc53ff23249963f3949c - sha256: 38135afb692f17aaf14e026144bfaaa2ae0eee019678b66724439540d3452b8e 
- category: main - optional: false - name: deprecated version: 1.2.18 manager: conda @@ -7788,10 +7740,10 @@ package: libwebp-base: ">=1.5.0,<2.0a0" libzlib: ">=1.3.1,<2.0a0" pango: ">=1.56.1,<2.0a0" - url: https://conda.anaconda.org/conda-forge/linux-64/graphviz-12.2.1-h618ab57_0.conda + url: https://conda.anaconda.org/conda-forge/linux-64/graphviz-12.2.1-h5ae0cbf_1.conda hash: - md5: 65e5005b6c1e9d76164c9713de88f405 - sha256: e8f85722edb298d4015aba160fb606713272a0770db7be755442f20606a94dfd + md5: df7835d2c73cd1889d377cfd6694ada4 + sha256: e6866409ba03df392ac5ec6f0d6ff9751a685ed917bfbcd8a73f550c5fe83c2b category: dev optional: true - name: graphviz @@ -7814,10 +7766,10 @@ package: libwebp-base: ">=1.5.0,<2.0a0" libzlib: ">=1.3.1,<2.0a0" pango: ">=1.56.1,<2.0a0" - url: https://conda.anaconda.org/conda-forge/osx-64/graphviz-12.2.1-h22ac8cf_0.conda + url: https://conda.anaconda.org/conda-forge/osx-64/graphviz-12.2.1-h44a0556_1.conda hash: - md5: addb0f319e20eff7b3415bd8e02ce641 - sha256: 48566e839c9d03b6817fac0b66c2247042dbecaca1d58968882efa439c102cfe + md5: f1e519616cb1c137cff9849cfa1beb93 + sha256: 3a8eef238000e8fcb8f4f31a035869d7b5ad0466f69c72e9064786b54d1812cc category: dev optional: true - name: graphviz @@ -7840,10 +7792,10 @@ package: libwebp-base: ">=1.5.0,<2.0a0" libzlib: ">=1.3.1,<2.0a0" pango: ">=1.56.1,<2.0a0" - url: https://conda.anaconda.org/conda-forge/osx-arm64/graphviz-12.2.1-hb6653a1_0.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/graphviz-12.2.1-hff64154_1.conda hash: - md5: 20d322c337695b484e8bad8e69737579 - sha256: 35c5a300f2f958727c0cbd9e9ca5d973e4539c02e80fff3e130ee0bf628e00a3 + md5: b0b656550a16dfba7efa1479756c5b63 + sha256: 54e3ce5668b17ea41fed515e57fbd9e805969df468eaf7ff65389d7f53b46d54 category: dev optional: true - name: greenlet @@ -16820,7 +16772,7 @@ package: category: dev optional: true - name: paramiko - version: 3.5.0 + version: 3.5.1 manager: conda platform: linux-64 dependencies: @@ -16828,14 +16780,14 @@ package: 
cryptography: ">=3.3" pynacl: ">=1.5" python: ">=3.9" - url: https://conda.anaconda.org/conda-forge/noarch/paramiko-3.5.0-pyhd8ed1ab_1.conda + url: https://conda.anaconda.org/conda-forge/noarch/paramiko-3.5.1-pyhd8ed1ab_0.conda hash: - md5: 92e18207b16a4e4790cdcb4e0bcdad60 - sha256: b5c2c348ec7ae4ac57422d3499fe611c05b63311d396713ba9125820bf305163 + md5: 4e6bea7eee94bb9d8a599385215719f9 + sha256: 1499e558d31536707fdd7d0b569dbe29ae6e3aa8f2fdce9ea6f3df3ce4c1aaf1 category: main optional: false - name: paramiko - version: 3.5.0 + version: 3.5.1 manager: conda platform: osx-64 dependencies: @@ -16843,14 +16795,14 @@ package: cryptography: ">=3.3" bcrypt: ">=3.2" pynacl: ">=1.5" - url: https://conda.anaconda.org/conda-forge/noarch/paramiko-3.5.0-pyhd8ed1ab_1.conda + url: https://conda.anaconda.org/conda-forge/noarch/paramiko-3.5.1-pyhd8ed1ab_0.conda hash: - md5: 92e18207b16a4e4790cdcb4e0bcdad60 - sha256: b5c2c348ec7ae4ac57422d3499fe611c05b63311d396713ba9125820bf305163 + md5: 4e6bea7eee94bb9d8a599385215719f9 + sha256: 1499e558d31536707fdd7d0b569dbe29ae6e3aa8f2fdce9ea6f3df3ce4c1aaf1 category: main optional: false - name: paramiko - version: 3.5.0 + version: 3.5.1 manager: conda platform: osx-arm64 dependencies: @@ -16858,10 +16810,10 @@ package: cryptography: ">=3.3" bcrypt: ">=3.2" pynacl: ">=1.5" - url: https://conda.anaconda.org/conda-forge/noarch/paramiko-3.5.0-pyhd8ed1ab_1.conda + url: https://conda.anaconda.org/conda-forge/noarch/paramiko-3.5.1-pyhd8ed1ab_0.conda hash: - md5: 92e18207b16a4e4790cdcb4e0bcdad60 - sha256: b5c2c348ec7ae4ac57422d3499fe611c05b63311d396713ba9125820bf305163 + md5: 4e6bea7eee94bb9d8a599385215719f9 + sha256: 1499e558d31536707fdd7d0b569dbe29ae6e3aa8f2fdce9ea6f3df3ce4c1aaf1 category: main optional: false - name: parso @@ -18232,45 +18184,6 @@ package: sha256: 063eb168a29d4ce6d9ed865e9e1ad3b6e141712189955a79e06b24ddc0cbbc9c category: main optional: false - - name: pyarrow-hotfix - version: "0.6" - manager: conda - platform: linux-64 - dependencies: 
- pyarrow: ">=0.14" - python: ">=3.9" - url: https://conda.anaconda.org/conda-forge/noarch/pyarrow-hotfix-0.6-pyhd8ed1ab_1.conda - hash: - md5: 49c3b8c3b2578f35a7034f75f30d0041 - sha256: 9ff4e520cff831d34adcf8d791f735972d804572f223ad21b9652ad0886968a6 - category: main - optional: false - - name: pyarrow-hotfix - version: "0.6" - manager: conda - platform: osx-64 - dependencies: - python: ">=3.9" - pyarrow: ">=0.14" - url: https://conda.anaconda.org/conda-forge/noarch/pyarrow-hotfix-0.6-pyhd8ed1ab_1.conda - hash: - md5: 49c3b8c3b2578f35a7034f75f30d0041 - sha256: 9ff4e520cff831d34adcf8d791f735972d804572f223ad21b9652ad0886968a6 - category: main - optional: false - - name: pyarrow-hotfix - version: "0.6" - manager: conda - platform: osx-arm64 - dependencies: - python: ">=3.9" - pyarrow: ">=0.14" - url: https://conda.anaconda.org/conda-forge/noarch/pyarrow-hotfix-0.6-pyhd8ed1ab_1.conda - hash: - md5: 49c3b8c3b2578f35a7034f75f30d0041 - sha256: 9ff4e520cff831d34adcf8d791f735972d804572f223ad21b9652ad0886968a6 - category: main - optional: false - name: pyasn1 version: 0.6.1 manager: conda @@ -24940,17 +24853,17 @@ package: category: dev optional: true - name: xorg-libx11 - version: 1.8.10 + version: 1.8.11 manager: conda platform: linux-64 dependencies: __glibc: ">=2.17,<3.0.a0" libgcc: ">=13" libxcb: ">=1.17.0,<2.0a0" - url: https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.10-h4f16b4b_1.conda + url: https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.11-h4f16b4b_0.conda hash: - md5: 125f34a17d7b4bea418a83904ea82ea6 - sha256: f53994d54f0604df881c4e984279b3cf6a1648a22d4b2113e2c89829968784c9 + md5: b6eb6d0cb323179af168df8fe16fb0a1 + sha256: a0e7fca9e341dc2455b20cd320fc1655e011f7f5f28367ecf8617cccd4bb2821 category: dev optional: true - name: xorg-libxau diff --git a/environments/conda-osx-64.lock.yml b/environments/conda-osx-64.lock.yml index 6b0692761..b8ae76f6f 100644 --- a/environments/conda-osx-64.lock.yml +++ b/environments/conda-osx-64.lock.yml 
@@ -1,6 +1,6 @@ # Generated by conda-lock. # platform: osx-64 -# input_hash: 0e66c2c5134fd7dcc98c7ea75aa4c0a3b06f62bf45fbb2ede2f6c4252e8b4e3c +# input_hash: 3ba3060f95f7cca7b0b57dcc26b27cf7362f666479bfecb76c998d5ed4b74e2f channels: - conda-forge @@ -20,7 +20,7 @@ dependencies: - aom=3.9.1=hf036a51_0 - appdirs=1.4.4=pyhd8ed1ab_1 - appnope=0.1.4=pyhd8ed1ab_1 - - arelle-release=2.36.7=pyhd8ed1ab_0 + - arelle-release=2.36.9=pyhd8ed1ab_0 - argon2-cffi=23.1.0=pyhd8ed1ab_1 - argon2-cffi-bindings=21.2.0=py312hb553811_5 - arrow=1.3.0=pyhd8ed1ab_1 @@ -54,13 +54,13 @@ dependencies: - backports=1.0=pyhd8ed1ab_5 - backports.tarfile=1.2.0=pyhd8ed1ab_1 - bcrypt=4.2.1=py312h0d0de52_0 - - beautifulsoup4=4.13.0=pyha770c72_0 + - beautifulsoup4=4.13.2=pyha770c72_0 - bleach=6.2.0=pyh29332c3_4 - bleach-with-css=6.2.0=h82add2a_4 - blinker=1.9.0=pyhff2d567_0 - blosc=1.21.6=hd145fbb_1 - - boto3=1.36.11=pyhd8ed1ab_0 - - botocore=1.36.11=pyge310_1234567_0 + - boto3=1.36.12=pyhd8ed1ab_0 + - botocore=1.36.12=pyge310_1234567_0 - bottleneck=1.4.2=py312h59f7578_0 - branca=0.8.1=pyhd8ed1ab_0 - brotli=1.1.0=h00291cd_2 @@ -106,13 +106,12 @@ dependencies: - dagster-webserver=1.9.11=pyhc5d1ea7_0 - dask-core=2025.1.0=pyhd8ed1ab_0 - dask-expr=2.0.0=pyhd8ed1ab_0 - - databricks-sdk=0.42.0=pyhd8ed1ab_0 + - databricks-sdk=0.43.0=pyhd8ed1ab_0 - datasette=0.65.1=pyhd8ed1ab_0 - dav1d=1.2.1=h0dc2134_0 - debugpy=1.8.12=py312haafddd8_0 - decorator=5.1.1=pyhd8ed1ab_1 - defusedxml=0.7.1=pyhd8ed1ab_0 - - deltalake=0.24.0=py312h16aec2d_0 - deprecated=1.2.18=pyhd8ed1ab_0 - distlib=0.3.9=pyhd8ed1ab_1 - dnspython=2.7.0=pyhff2d567_1 @@ -177,7 +176,7 @@ dependencies: - graphite2=1.3.13=h73e2aa4_1003 - graphql-core=3.2.6=pyh29332c3_0 - graphql-relay=3.2.0=pyhd8ed1ab_1 - - graphviz=12.2.1=h22ac8cf_0 + - graphviz=12.2.1=h44a0556_1 - greenlet=3.1.1=py312haafddd8_1 - grpcio=1.67.1=py312h145213c_1 - grpcio-health-checking=1.67.1=pyhd8ed1ab_1 @@ -370,7 +369,7 @@ dependencies: - pandoc=3.6.2=h694c41f_0 - 
pandocfilters=1.5.0=pyhd8ed1ab_0 - pango=1.56.1=hf94f63b_0 - - paramiko=3.5.0=pyhd8ed1ab_1 + - paramiko=3.5.1=pyhd8ed1ab_0 - parso=0.8.4=pyhd8ed1ab_1 - partd=1.4.2=pyhd8ed1ab_0 - pastel=0.2.1=pyhd8ed1ab_0 @@ -404,7 +403,6 @@ dependencies: - pure_eval=0.2.3=pyhd8ed1ab_1 - pyarrow=18.1.0=py312hb401068_0 - pyarrow-core=18.1.0=py312h5157fe3_0_cpu - - pyarrow-hotfix=0.6=pyhd8ed1ab_1 - pyasn1=0.6.1=pyhd8ed1ab_2 - pyasn1-modules=0.4.1=pyhd8ed1ab_1 - pybtex=0.24.0=pyhd8ed1ab_3 diff --git a/environments/conda-osx-arm64.lock.yml b/environments/conda-osx-arm64.lock.yml index d9f0a32e4..c1ae2bede 100644 --- a/environments/conda-osx-arm64.lock.yml +++ b/environments/conda-osx-arm64.lock.yml @@ -1,6 +1,6 @@ # Generated by conda-lock. # platform: osx-arm64 -# input_hash: fedcc3f67e42150aed97e57c4e65edd4dfd8085c958ca43b4a841e6c2bc98245 +# input_hash: 73cdf399b1dae90c8f5b420504c02e42883d416d1790e0d8390943326d33eea8 channels: - conda-forge @@ -20,7 +20,7 @@ dependencies: - aom=3.9.1=h7bae524_0 - appdirs=1.4.4=pyhd8ed1ab_1 - appnope=0.1.4=pyhd8ed1ab_1 - - arelle-release=2.36.7=pyhd8ed1ab_0 + - arelle-release=2.36.9=pyhd8ed1ab_0 - argon2-cffi=23.1.0=pyhd8ed1ab_1 - argon2-cffi-bindings=21.2.0=py312h024a12e_5 - arrow=1.3.0=pyhd8ed1ab_1 @@ -54,13 +54,13 @@ dependencies: - backports=1.0=pyhd8ed1ab_5 - backports.tarfile=1.2.0=pyhd8ed1ab_1 - bcrypt=4.2.1=py312hcd83bfe_0 - - beautifulsoup4=4.13.0=pyha770c72_0 + - beautifulsoup4=4.13.2=pyha770c72_0 - bleach=6.2.0=pyh29332c3_4 - bleach-with-css=6.2.0=h82add2a_4 - blinker=1.9.0=pyhff2d567_0 - blosc=1.21.6=h7dd00d9_1 - - boto3=1.36.11=pyhd8ed1ab_0 - - botocore=1.36.11=pyge310_1234567_0 + - boto3=1.36.12=pyhd8ed1ab_0 + - botocore=1.36.12=pyge310_1234567_0 - bottleneck=1.4.2=py312h147345f_0 - branca=0.8.1=pyhd8ed1ab_0 - brotli=1.1.0=hd74edd7_2 @@ -106,13 +106,12 @@ dependencies: - dagster-webserver=1.9.11=pyhc5d1ea7_0 - dask-core=2025.1.0=pyhd8ed1ab_0 - dask-expr=2.0.0=pyhd8ed1ab_0 - - databricks-sdk=0.42.0=pyhd8ed1ab_0 + - 
databricks-sdk=0.43.0=pyhd8ed1ab_0 - datasette=0.65.1=pyhd8ed1ab_0 - dav1d=1.2.1=hb547adb_0 - debugpy=1.8.12=py312hd8f9ff3_0 - decorator=5.1.1=pyhd8ed1ab_1 - defusedxml=0.7.1=pyhd8ed1ab_0 - - deltalake=0.24.0=py312h30a1e44_0 - deprecated=1.2.18=pyhd8ed1ab_0 - distlib=0.3.9=pyhd8ed1ab_1 - dnspython=2.7.0=pyhff2d567_1 @@ -177,7 +176,7 @@ dependencies: - graphite2=1.3.13=hebf3989_1003 - graphql-core=3.2.6=pyh29332c3_0 - graphql-relay=3.2.0=pyhd8ed1ab_1 - - graphviz=12.2.1=hb6653a1_0 + - graphviz=12.2.1=hff64154_1 - greenlet=3.1.1=py312hd8f9ff3_1 - grpcio=1.67.1=py312he4e58e5_1 - grpcio-health-checking=1.67.1=pyhd8ed1ab_1 @@ -370,7 +369,7 @@ dependencies: - pandoc=3.6.2=hce30654_0 - pandocfilters=1.5.0=pyhd8ed1ab_0 - pango=1.56.1=h73f1e88_0 - - paramiko=3.5.0=pyhd8ed1ab_1 + - paramiko=3.5.1=pyhd8ed1ab_0 - parso=0.8.4=pyhd8ed1ab_1 - partd=1.4.2=pyhd8ed1ab_0 - pastel=0.2.1=pyhd8ed1ab_0 @@ -404,7 +403,6 @@ dependencies: - pure_eval=0.2.3=pyhd8ed1ab_1 - pyarrow=18.1.0=py312h1f38498_0 - pyarrow-core=18.1.0=py312hc40f475_0_cpu - - pyarrow-hotfix=0.6=pyhd8ed1ab_1 - pyasn1=0.6.1=pyhd8ed1ab_2 - pyasn1-modules=0.4.1=pyhd8ed1ab_1 - pybtex=0.24.0=pyhd8ed1ab_3 diff --git a/migrations/versions/1189596f92ed_add_sec10k_tables.py b/migrations/versions/1189596f92ed_add_sec10k_tables.py new file mode 100644 index 000000000..aca027b37 --- /dev/null +++ b/migrations/versions/1189596f92ed_add_sec10k_tables.py @@ -0,0 +1,50 @@ +"""Add sec10k tables + +Revision ID: 1189596f92ed +Revises: 1d963598bf21 +Create Date: 2025-02-04 16:06:16.870305 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '1189596f92ed' +down_revision = '1d963598bf21' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('core_sec10k__exhibit_21_company_ownership', schema=None) as batch_op: + batch_op.add_column(sa.Column('subsidiary_company_location', sa.Text(), nullable=True, comment='Location of subsidiary company.')) + batch_op.drop_column('subsidiary_location') + + with op.batch_alter_table('core_sec10k__filings', schema=None) as batch_op: + batch_op.add_column(sa.Column('filing_date', sa.Date(), nullable=True, comment='Date filing was submitted.')) + batch_op.drop_column('date_filed') + + with op.batch_alter_table('out_sec10k__parents_and_subsidiaries', schema=None) as batch_op: + batch_op.add_column(sa.Column('name_change_date', sa.Date(), nullable=True, comment='Date of last name change of the company.')) + batch_op.drop_column('date_of_name_change') + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('out_sec10k__parents_and_subsidiaries', schema=None) as batch_op: + batch_op.add_column(sa.Column('date_of_name_change', sa.DATE(), nullable=True)) + batch_op.drop_column('name_change_date') + + with op.batch_alter_table('core_sec10k__filings', schema=None) as batch_op: + batch_op.add_column(sa.Column('date_filed', sa.DATE(), nullable=True)) + batch_op.drop_column('filing_date') + + with op.batch_alter_table('core_sec10k__exhibit_21_company_ownership', schema=None) as batch_op: + batch_op.add_column(sa.Column('subsidiary_location', sa.TEXT(), nullable=True)) + batch_op.drop_column('subsidiary_company_location') + + # ### end Alembic commands ### diff --git a/migrations/versions/1d963598bf21_.py b/migrations/versions/1d963598bf21_.py new file mode 100644 index 000000000..ead143a88 --- /dev/null +++ b/migrations/versions/1d963598bf21_.py @@ -0,0 +1,24 @@ +"""empty message + +Revision ID: 1d963598bf21 +Revises: 3b65c445d4b4, 4d7466b7f5c1 +Create Date: 2025-02-04 15:47:02.383902 + +""" +from alembic import op +import sqlalchemy as sa + + +# 
"""Add sec10k tables

Revision ID: 4d7466b7f5c1
Revises: 450d100cd30b
Create Date: 2025-02-04 15:34:53.060422

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '4d7466b7f5c1'
down_revision = '450d100cd30b'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the three ``core_sec10k__*`` tables and the ``out_sec10k__*`` table.

    NOTE(review): several column names created here (``subsidiary_location``,
    ``date_filed``, ``date_of_name_change``, ``company_id_sec``) differ from the
    field names used by the sec10k resource metadata (``subsidiary_company_location``,
    ``filing_date``, ``name_change_date``, ``company_id_sec10k``). Confirm that
    later revisions reconcile every one of them.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Long-format company facts: one row per (filing, filer, block, block index, fact name, fact value).
    op.create_table('core_sec10k__company_information',
    sa.Column('filename_sec10k', sa.Text(), nullable=False, comment='Name of filing as provided by SEC data portal.'),
    sa.Column('filer_count', sa.Integer(), nullable=False, comment='Index company information as some filings contain information for multiple companies.'),
    sa.Column('company_information_block', sa.Text(), nullable=False, comment='Title of block of data.'),
    sa.Column('company_information_block_count', sa.Integer(), nullable=False, comment='Some blocks are repeated, this defines the index of the data block.'),
    sa.Column('company_information_fact_name', sa.Text(), nullable=False, comment='Name of fact within a ``company_information_block``.'),
    sa.Column('company_information_fact_value', sa.Text(), nullable=False, comment='Value corresponding with ``company_information_fact_name``.'),
    sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'),
    sa.PrimaryKeyConstraint('filename_sec10k', 'filer_count', 'company_information_block', 'company_information_block_count', 'company_information_fact_name', 'company_information_fact_value', name=op.f('pk_core_sec10k__company_information'))
    )
    # No primary key is declared for this table; every column is nullable.
    op.create_table('core_sec10k__exhibit_21_company_ownership',
    sa.Column('filename_sec10k', sa.Text(), nullable=True, comment='Name of filing as provided by SEC data portal.'),
    sa.Column('subsidiary_company_name', sa.Text(), nullable=True, comment='Name of subsidiary company.'),
    sa.Column('subsidiary_location', sa.Text(), nullable=True, comment='Location of subsidiary company.'),
    sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Fraction of subsidiary company owned by parent.'),
    sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.')
    )
    # One row per filing, keyed by the filing's file name.
    op.create_table('core_sec10k__filings',
    sa.Column('filename_sec10k', sa.Text(), nullable=False, comment='Name of filing as provided by SEC data portal.'),
    sa.Column('central_index_key', sa.Text(), nullable=True, comment='Identifier of the company in SEC database.'),
    sa.Column('company_name', sa.Text(), nullable=True, comment='Name of company submitting SEC 10k filing.'),
    sa.Column('sec10k_version', sa.Text(), nullable=True, comment='Specific version of SEC 10k filed.'),
    sa.Column('date_filed', sa.Date(), nullable=True, comment='Date filing was submitted.'),
    sa.Column('exhibit_21_version', sa.Text(), nullable=True, comment='Version of exhibit 21 submitted (if applicable).'),
    sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'),
    sa.PrimaryKeyConstraint('filename_sec10k', name=op.f('pk_core_sec10k__filings'))
    )
    # No primary key is declared; the only constraint is the composite foreign key
    # (utility_id_eia, report_date) -> core_eia860__scd_utilities at the end.
    op.create_table('out_sec10k__parents_and_subsidiaries',
    sa.Column('company_id_sec', sa.Text(), nullable=True, comment="Algorithmically assigned ID for companies that file SEC 10k's or are referenced in exhibit 21 attachments to 10k's. May not be stable over time."),
    sa.Column('filename_sec10k', sa.Text(), nullable=True, comment='Name of filing as provided by SEC data portal.'),
    sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'),
    sa.Column('central_index_key', sa.Text(), nullable=True, comment='Identifier of the company in SEC database.'),
    sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'),
    sa.Column('street_address', sa.Text(), nullable=True, comment='Physical street address.'),
    sa.Column('address_2', sa.Text(), nullable=True, comment='Second line of the address.'),
    sa.Column('city', sa.Text(), nullable=True, comment='Name of the city.'),
    sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'),
    sa.Column('company_name_raw', sa.Text(), nullable=True, comment='Uncleaned name of company.'),
    sa.Column('date_of_name_change', sa.Date(), nullable=True, comment='Date of last name change of the company.'),
    sa.Column('company_name_former', sa.Text(), nullable=True, comment='Former name of company.'),
    sa.Column('industry_description_sic', sa.Text(), nullable=True, comment='Text description of Standard Industrial Classification (SIC)'),
    sa.Column('industry_id_sic', sa.Text(), nullable=True, comment="Four-digit Standard Industrial Classification (SIC) code identifying the company's primary industry. SIC codes have been replaced by NAICS codes in many applications, but are still used by the SEC."),
    sa.Column('state_of_incorporation', sa.Text(), nullable=True, comment='Two letter state code where company is incorporated.'),
    sa.Column('location_of_incorporation', sa.Text(), nullable=True, comment='Cleaned location of incorporation of the company.'),
    sa.Column('company_id_irs', sa.Text(), nullable=True, comment='ID of the company with the IRS.'),
    sa.Column('files_sec10k', sa.Boolean(), nullable=True, comment='Indicates whether the company files an SEC 10-K.'),
    sa.Column('parent_company_central_index_key', sa.Text(), nullable=True, comment="Central index key (CIK) of the company's parent company."),
    sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Fraction of subsidiary company owned by parent.'),
    sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['core_eia860__scd_utilities.utility_id_eia', 'core_eia860__scd_utilities.report_date'], name=op.f('fk_out_sec10k__parents_and_subsidiaries_utility_id_eia_core_eia860__scd_utilities'))
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    """Drop the four tables created by :func:`upgrade`, in reverse creation order."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table('out_sec10k__parents_and_subsidiaries')
    op.drop_table('core_sec10k__filings')
    op.drop_table('core_sec10k__exhibit_21_company_ownership')
    op.drop_table('core_sec10k__company_information')
    # ### end Alembic commands ###
revision = 'ac67e04d1383'
down_revision = '1189596f92ed'
branch_labels = None
depends_on = None

# Table whose SEC company ID column is renamed by this revision.
_TABLE_NAME = 'out_sec10k__parents_and_subsidiaries'


def upgrade() -> None:
    """Rename ``company_id_sec`` to ``company_id_sec10k`` and set its new comment.

    Alembic autogenerate cannot detect column renames and emitted an
    ``add_column`` + ``drop_column`` pair, which would discard whatever the
    column held. A real rename yields the same final schema while keeping
    the existing values.
    """
    with op.batch_alter_table(_TABLE_NAME, schema=None) as batch_op:
        batch_op.alter_column(
            'company_id_sec',
            new_column_name='company_id_sec10k',
            existing_type=sa.Text(),
            existing_nullable=True,
            comment='PUDL-assigned ID for companies that file SEC Form 10-K or are referenced in exhibit 21 attachments to Form 10-K. May not be stable over time.',
        )


def downgrade() -> None:
    """Rename ``company_id_sec10k`` back to ``company_id_sec``.

    The autogenerated downgrade recreated ``company_id_sec`` as a nullable
    TEXT column without a comment, so the comment is cleared here to end up
    with the same column definition, again without dropping the data.
    """
    with op.batch_alter_table(_TABLE_NAME, schema=None) as batch_op:
        batch_op.alter_column(
            'company_id_sec10k',
            new_column_name='company_id_sec',
            existing_type=sa.TEXT(),
            existing_nullable=True,
            comment=None,
        )
import pandas as pd
from dagster import asset


def _load_table_from_gcs(table_name: str) -> pd.DataFrame:
    """Read one SEC 10-K model output table from cloud storage.

    Args:
        table_name: Name of the table under the ``sec10k/`` prefix of the
            ``model-outputs.catalyst.coop`` bucket.

    Returns:
        The table as loaded by :func:`pandas.read_parquet`.
    """
    return pd.read_parquet(f"gs://model-outputs.catalyst.coop/sec10k/{table_name}")


def _compute_fraction_owned(percent_ownership: pd.Series) -> pd.Series:
    """Clean percent ownership, convert to float, then convert percent to ratio.

    Args:
        percent_ownership: String-valued percentages.

    Returns:
        Float series holding ``percent / 100``.

    Raises:
        ValueError: If a cleaned value still cannot be parsed as a float.
    """
    cleaned = (
        # Collapse runs of two or more dots into a single decimal point.
        percent_ownership.str.replace(r"\.{2,}", ".", regex=True)
        # Drop any backslashes present in the values.
        .str.replace("\\", "", regex=False)
        # A value that is only a dot carries no digits; treat it as zero.
        .replace(".", "0.0", regex=False)
    )
    return cleaned.astype("float") / 100.0


def _year_quarter_to_date(year_quarter: pd.Series) -> pd.Series:
    """Convert a year quarter in the format '2024q1' to date type.

    Args:
        year_quarter: Series of year-quarter strings.

    Returns:
        Series of timestamps (as produced by ``PeriodIndex.to_timestamp``),
        carrying the same index as ``year_quarter`` so it aligns on assignment.
    """
    timestamps = pd.PeriodIndex(year_quarter, freq="Q").to_timestamp()
    # Wrap the DatetimeIndex so the function really returns a Series; reuse the
    # input index so that column assignment keeps the original row order.
    return pd.Series(timestamps, index=year_quarter.index)


@asset(
    io_manager_key="pudl_io_manager",
    group_name="pudl_models",
)
def core_sec10k__company_information() -> pd.DataFrame:
    """Basic company information extracted from SEC10k filings."""
    df = _load_table_from_gcs("core_sec10k__company_information")
    # Map the upstream column names onto the PUDL field names.
    df = df.rename(
        columns={
            "sec10k_filename": "filename_sec10k",
            "block": "company_information_block",
            "block_count": "company_information_block_count",
            "key": "company_information_fact_name",
            "value": "company_information_fact_value",
        }
    )

    # Get date from year quarters
    df["report_date"] = _year_quarter_to_date(df["year_quarter"])

    return df


@asset(
    io_manager_key="pudl_io_manager",
    group_name="pudl_models",
)
def core_sec10k__exhibit_21_company_ownership() -> pd.DataFrame:
    """Company ownership information extracted from sec10k exhibit 21 attachments."""
    df = _load_table_from_gcs("core_sec10k__exhibit_21_company_ownership")
    # Map the upstream column names onto the PUDL field names.
    df = df.rename(
        columns={
            "sec10k_filename": "filename_sec10k",
            "subsidiary": "subsidiary_company_name",
            "location": "subsidiary_company_location",
        }
    )

    # Convert ownership percentage
    df["fraction_owned"] = _compute_fraction_owned(df["ownership_percentage"])

    # Get date from year quarters
    df["report_date"] = _year_quarter_to_date(df["year_quarter"])

    return df


@asset(
    io_manager_key="pudl_io_manager",
    group_name="pudl_models",
)
def core_sec10k__filings() -> pd.DataFrame:
    """Metadata on all 10k filings submitted to SEC."""
    df = _load_table_from_gcs("core_sec10k__filings")
    # Map the upstream column names onto the PUDL field names.
    df = df.rename(
        columns={
            "sec10k_filename": "filename_sec10k",
            "form_type": "sec10k_version",
            "date_filed": "filing_date",
        }
    )

    # Get date from year quarters
    df["report_date"] = _year_quarter_to_date(df["year_quarter"])

    return df


@asset(
    io_manager_key="pudl_io_manager",
    group_name="pudl_models",
)
def out_sec10k__parents_and_subsidiaries() -> pd.DataFrame:
    """Denormalized output table with sec10k info and company ownership linked to EIA."""
    df = _load_table_from_gcs("out_sec10k__parents_and_subsidiaries")
    # Map the upstream column names onto the PUDL field names.
    df = df.rename(
        columns={
            "sec10k_filename": "filename_sec10k",
            "sec_company_id": "company_id_sec10k",
            "street_address_2": "address_2",
            "former_conformed_name": "company_name_former",
            "location_of_inc": "location_of_incorporation",
            "irs_number": "company_id_irs",
            "parent_company_cik": "parent_company_central_index_key",
            "files_10k": "files_sec10k",
            "date_of_name_change": "name_change_date",
        }
    )

    # Convert ownership percentage
    df["fraction_owned"] = _compute_fraction_owned(df["ownership_percentage"])

    # Split standard industrial classification into ID and description columns.
    # Group 0 is everything before the bracketed code, group 1 the four digits.
    sic_parts = df["standard_industrial_classification"].str.extract(
        r"(.+)\[(\d{4})\]"
    )
    # The greedy description group also captures any whitespace that separates
    # the text from the opening bracket, so strip it.
    df["industry_description_sic"] = sic_parts[0].str.strip()
    df["industry_id_sic"] = sic_parts[1].astype("string")

    return df
all_asset_modules.items() ) - + get_pudl_models_assets() ) +if os.getenv("USE_PUDL_MODELS"): + default_assets += load_assets_from_modules([pudl.analysis.pudl_models]) + default_asset_checks = list( itertools.chain.from_iterable( load_asset_checks_from_modules( diff --git a/src/pudl/io_managers.py b/src/pudl/io_managers.py index 266ff747b..3ffca3fe2 100644 --- a/src/pudl/io_managers.py +++ b/src/pudl/io_managers.py @@ -25,7 +25,6 @@ from upath import UPath import pudl -from pudl.analysis.pudl_models import get_model_tables from pudl.metadata.classes import PUDL_PACKAGE, Package, Resource from pudl.workspace.setup import PudlPaths @@ -322,22 +321,13 @@ def load_input(self, context: InputContext) -> pd.DataFrame: class PudlParquetIOManager(IOManager): """IOManager that writes pudl tables to pyarrow parquet files.""" - def _get_table_resource(self, table_name: str) -> Resource: - """Return resource class for table.""" - if table_name not in get_model_tables(): - res = Resource.from_id(table_name) - else: - # For tables coming from PUDL modelling repo just use already parsed resource metadata - [res] = [r for r in PUDL_PACKAGE.resources if r.name == table_name] - return res - def handle_output(self, context: OutputContext, df: Any) -> None: """Writes pudl dataframe to parquet file.""" assert isinstance(df, pd.DataFrame), "Only panda dataframes are supported." 
table_name = get_table_name_from_context(context) parquet_path = PudlPaths().parquet_path(table_name) parquet_path.parent.mkdir(parents=True, exist_ok=True) - res = self._get_table_resource(table_name) + res = Resource.from_id(table_name) df = res.enforce_schema(df) schema = res.to_pyarrow() @@ -355,7 +345,7 @@ def load_input(self, context: InputContext) -> pd.DataFrame: """Loads pudl table from parquet file.""" table_name = get_table_name_from_context(context) parquet_path = PudlPaths().parquet_path(table_name) - res = self._get_table_resource(table_name) + res = Resource.from_id(table_name) df = pq.read_table(source=parquet_path, schema=res.to_pyarrow()).to_pandas() return res.enforce_schema(df) diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index 62ec0b214..2c06243a8 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -36,7 +36,6 @@ ) import pudl.logging_helpers -from pudl.analysis.pudl_models import get_model_table_schemas from pudl.metadata.codes import CODE_METADATA from pudl.metadata.constants import ( CONSTRAINT_DTYPES, @@ -573,24 +572,6 @@ class Field(PudlMeta): harvest: FieldHarvest = FieldHarvest() encoder: Encoder | None = None - @classmethod - def from_pyarrow_field(cls, field: pa.Field) -> "Field": - """Construct from pyarrow field.""" - # Reverse map from frictionless -> pyarrow to pyarrow -> frictionless - type_map = { - value: key for value, key in FIELD_DTYPES_PYARROW.items() if key != "year" - } | { - pa.bool8(): "boolean", - pa.int32(): "integer", - pa.int64(): "integer", - pa.date32(): "date", - } - return cls( - name=field.name, - type=type_map[field.type], - description=field.metadata[b"description"].decode(), - ) - @field_validator("constraints") @classmethod def _check_constraints(cls, value, info: ValidationInfo): # noqa: C901 @@ -812,15 +793,6 @@ class Schema(PudlMeta): "missing_values", "primary_key", "foreign_keys", fn=_check_unique ) - @classmethod - def from_pyarrow_schema(cls, 
schema: pa.Schema) -> "Schema": - """Construct from a pyarrow schema.""" - return cls( - fields=[ - Field.from_pyarrow_field(schema.field(name)) for name in schema.names - ] - ) - @field_validator("fields") @classmethod def _check_field_names_unique(cls, fields: list[Field]): @@ -1314,6 +1286,7 @@ class Resource(PudlMeta): "pudl", "nrelatb", "vcerare", + "sec10k", ] | None ) = None @@ -1342,6 +1315,7 @@ class Resource(PudlMeta): "service_territories", "nrelatb", "vcerare", + "pudl_models", ] | None ) = None @@ -1477,18 +1451,6 @@ def from_id(cls, x: str) -> "Resource": """Construct from PUDL identifier (`resource.name`).""" return cls(**cls.dict_from_id(x)) - @classmethod - def from_pyarrow_schema( - cls, name: str, description: str, schema: pa.Schema - ) -> "Resource": - """Construct from a pyarrow schema.""" - return cls( - name=name, - description=description, - schema=Schema.from_pyarrow_schema(schema), - create_database_schema=False, - ) - def get_field(self, name: str) -> Field: """Return field with the given name if it's part of the Resources.""" names = [field.name for field in self.schema.fields] @@ -2015,12 +1977,6 @@ def from_resource_ids( if len(names) > i: resources += [Resource.dict_from_id(x) for x in names[i:]] - resources += [ - Resource.from_pyarrow_schema(name, description, schema).model_dump( - by_alias=True - ) - for name, description, schema in get_model_table_schemas() - ] if excluded_etl_groups: resources = [ resource diff --git a/src/pudl/metadata/fields.py b/src/pudl/metadata/fields.py index 7547cc810..436990a4c 100644 --- a/src/pudl/metadata/fields.py +++ b/src/pudl/metadata/fields.py @@ -675,6 +675,10 @@ "type": "boolean", "description": "Indicates whether the generator uses carbon capture technology.", }, + "central_index_key": { + "type": "string", + "description": "Identifier of the company in SEC database.", + }, "chlorine_equipment_cost": { "description": ( "Actual installed cost for the existing chlorine discharge " @@ -778,6 
+782,41 @@ "description": "Average monthly coincident peak (CP) demand (for requirements purchases, and any transactions involving demand charges). Monthly CP demand is the metered demand during the hour (60-minute integration) in which the supplier's system reaches its monthly peak. In megawatts.", "unit": "MW", }, + "company_id_sec10k": { + "type": "string", + "description": ( + "PUDL-assigned ID for companies that file SEC Form 10-K or are referenced " + "in exhibit 21 attachments to Form 10-K. May not be stable over time." + ), + }, + "company_information_block": { + "type": "string", + "description": "Title of block of data.", + }, + "company_information_block_count": { + "type": "integer", + "description": "Some blocks are repeated, this defines the index of the data block.", + }, + "company_information_fact_name": { + "type": "string", + "description": "Name of fact within a ``company_information_block``.", + }, + "company_information_fact_value": { + "type": "string", + "description": "Value corresponding with ``company_information_fact_name``.", + }, + "company_name": { + "type": "string", + "description": "Name of company submitting SEC 10k filing.", + }, + "company_name_former": { + "type": "string", + "description": "Former name of company.", + }, + "company_name_raw": { + "type": "string", + "description": "Uncleaned name of company.", + }, "compliance_year_nox": { "type": "integer", "description": "Year boiler was or is expected to be in compliance with federal, state and/or local regulations for nitrogen oxide emissions.", @@ -1537,6 +1576,10 @@ ), "unit": "MWh", }, + "exhibit_21_version": { + "type": "string", + "description": "Version of exhibit 21 submitted (if applicable).", + }, "expense_type": {"type": "string", "description": "The type of expense."}, "ferc1_generator_agg_id": { "type": "integer", @@ -1667,6 +1710,22 @@ "type": "number", "description": "Total number of flue gas desulfurization unit scrubber trains.", }, + "filer_count": { + 
"type": "integer", + "description": "Index company information as some filings contain information for multiple companies.", + }, + "filename_sec10k": { + "type": "string", + "description": "Name of filing as provided by SEC data portal.", + }, + "files_sec10k": { + "type": "boolean", + "description": "Indicates whether the company files an SEC 10-K.", + }, + "filing_date": { + "type": "date", + "description": "Date filing was submitted.", + }, "firing_rate_using_coal_tons_per_hour": { "type": "number", "unit": "tons_per_hour", @@ -2365,6 +2424,22 @@ ), "unit": "MW", }, + "industry_description_sic": { + "type": "string", + "description": "Text description of Standard Industrial Classification (SIC)", + }, + "industry_id_sic": { + "type": "string", + "description": ( + "Four-digit Standard Industrial Classification (SIC) code identifying " + "the company's primary industry. SIC codes have been replaced by NAICS " + "codes in many applications, but are still used by the SEC. See e.g. " + "https://www.osha.gov/data/sic-manual for code definitions." 
+ ), + "constraints": { + "pattern": r"^\d{4}$", + }, + }, "installation_year": { "type": "integer", "description": "Year the plant's most recently built unit was installed.", @@ -2409,6 +2484,10 @@ "description": "Original reported energy interchange between adjacent balancing authorities.", "unit": "MWh", }, + "company_id_irs": { + "type": "string", + "description": "ID of the company with the IRS.", + }, "is_epacems_state": { "type": "boolean", "description": ( @@ -2536,6 +2615,10 @@ ), "unit": "MW", }, + "location_of_incorporation": { + "type": "string", + "description": "Cleaned location of incorporation of the company.", + }, "longitude": { "type": "number", "description": "Longitude of the plant's location, in degrees.", @@ -2778,6 +2861,10 @@ "type": "boolean", "description": "Whether the generator can burn multiple fuels.", }, + "name_change_date": { + "type": "date", + "description": "Date of last name change of the company.", + }, "nameplate_power_factor": { "type": "number", "description": "The nameplate power factor of the generator.", @@ -3407,6 +3494,10 @@ "type": "boolean", "description": "Whether a plant part record has a duplicate record with different ownership status.", }, + "parent_company_central_index_key": { + "type": "string", + "description": "Central index key (CIK) of the company's parent company.", + }, "particulate_control_id_eia": { "type": "string", "description": "Particulate matter control identification number. 
This ID is not a unique identifier.", @@ -4026,6 +4117,10 @@ "description": "Estimated electricity demand scaled by the total sales within a state.", "unit": "MWh", }, + "sec10k_version": { + "type": "string", + "description": "Specific version of SEC 10k filed.", + }, "secondary_transportation_mode_code": { "type": "string", "description": "Transportation mode for the second longest distance transported.", @@ -4371,6 +4466,10 @@ "pattern": r"^\d{2}$", }, }, + "state_of_incorporation": { + "type": "string", + "description": "Two letter state code where company is incorporated.", + }, "steam_load_1000_lbs": { "type": "number", "description": "Total steam pressure produced by a unit during the reported hour.", @@ -4442,6 +4541,14 @@ "type": "integer", "description": "Sub-plant ID links EPA CEMS emissions units to EIA units.", }, + "subsidiary_company_name": { + "type": "string", + "description": "Name of subsidiary company.", + }, + "subsidiary_company_location": { + "type": "string", + "description": "Location of subsidiary company.", + }, "sulfur_content_pct": { "type": "number", "description": "Sulfur content percentage by weight to the nearest 0.01 percent.", @@ -5542,6 +5649,16 @@ """ FIELD_METADATA_BY_RESOURCE: dict[str, dict[str, Any]] = { + "core_sec10k__exhibit_21_company_ownership": { + "fraction_owned": { + "description": "Fraction of subsidiary company owned by parent.", + } + }, + "out_sec10k__parents_and_subsidiaries": { + "fraction_owned": { + "description": "Fraction of subsidiary company owned by parent.", + } + }, "sector_consolidated_eia": {"code": {"type": "integer"}}, "core_ferc1__yearly_hydroelectric_plants_sched406": { "plant_type": { diff --git a/src/pudl/metadata/resources/eia923.py b/src/pudl/metadata/resources/eia923.py index aef25fb0a..ecdc3fb30 100644 --- a/src/pudl/metadata/resources/eia923.py +++ b/src/pudl/metadata/resources/eia923.py @@ -480,7 +480,10 @@ "etl_group": "outputs", }, "out_eia923__monthly_generation": { - "description": 
"""Table definitions for the SEC10k tables."""

from typing import Any

# Resource definitions keyed by table name. Every entry below uses the
# ``sec10k`` source, the ``pudl_models`` ETL group and the ``sec10k`` field
# namespace; ``schema.fields`` lists the table's columns by field name.
RESOURCE_METADATA: dict[str, dict[str, Any]] = {
    # One record per filing, keyed by the filing's file name.
    "core_sec10k__filings": {
        "description": "Metadata describing all submitted SEC 10k filings.",
        "schema": {
            "fields": [
                "filename_sec10k",
                "central_index_key",
                "company_name",
                "sec10k_version",
                "filing_date",
                "exhibit_21_version",
                "report_date",
            ],
            "primary_key": [
                "filename_sec10k",
            ],
        },
        "sources": ["sec10k"],
        "etl_group": "pudl_models",
        "field_namespace": "sec10k",
    },
    # No primary key is declared for this table.
    "core_sec10k__exhibit_21_company_ownership": {
        "description": "Company ownership data extracted from Exhibit 21 attachments to SEC 10k filings.",
        "schema": {
            "fields": [
                "filename_sec10k",
                "subsidiary_company_name",
                "subsidiary_company_location",
                "fraction_owned",
                "report_date",
            ],
        },
        "sources": ["sec10k"],
        "etl_group": "pudl_models",
        "field_namespace": "sec10k",
    },
    # Long-format facts; the primary key spans every column except ``report_date``.
    "core_sec10k__company_information": {
        "description": "Company information extracted from SEC 10k filings.",
        "schema": {
            "fields": [
                "filename_sec10k",
                "filer_count",
                "company_information_block",
                "company_information_block_count",
                "company_information_fact_name",
                "company_information_fact_value",
                "report_date",
            ],
            "primary_key": [
                "filename_sec10k",
                "filer_count",
                "company_information_block",
                "company_information_block_count",
                "company_information_fact_name",
                "company_information_fact_value",
            ],
        },
        "sources": ["sec10k"],
        "etl_group": "pudl_models",
        "field_namespace": "sec10k",
    },
    # No primary key is declared for this table.
    "out_sec10k__parents_and_subsidiaries": {
        "description": "Denormalized table containing SEC 10k company information with mapping between subsidiary and parent companies, as well as a linkage to EIA companies.",
        "schema": {
            "fields": [
                "company_id_sec10k",
                "filename_sec10k",
                "report_date",
                "central_index_key",
                "utility_id_eia",
                "street_address",
                "address_2",
                "city",
                "state",
                "company_name_raw",
                "name_change_date",
                "company_name_former",
                "industry_description_sic",
                "industry_id_sic",
                "state_of_incorporation",
                "location_of_incorporation",
                "company_id_irs",
                "files_sec10k",
                "parent_company_central_index_key",
                "fraction_owned",
            ],
        },
        "sources": ["sec10k"],
        "etl_group": "pudl_models",
        "field_namespace": "sec10k",
    },
}