From b1ec421f29145e5414960fd3edaa57c0a3458804 Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 10:47:34 +0700 Subject: [PATCH 01/33] faithful report --- .github/faithful-data-report.sh | 127 ++++++++++++++++++++++++++++++ .github/workflows/data-report.yml | 29 +++++++ 2 files changed, 156 insertions(+) create mode 100644 .github/faithful-data-report.sh create mode 100644 .github/workflows/data-report.yml diff --git a/.github/faithful-data-report.sh b/.github/faithful-data-report.sh new file mode 100644 index 00000000..26edf571 --- /dev/null +++ b/.github/faithful-data-report.sh @@ -0,0 +1,127 @@ +#!/usr/bin/env bash + +# exit in case of error +set -e + +get_sha() { + local epoch=$1 + local sha_url="$host/$epoch/epoch-$epoch.sha256" + if check_file_exists "$sha_url"; then + local sha=$(curl -s "$sha_url") + [[ -n "$sha" ]] && echo "$sha" || echo "n/a" + else + echo "n/a" + fi +} + +get_poh() { + local epoch=$1 + local poh_url="$host/$epoch/poh-check.log" + if check_file_exists "$poh_url"; then + local poh=$(curl -s "$poh_url") + [[ -n "$poh" ]] && echo "$poh" || echo "n/a" + else + echo "n/a" + fi +} + +get_txmeta() { + local epoch=$1 + local txmeta_url="$host/$epoch/tx-metadata-check.log" + if check_file_exists "$txmeta_url"; then + local txmeta=$(curl -s "$poh_url") + [[ -n "$txmeta" ]] && echo "$txmeta" || echo "n/a" + else + echo "n/a" + fi +} + +get_size() { + local epoch=$1 + local size_url="$host/$epoch/epoch-$epoch.car" + if check_file_exists "$size_url"; then + local size=$(curl -s --head "$size_url" 2>/dev/null | grep -i content-length | awk '{print $2}' | tr -d '\r' | awk '{printf "%.0f", $1/1024/1024/1024}') + [[ -n "$size" ]] && echo "$size" || echo "n/a" + else + echo "n/a" + fi +} + +get_car_url() { + local epoch=$1 + local car_url="$host/$epoch/epoch-$epoch.car" + if check_file_exists "$car_url"; then + echo "$car_url" + else + echo "n/a" + fi +} + + +check_file_exists() { + local url=$1 + curl --output /dev/null --silent --head --fail "$url" + return $? +} + +print_empty_row() { + local epoch=$1 + echo "| $epoch | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a |" +} + +print_row() { + local epoch=$1 + local car=$2 + local sha=$3 + local sha_url=$4 + local size=$5 + local poh=$6 + local txmeta=$7 + + # Only create links if the values aren't "n/a" + local car_cell="n/a" + local sha_cell="n/a" + local size_cell="n/a" + local poh_cell="n/a" + local txmeta_cell="n/a" + + [[ "$car" != "n/a" ]] && car_cell="[epoch-$epoch.car]($car)" + [[ "$sha" != "n/a" ]] && sha_cell="[$sha]($sha_url)" + [[ "$size" != "n/a" ]] && size_cell="[$size]($car)" + [[ "$poh" != "n/a" ]] && poh_cell="$poh" + [[ "$txmeta" != "n/a" ]] && txmeta_cell="$txmeta" + + echo "| $epoch | $car_cell | $sha_cell | $size_cell | | ✓ | $(date '+%Y-%m-%d %H:%M:%S') | ✓ | ✓ |" +} + +CURRENT_EPOCH=$(curl -s https://api.mainnet-beta.solana.com -s -X POST -H "Content-Type: application/json" -d ' + {"jsonrpc":"2.0","id":1, "method":"getEpochInfo"} +' -s | jq -r .result.epoch) + +# descending order +EPOCH_LIST=$(seq $CURRENT_EPOCH -1 0) +# test +EPOCH_LIST=$(seq 687 -1 0) +# fast test +EPOCH_LIST=$(seq 687 -1 670) + +# base hostname +host="https://files.old-faithful.net" + +echo "| Epoch # | CAR | CAR SHA256 | CAR filesize GB | tx meta check | poh check | CAR data created | Indexes | Filecoin Deals |" +echo "|---|---|---|---|---|---|---|---|---|" + +for EPOCH in $EPOCH_LIST; do + CAR=$(get_car_url "$EPOCH") + SHA_URL="$host/$EPOCH/epoch-$EPOCH.sha256" + + if check_file_exists "$CAR"; then + print_row "$EPOCH" \ + "$CAR" \ + "$(get_sha "$EPOCH")" \ + "$SHA_URL" \ + "$(get_size "$EPOCH")" + else + print_empty_row "$EPOCH" + fi +done \ No newline at end of file diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml new file mode 100644 index 00000000..3f9069e3 --- /dev/null +++ b/.github/workflows/data-report.yml @@ -0,0 +1,29 @@ +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +name: Data Report Generator +on: + schedule: + - cron: '0 0 * * *' # Run once a day at midnight UTC + workflow_dispatch: # Allow manual trigger + +jobs: + generate-report: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Generate Report + run: | + bash .github/faithful-data-report.sh > docs/CAR-REPORT.MD + + - name: Commit Report + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git fetch origin car-report || git checkout -b car-report + git checkout report + git add docs/CAR-REPORT.MD + git commit -m "Faithful CAR data report" || exit 0 + git push origin car-report From ec86f2867aab7f7114880004dd5c2be1f8ccadbb Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 11:21:14 +0700 Subject: [PATCH 02/33] fix --- .github/workflows/data-report.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index 3f9069e3..1c4b86ec 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -16,6 +16,7 @@ jobs: - name: Generate Report run: | + mkdir -p docs bash .github/faithful-data-report.sh > docs/CAR-REPORT.MD - name: Commit Report From d3a252cd7b16191d8ed38e190643e988d059eb2b Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 11:37:42 +0700 Subject: [PATCH 03/33] fix --- .github/workflows/data-report.yml | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index 1c4b86ec..fc946011 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -4,6 +4,7 @@ concurrency: name: Data Report Generator on: + push: schedule: - cron: '0 0 * * *' # Run once a day at midnight UTC workflow_dispatch: # Allow manual trigger @@ -11,20 +12,25 @@ on: jobs: generate-report: runs-on: ubuntu-latest + + permissions: + # Give the default GITHUB_TOKEN write permission to commit and push the + # added or changed files to the repository. + contents: write + steps: - uses: actions/checkout@v4 - name: Generate Report run: | mkdir -p docs - bash .github/faithful-data-report.sh > docs/CAR-REPORT.MD + bash .github/faithful-data-report.sh > docs/CAR-REPORT.md - - name: Commit Report - run: | - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - git fetch origin car-report || git checkout -b car-report - git checkout report - git add docs/CAR-REPORT.MD - git commit -m "Faithful CAR data report" || exit 0 - git push origin car-report + + # Commit all changed files back to the repository + - uses: stefanzweifel/git-auto-commit-action@v5 + with: + commit_message: Automated Change + branch: 'main' + file_pattern: 'docs/*.md' + From fdb09e2e749bacdf9474b7b188618ae98f92e5b2 Mon Sep 17 00:00:00 2001 From: Lusitaniae Date: Fri, 29 Nov 2024 04:38:22 +0000 Subject: [PATCH 04/33] Automated Change --- docs/CAR-REPORT.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 docs/CAR-REPORT.md diff --git a/docs/CAR-REPORT.md b/docs/CAR-REPORT.md new file mode 100644 index 00000000..2a7f89d8 --- /dev/null +++ b/docs/CAR-REPORT.md @@ -0,0 +1,20 @@ +| Epoch # | CAR | CAR SHA256 | CAR filesize GB | tx meta check | poh check | CAR data created | Indexes | Filecoin Deals | +|---|---|---|---|---|---|---|---|---| +| 687 | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | +| 686 | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | +| 685 | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | +| 684 | [epoch-684.car](https://files.old-faithful.net/684/epoch-684.car) | n/a | [318](https://files.old-faithful.net/684/epoch-684.car) | | ✓ | 2024-11-29 04:37:58 | ✓ | ✓ | +| 683 | [epoch-683.car](https://files.old-faithful.net/683/epoch-683.car) | n/a | [313](https://files.old-faithful.net/683/epoch-683.car) | | ✓ | 2024-11-29 04:37:59 | ✓ | ✓ | +| 682 | [epoch-682.car](https://files.old-faithful.net/682/epoch-682.car) | n/a | [307](https://files.old-faithful.net/682/epoch-682.car) | | ✓ | 2024-11-29 04:38:01 | ✓ | ✓ | +| 681 | [epoch-681.car](https://files.old-faithful.net/681/epoch-681.car) | n/a | [302](https://files.old-faithful.net/681/epoch-681.car) | | ✓ | 2024-11-29 04:38:03 | ✓ | ✓ | +| 680 | [epoch-680.car](https://files.old-faithful.net/680/epoch-680.car) | n/a | [290](https://files.old-faithful.net/680/epoch-680.car) | | ✓ | 2024-11-29 04:38:04 | ✓ | ✓ | +| 679 | [epoch-679.car](https://files.old-faithful.net/679/epoch-679.car) | n/a | [311](https://files.old-faithful.net/679/epoch-679.car) | | ✓ | 2024-11-29 04:38:06 | ✓ | ✓ | +| 678 | [epoch-678.car](https://files.old-faithful.net/678/epoch-678.car) | n/a | [303](https://files.old-faithful.net/678/epoch-678.car) | | ✓ | 2024-11-29 04:38:08 | ✓ | ✓ | +| 677 | [epoch-677.car](https://files.old-faithful.net/677/epoch-677.car) | n/a | [280](https://files.old-faithful.net/677/epoch-677.car) | | ✓ | 2024-11-29 04:38:09 | ✓ | ✓ | +| 676 | [epoch-676.car](https://files.old-faithful.net/676/epoch-676.car) | n/a | [277](https://files.old-faithful.net/676/epoch-676.car) | | ✓ | 2024-11-29 04:38:11 | ✓ | ✓ | +| 675 | [epoch-675.car](https://files.old-faithful.net/675/epoch-675.car) | n/a | [271](https://files.old-faithful.net/675/epoch-675.car) | | ✓ | 2024-11-29 04:38:13 | ✓ | ✓ | +| 674 | [epoch-674.car](https://files.old-faithful.net/674/epoch-674.car) | n/a | [283](https://files.old-faithful.net/674/epoch-674.car) | | ✓ | 2024-11-29 04:38:14 | ✓ | ✓ | +| 673 | [epoch-673.car](https://files.old-faithful.net/673/epoch-673.car) | n/a | [278](https://files.old-faithful.net/673/epoch-673.car) | | ✓ | 2024-11-29 04:38:16 | ✓ | ✓ | +| 672 | [epoch-672.car](https://files.old-faithful.net/672/epoch-672.car) | n/a | [285](https://files.old-faithful.net/672/epoch-672.car) | | ✓ | 2024-11-29 04:38:18 | ✓ | ✓ | +| 671 | [epoch-671.car](https://files.old-faithful.net/671/epoch-671.car) | n/a | [297](https://files.old-faithful.net/671/epoch-671.car) | | ✓ | 2024-11-29 04:38:19 | ✓ | ✓ | +| 670 | [epoch-670.car](https://files.old-faithful.net/670/epoch-670.car) | n/a | [305](https://files.old-faithful.net/670/epoch-670.car) | | ✓ | 2024-11-29 04:38:21 | ✓ | ✓ | From 02ff5570d14b7295f923931020230097dc152161 Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 12:03:55 +0700 Subject: [PATCH 05/33] fix --- .github/faithful-data-report.sh | 90 ++++++++++++++++++++++++++----- .github/workflows/data-report.yml | 4 +- 2 files changed, 80 insertions(+), 14 deletions(-) diff --git a/.github/faithful-data-report.sh b/.github/faithful-data-report.sh index 26edf571..44a9f7a6 100644 --- a/.github/faithful-data-report.sh +++ b/.github/faithful-data-report.sh @@ -36,6 +36,60 @@ get_txmeta() { fi } +get_indices() { + local epoch=$1 + local cid_url="$host/$epoch/epoch-$epoch.cid" + + # First get the CID (BAFY...) + if ! check_file_exists "$cid_url"; then + echo "n/a" + return + fi + + local bafy=$(curl -s "$cid_url") + if [[ -z "$bafy" ]]; then + echo "n/a" + return + fi + + # Check all required index files exist + local index_files=( + "epoch-$epoch-$bafy-mainnet-cid-to-offset-and-size.index" + "epoch-$epoch-$bafy-mainnet-sig-to-cid.index" + "epoch-$epoch-$bafy-mainnet-sig-exists.index" + "epoch-$epoch-$bafy-mainnet-slot-to-cid.index" + "epoch-$epoch-gsfa.index.tar.zstd" + ) + + for file in "${index_files[@]}"; do + if ! check_file_exists "$host/$epoch/$file"; then + echo "n/a" + return + fi + done + + # If we get here, all files exist + echo "$host/$epoch/epoch-$epoch-indices" +} + +get_deals() { + local epoch=$1 + local deals_url="$deals_host/$epoch/deals.csv" + if check_file_exists "$deals_url"; then + local deals=$(curl -s "$deals_url") + # right now it's just checking the deals.csv exists and is longer than 1 line + # i.e. we sent a deal that was accepted by an SP + # TODO: use `faithful check-deals` to determine if the full epoch can be loaded from filecoin + if [[ -n "$deals" ]] && [[ $(echo "$deals" | wc -l) -gt 1 ]]; then + echo "$deals_url" + else + echo "n/a" + fi + else + echo "n/a" + fi +} + get_size() { local epoch=$1 local size_url="$host/$epoch/epoch-$epoch.car" @@ -66,7 +120,7 @@ check_file_exists() { print_empty_row() { local epoch=$1 - echo "| $epoch | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a |" + echo "| $epoch | n/a | n/a | n/a | n/a | n/a | n/a | n/a |" } print_row() { @@ -77,21 +131,27 @@ print_row() { local size=$5 local poh=$6 local txmeta=$7 + local deals=$8 + local indices=$9 # Only create links if the values aren't "n/a" - local car_cell="n/a" - local sha_cell="n/a" - local size_cell="n/a" - local poh_cell="n/a" - local txmeta_cell="n/a" + local car_cell="✗" + local sha_cell="✗" + local size_cell="✗" + local poh_cell="✗" + local txmeta_cell="✗" + local deals_cell="✗" + local indices_cell="✗" [[ "$car" != "n/a" ]] && car_cell="[epoch-$epoch.car]($car)" - [[ "$sha" != "n/a" ]] && sha_cell="[$sha]($sha_url)" - [[ "$size" != "n/a" ]] && size_cell="[$size]($car)" + [[ "$sha" != "n/a" ]] && sha_cell="[${sha:0:5}]($sha_url)" + [[ "$size" != "n/a" ]] && size_cell="$size GB" [[ "$poh" != "n/a" ]] && poh_cell="$poh" [[ "$txmeta" != "n/a" ]] && txmeta_cell="$txmeta" + [[ "$indices" != "n/a" ]] && indices_cell="✓" + [[ "$deals" != "n/a" ]] && deals_cell="[✓]($deals)" - echo "| $epoch | $car_cell | $sha_cell | $size_cell | | ✓ | $(date '+%Y-%m-%d %H:%M:%S') | ✓ | ✓ |" + echo "| $epoch | $car_cell | $sha_cell | $size_cell | $txmeta_cell | $poh_cell | $indices_cell | $deals_cell |" } CURRENT_EPOCH=$(curl -s https://api.mainnet-beta.solana.com -s -X POST -H "Content-Type: application/json" -d ' @@ -107,9 +167,10 @@ EPOCH_LIST=$(seq 687 -1 670) # base hostname host="https://files.old-faithful.net" +deals_host="https://filecoin-car-storage-cdn.b-cdn.net/" -echo "| Epoch # | CAR | CAR SHA256 | CAR filesize GB | tx meta check | poh check | CAR data created | Indexes | Filecoin Deals |" -echo "|---|---|---|---|---|---|---|---|---|" +echo "| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indexes | Filecoin Deals |" +echo "|---|---|---|---|---|---|---|---|" for EPOCH in $EPOCH_LIST; do CAR=$(get_car_url "$EPOCH") @@ -120,7 +181,12 @@ for EPOCH in $EPOCH_LIST; do "$CAR" \ "$(get_sha "$EPOCH")" \ "$SHA_URL" \ - "$(get_size "$EPOCH")" + "$(get_size "$EPOCH")" \ + "$(get_poh "$EPOCH")" \ + "$(get_txmeta "$EPOCH")" \ + "$(get_deals "$EPOCH")" \ + "$(get_indices "$EPOCH")" \ + else print_empty_row "$EPOCH" fi diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index fc946011..740212a8 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -30,7 +30,7 @@ jobs: # Commit all changed files back to the repository - uses: stefanzweifel/git-auto-commit-action@v5 with: - commit_message: Automated Change - branch: 'main' + commit_message: Old Faithful CAR data report update + branch: 'gha-report' file_pattern: 'docs/*.md' From 918f027b8982f83817f4985f9ff8057763073800 Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 12:10:46 +0700 Subject: [PATCH 06/33] fix --- .github/faithful-data-report.sh | 8 +++++--- .github/workflows/data-report.yml | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/faithful-data-report.sh b/.github/faithful-data-report.sh index 44a9f7a6..1cb37ae6 100644 --- a/.github/faithful-data-report.sh +++ b/.github/faithful-data-report.sh @@ -160,9 +160,11 @@ CURRENT_EPOCH=$(curl -s https://api.mainnet-beta.solana.com -s -X POST -H "Conte # descending order EPOCH_LIST=$(seq $CURRENT_EPOCH -1 0) -# test -EPOCH_LIST=$(seq 687 -1 0) -# fast test + +# test +# EPOCH_LIST=$(seq 687 -1 0) + +# very fast test EPOCH_LIST=$(seq 687 -1 670) # base hostname diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index 740212a8..dd055e76 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -32,5 +32,6 @@ jobs: with: commit_message: Old Faithful CAR data report update branch: 'gha-report' + create_branch: true file_pattern: 'docs/*.md' From 7b94c3eb27be435c4043b0b436a2dec2f27ea743 Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 12:14:41 +0700 Subject: [PATCH 07/33] fix --- .github/faithful-data-report.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/faithful-data-report.sh b/.github/faithful-data-report.sh index 1cb37ae6..72474bc1 100644 --- a/.github/faithful-data-report.sh +++ b/.github/faithful-data-report.sh @@ -165,7 +165,7 @@ EPOCH_LIST=$(seq $CURRENT_EPOCH -1 0) # EPOCH_LIST=$(seq 687 -1 0) # very fast test -EPOCH_LIST=$(seq 687 -1 670) +# EPOCH_LIST=$(seq 687 -1 670) # base hostname host="https://files.old-faithful.net" From cb846dbf33cba81146c1b87212eb8483f25ace78 Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 14:06:05 +0700 Subject: [PATCH 08/33] fix --- .github/faithful-data-report.sh | 2 +- .github/workflows/data-report.yml | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/faithful-data-report.sh b/.github/faithful-data-report.sh index 72474bc1..5fe58ae9 100644 --- a/.github/faithful-data-report.sh +++ b/.github/faithful-data-report.sh @@ -171,7 +171,7 @@ EPOCH_LIST=$(seq $CURRENT_EPOCH -1 0) host="https://files.old-faithful.net" deals_host="https://filecoin-car-storage-cdn.b-cdn.net/" -echo "| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indexes | Filecoin Deals |" +echo "| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Filecoin Deals |" echo "|---|---|---|---|---|---|---|---|" for EPOCH in $EPOCH_LIST; do diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index dd055e76..b85d90b1 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -28,10 +28,13 @@ jobs: # Commit all changed files back to the repository - - uses: stefanzweifel/git-auto-commit-action@v5 + - uses: EndBug/add-and-commit@v9 with: - commit_message: Old Faithful CAR data report update - branch: 'gha-report' + message: Old Faithful CAR data report update + new_branch: 'gha-report' create_branch: true - file_pattern: 'docs/*.md' + add: 'docs/*.md' + author_name: github-actions[bot] + author_email: 41898282+github-actions[bot]@users.noreply.github.com + fetch: true From 6933c0c6444ea8c85ff48e2a5131671cf4b100dd Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 14:17:10 +0700 Subject: [PATCH 09/33] fix --- .github/faithful-data-report.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/faithful-data-report.sh b/.github/faithful-data-report.sh index 5fe58ae9..6cb9ea2e 100644 --- a/.github/faithful-data-report.sh +++ b/.github/faithful-data-report.sh @@ -165,7 +165,7 @@ EPOCH_LIST=$(seq $CURRENT_EPOCH -1 0) # EPOCH_LIST=$(seq 687 -1 0) # very fast test -# EPOCH_LIST=$(seq 687 -1 670) +EPOCH_LIST=$(seq 687 -1 670) # base hostname host="https://files.old-faithful.net" From 2332b3b365a3b645f3cd87c776ae933e41e1e863 Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 14:21:46 +0700 Subject: [PATCH 10/33] fix --- .github/workflows/data-report.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index b85d90b1..29aeb076 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -37,4 +37,5 @@ jobs: author_name: github-actions[bot] author_email: 41898282+github-actions[bot]@users.noreply.github.com fetch: true + pull: '--rebase --autostash ...' From 38be14cb083cddbe544d853df412595251e937fb Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 14:27:44 +0700 Subject: [PATCH 11/33] fix --- .github/workflows/data-report.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index 29aeb076..9981ff49 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -37,5 +37,6 @@ jobs: author_name: github-actions[bot] author_email: 41898282+github-actions[bot]@users.noreply.github.com fetch: true + push: origin gha-report --set-upstream --force pull: '--rebase --autostash ...' From ca185ca4b039646a421e6e27e4f48c7af5beaa17 Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 14:31:28 +0700 Subject: [PATCH 12/33] fix --- .github/workflows/data-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index 9981ff49..372f77bd 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -37,6 +37,6 @@ jobs: author_name: github-actions[bot] author_email: 41898282+github-actions[bot]@users.noreply.github.com fetch: true - push: origin gha-report --set-upstream --force + push: origin gha-report --force pull: '--rebase --autostash ...' From 0a481d9b67b9c1312952a3b7bdd1a809f389d82c Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 14:35:02 +0700 Subject: [PATCH 13/33] fix --- .github/workflows/data-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index 372f77bd..c6902111 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -38,5 +38,5 @@ jobs: author_email: 41898282+github-actions[bot]@users.noreply.github.com fetch: true push: origin gha-report --force - pull: '--rebase --autostash ...' + pull: '--rebase --autostash' From 30d97d2a0f73fa3494a73a8c0374552d03ab40a0 Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 14:38:02 +0700 Subject: [PATCH 14/33] fix --- .github/workflows/data-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index c6902111..b27e39c6 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -38,5 +38,5 @@ jobs: author_email: 41898282+github-actions[bot]@users.noreply.github.com fetch: true push: origin gha-report --force - pull: '--rebase --autostash' + pull: 'origin gha-report' From 83af912cc5f01d909afb4778aabceeb0f86ebe48 Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 15:08:10 +0700 Subject: [PATCH 15/33] fix --- .github/workflows/data-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index b27e39c6..890571b9 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -31,7 +31,7 @@ jobs: - uses: EndBug/add-and-commit@v9 with: message: Old Faithful CAR data report update - new_branch: 'gha-report' + # new_branch: 'gha-report' create_branch: true add: 'docs/*.md' author_name: github-actions[bot] From 78c3e0619adebc2d1a1efcb366fed2a290727a99 Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 15:10:04 +0700 Subject: [PATCH 16/33] fix --- .github/workflows/data-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index 890571b9..d47df14e 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -38,5 +38,5 @@ jobs: author_email: 41898282+github-actions[bot]@users.noreply.github.com fetch: true push: origin gha-report --force - pull: 'origin gha-report' + pull: ' ' From fda3d3dd2f2a06e6d56b75d646538c3db162af7a Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 15:17:51 +0700 Subject: [PATCH 17/33] fix --- .github/workflows/data-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index d47df14e..7056a508 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -36,7 +36,7 @@ jobs: add: 'docs/*.md' author_name: github-actions[bot] author_email: 41898282+github-actions[bot]@users.noreply.github.com - fetch: true + fetch: origin gha-report push: origin gha-report --force pull: ' ' From aed500c8c21552485b34bf9c6b83a954e4a24e1a Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 15:26:43 +0700 Subject: [PATCH 18/33] fix --- .github/workflows/data-report.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index 7056a508..265b4dc1 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -31,8 +31,7 @@ jobs: - uses: EndBug/add-and-commit@v9 with: message: Old Faithful CAR data report update - # new_branch: 'gha-report' - create_branch: true + new_branch: 'gha-report' add: 'docs/*.md' author_name: github-actions[bot] author_email: 41898282+github-actions[bot]@users.noreply.github.com From 5a1519b943d49e683930a42ef6455885a8f0a119 Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 29 Nov 2024 15:29:02 +0700 Subject: [PATCH 19/33] fix --- .github/faithful-data-report.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/faithful-data-report.sh b/.github/faithful-data-report.sh index 6cb9ea2e..5fe58ae9 100644 --- a/.github/faithful-data-report.sh +++ b/.github/faithful-data-report.sh @@ -165,7 +165,7 @@ EPOCH_LIST=$(seq $CURRENT_EPOCH -1 0) # EPOCH_LIST=$(seq 687 -1 0) # very fast test -EPOCH_LIST=$(seq 687 -1 670) +# EPOCH_LIST=$(seq 687 -1 670) # base hostname host="https://files.old-faithful.net" From 3bee28ad6843618f73fa0fdab6f2ae9f8dd9492e Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 11 Dec 2024 17:36:41 +0700 Subject: [PATCH 20/33] move to python, parallize --- .github/faithful-data-report.py | 197 ++++++++++++++++++++++++++++++ .github/faithful-data-report.sh | 195 ----------------------------- .github/workflows/data-report.yml | 6 +- 3 files changed, 200 insertions(+), 198 deletions(-) create mode 100644 .github/faithful-data-report.py delete mode 100644 .github/faithful-data-report.sh diff --git a/.github/faithful-data-report.py b/.github/faithful-data-report.py new file mode 100644 index 00000000..3fca581a --- /dev/null +++ b/.github/faithful-data-report.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 +import asyncio +import aiohttp +import json +from typing import Dict, Optional +from dataclasses import dataclass + +@dataclass +class EpochData: + epoch: int + car: str = "n/a" + sha: str = "n/a" + sha_url: str = "n/a" + size: str = "n/a" + poh: str = "n/a" + poh_url: str = "n/a" + txmeta: str = "n/a" + txmeta_url: str = "n/a" + deals: str = "n/a" + indices: str = "n/a" + +class FaithfulDataReport: + def __init__(self): + self.host = "https://files.old-faithful.net" + self.deals_host = "https://filecoin-car-storage-cdn.b-cdn.net" + + async def check_url(self, session: aiohttp.ClientSession, url: str) -> bool: + try: + async with session.head(url, allow_redirects=True) as response: + return response.status == 200 + except: + return False + + async def fetch_text(self, session: aiohttp.ClientSession, url: str) -> Optional[str]: + try: + async with session.get(url) as response: + if response.status == 200: + return await response.text() + except: + pass + return None + + async def get_size(self, session: aiohttp.ClientSession, url: str) -> str: + try: + async with session.head(url) as response: + if response.status == 200: + size_bytes = int(response.headers.get('content-length', 0)) + size_gb = round(size_bytes / (1024 * 1024 * 1024)) + return str(size_gb) + except: + pass + return "n/a" + + async def get_indices(self, session: aiohttp.ClientSession, epoch: int) -> str: + cid_url = f"{self.host}/{epoch}/epoch-{epoch}.cid" + + # Get the CID first + bafy = await self.fetch_text(session, cid_url) + if not bafy: + return "n/a" + + # Check all required index files + index_files = [ + f"epoch-{epoch}-{bafy}-mainnet-cid-to-offset-and-size.index", + f"epoch-{epoch}-{bafy}-mainnet-sig-to-cid.index", + f"epoch-{epoch}-{bafy}-mainnet-sig-exists.index", + f"epoch-{epoch}-{bafy}-mainnet-slot-to-cid.index", + f"epoch-{epoch}-gsfa.index.tar.zstd" + ] + + checks = await asyncio.gather(*[ + self.check_url(session, f"{self.host}/{epoch}/{file}") + for file in index_files + ]) + + return f"{self.host}/{epoch}/epoch-{epoch}-indices" if all(checks) else "n/a" + + async def get_deals(self, session: aiohttp.ClientSession, epoch: int) -> str: + deals_url = f"{self.deals_host}/{epoch}/deals.csv" + deals_content = await self.fetch_text(session, deals_url) + + if deals_content and len(deals_content.splitlines()) > 1: + return deals_url + return "n/a" + + async def get_epoch_data(self, session: aiohttp.ClientSession, epoch: int) -> EpochData: + car_url = f"{self.host}/{epoch}/epoch-{epoch}.car" + sha_url = f"{self.host}/{epoch}/epoch-{epoch}.sha256" + poh_url = f"{self.host}/{epoch}/poh-check.log" + txmeta_url = f"{self.host}/{epoch}/tx-metadata-check.log" + + # Check if CAR exists first + car_exists = await self.check_url(session, car_url) + if not car_exists: + return EpochData(epoch=epoch) + + # Gather all data concurrently + sha, size, poh, txmeta, indices, deals = await asyncio.gather( + self.fetch_text(session, sha_url), + self.get_size(session, car_url), + self.fetch_text(session, poh_url), + self.fetch_text(session, txmeta_url), + self.get_indices(session, epoch), + self.get_deals(session, epoch) + ) + + return EpochData( + epoch=epoch, + car=car_url, + sha=sha if sha else "n/a", + sha_url=sha_url, + size=size, + poh=poh if poh else "n/a", + poh_url=poh_url, + txmeta=txmeta if txmeta else "n/a", + txmeta_url=txmeta_url, + deals=deals, + indices=indices + ) + + def format_row(self, data: EpochData) -> str: + car_cell = f"[epoch-{data.epoch}.car]({data.car})" if data.car != "n/a" else "✗" + sha_cell = f"[{data.sha[:7]}]({data.sha_url})" if data.sha != "n/a" else "✗" + size_cell = f"{data.size} GB" if data.size != "n/a" else "✗" + txmeta_cell = f"[✓]({data.txmeta_url})" if validate_txmeta_output(data.txmeta) else "✗" + poh_cell = f"[✓]({data.poh_url})" if validate_poh_output(data.poh) else "✗" + indices_cell = "✓" if data.indices != "n/a" else "✗" + deals_cell = f"[✓]({data.deals})" if data.deals != "n/a" else "✗" + + return f"| {data.epoch} | {car_cell} | {sha_cell} | {size_cell} | {txmeta_cell} | {poh_cell} | {indices_cell} | {deals_cell} |" + + async def get_current_epoch(self) -> int: + async with aiohttp.ClientSession() as session: + async with session.post( + 'https://api.mainnet-beta.solana.com', + json={"jsonrpc":"2.0","id":1, "method":"getEpochInfo"} + ) as response: + data = await response.json() + return int(data['result']['epoch']) + + async def run(self): + current_epoch = await self.get_current_epoch() + epochs = range(current_epoch, -1, -1) # descending order + + print("| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Filecoin Deals |") + print("|---|---|---|---|---|---|---|---|") + + # concurrency levels + chunk_size = 20 + + async with aiohttp.ClientSession() as session: + for i in range(0, len(epochs), chunk_size): + chunk = epochs[i:i + chunk_size] + results = await asyncio.gather( + *[self.get_epoch_data(session, epoch) for epoch in chunk] + ) + + # Print results in order + for result in results: + print(self.format_row(result)) + +def validate_txmeta_output(txmeta_text: str) -> bool: + """ + Validates that txmeta check output shows zero missing and zero parsing errors + Returns True if valid, False otherwise + """ + if txmeta_text == "n/a": + return False + + try: + print(txmeta_text) + return 'Transactions with missing metadata: 0' in txmeta_text and \ + 'Transactions with metadata parsing error: 0' in txmeta_text + + except Exception as e: + return False + +def validate_poh_output(poh_text: str) -> bool: + """ + Validates the PoH check output + Returns True if valid, False otherwise + """ + if poh_text == "n/a": + return False + + try: + return 'Successfully checked PoH on CAR file for epoch' in poh_text + + except: + return False + +def main(): + report = FaithfulDataReport() + asyncio.run(report.run()) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/.github/faithful-data-report.sh b/.github/faithful-data-report.sh deleted file mode 100644 index 5fe58ae9..00000000 --- a/.github/faithful-data-report.sh +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env bash - -# exit in case of error -set -e - -get_sha() { - local epoch=$1 - local sha_url="$host/$epoch/epoch-$epoch.sha256" - if check_file_exists "$sha_url"; then - local sha=$(curl -s "$sha_url") - [[ -n "$sha" ]] && echo "$sha" || echo "n/a" - else - echo "n/a" - fi -} - -get_poh() { - local epoch=$1 - local poh_url="$host/$epoch/poh-check.log" - if check_file_exists "$poh_url"; then - local poh=$(curl -s "$poh_url") - [[ -n "$poh" ]] && echo "$poh" || echo "n/a" - else - echo "n/a" - fi -} - -get_txmeta() { - local epoch=$1 - local txmeta_url="$host/$epoch/tx-metadata-check.log" - if check_file_exists "$txmeta_url"; then - local txmeta=$(curl -s "$poh_url") - [[ -n "$txmeta" ]] && echo "$txmeta" || echo "n/a" - else - echo "n/a" - fi -} - -get_indices() { - local epoch=$1 - local cid_url="$host/$epoch/epoch-$epoch.cid" - - # First get the CID (BAFY...) - if ! check_file_exists "$cid_url"; then - echo "n/a" - return - fi - - local bafy=$(curl -s "$cid_url") - if [[ -z "$bafy" ]]; then - echo "n/a" - return - fi - - # Check all required index files exist - local index_files=( - "epoch-$epoch-$bafy-mainnet-cid-to-offset-and-size.index" - "epoch-$epoch-$bafy-mainnet-sig-to-cid.index" - "epoch-$epoch-$bafy-mainnet-sig-exists.index" - "epoch-$epoch-$bafy-mainnet-slot-to-cid.index" - "epoch-$epoch-gsfa.index.tar.zstd" - ) - - for file in "${index_files[@]}"; do - if ! check_file_exists "$host/$epoch/$file"; then - echo "n/a" - return - fi - done - - # If we get here, all files exist - echo "$host/$epoch/epoch-$epoch-indices" -} - -get_deals() { - local epoch=$1 - local deals_url="$deals_host/$epoch/deals.csv" - if check_file_exists "$deals_url"; then - local deals=$(curl -s "$deals_url") - # right now it's just checking the deals.csv exists and is longer than 1 line - # i.e. we sent a deal that was accepted by an SP - # TODO: use `faithful check-deals` to determine if the full epoch can be loaded from filecoin - if [[ -n "$deals" ]] && [[ $(echo "$deals" | wc -l) -gt 1 ]]; then - echo "$deals_url" - else - echo "n/a" - fi - else - echo "n/a" - fi -} - -get_size() { - local epoch=$1 - local size_url="$host/$epoch/epoch-$epoch.car" - if check_file_exists "$size_url"; then - local size=$(curl -s --head "$size_url" 2>/dev/null | grep -i content-length | awk '{print $2}' | tr -d '\r' | awk '{printf "%.0f", $1/1024/1024/1024}') - [[ -n "$size" ]] && echo "$size" || echo "n/a" - else - echo "n/a" - fi -} - -get_car_url() { - local epoch=$1 - local car_url="$host/$epoch/epoch-$epoch.car" - if check_file_exists "$car_url"; then - echo "$car_url" - else - echo "n/a" - fi -} - - -check_file_exists() { - local url=$1 - curl --output /dev/null --silent --head --fail "$url" - return $? -} - -print_empty_row() { - local epoch=$1 - echo "| $epoch | n/a | n/a | n/a | n/a | n/a | n/a | n/a |" -} - -print_row() { - local epoch=$1 - local car=$2 - local sha=$3 - local sha_url=$4 - local size=$5 - local poh=$6 - local txmeta=$7 - local deals=$8 - local indices=$9 - - # Only create links if the values aren't "n/a" - local car_cell="✗" - local sha_cell="✗" - local size_cell="✗" - local poh_cell="✗" - local txmeta_cell="✗" - local deals_cell="✗" - local indices_cell="✗" - - [[ "$car" != "n/a" ]] && car_cell="[epoch-$epoch.car]($car)" - [[ "$sha" != "n/a" ]] && sha_cell="[${sha:0:5}]($sha_url)" - [[ "$size" != "n/a" ]] && size_cell="$size GB" - [[ "$poh" != "n/a" ]] && poh_cell="$poh" - [[ "$txmeta" != "n/a" ]] && txmeta_cell="$txmeta" - [[ "$indices" != "n/a" ]] && indices_cell="✓" - [[ "$deals" != "n/a" ]] && deals_cell="[✓]($deals)" - - echo "| $epoch | $car_cell | $sha_cell | $size_cell | $txmeta_cell | $poh_cell | $indices_cell | $deals_cell |" -} - -CURRENT_EPOCH=$(curl -s https://api.mainnet-beta.solana.com -s -X POST -H "Content-Type: application/json" -d ' - {"jsonrpc":"2.0","id":1, "method":"getEpochInfo"} -' -s | jq -r .result.epoch) - -# descending order -EPOCH_LIST=$(seq $CURRENT_EPOCH -1 0) - -# test -# EPOCH_LIST=$(seq 687 -1 0) - -# very fast test -# EPOCH_LIST=$(seq 687 -1 670) - -# base hostname -host="https://files.old-faithful.net" -deals_host="https://filecoin-car-storage-cdn.b-cdn.net/" - -echo "| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Filecoin Deals |" -echo "|---|---|---|---|---|---|---|---|" - -for EPOCH in $EPOCH_LIST; do - CAR=$(get_car_url "$EPOCH") - SHA_URL="$host/$EPOCH/epoch-$EPOCH.sha256" - - if check_file_exists "$CAR"; then - print_row "$EPOCH" \ - "$CAR" \ - "$(get_sha "$EPOCH")" \ - "$SHA_URL" \ - "$(get_size "$EPOCH")" \ - "$(get_poh "$EPOCH")" \ - "$(get_txmeta "$EPOCH")" \ - "$(get_deals "$EPOCH")" \ - "$(get_indices "$EPOCH")" \ - - else - print_empty_row "$EPOCH" - fi -done \ No newline at end of file diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index 265b4dc1..f0c2b45f 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -24,10 +24,10 @@ jobs: - name: Generate Report run: | mkdir -p docs - bash .github/faithful-data-report.sh > docs/CAR-REPORT.md + python3 -m pip install aiohttp + python3 .github/faithful-data-report.sh > docs/CAR-REPORT.md - - # Commit all changed files back to the repository + # Commit changed files back to the repository - uses: EndBug/add-and-commit@v9 with: message: Old Faithful CAR data report update From 039f1a11650607605ff79a20efcae46e396a6655 Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 11 Dec 2024 17:40:00 +0700 Subject: [PATCH 21/33] fix --- .github/workflows/data-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index f0c2b45f..9075905f 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -25,7 +25,7 @@ jobs: run: | mkdir -p docs python3 -m pip install aiohttp - python3 .github/faithful-data-report.sh > docs/CAR-REPORT.md + python3 .github/faithful-data-report.py > docs/CAR-REPORT.md # Commit changed files back to the repository - uses: EndBug/add-and-commit@v9 From 5aacf40dd68f384597513dfc4ecd3f817b684c2a Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 11 Dec 2024 17:46:32 +0700 Subject: [PATCH 22/33] fix --- .github/faithful-data-report.sh | 196 ++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 .github/faithful-data-report.sh diff --git a/.github/faithful-data-report.sh b/.github/faithful-data-report.sh new file mode 100644 index 00000000..24b4ef6a --- /dev/null +++ b/.github/faithful-data-report.sh @@ -0,0 +1,196 @@ +#!/usr/bin/env python3 +import asyncio +import aiohttp +import json +from typing import Dict, Optional +from dataclasses import dataclass + +@dataclass +class EpochData: + epoch: int + car: str = "n/a" + sha: str = "n/a" + sha_url: str = "n/a" + size: str = "n/a" + poh: str = "n/a" + poh_url: str = "n/a" + txmeta: str = "n/a" + txmeta_url: str = "n/a" + deals: str = "n/a" + indices: str = "n/a" + +class FaithfulDataReport: + def __init__(self): + self.host = "https://files.old-faithful.net" + self.deals_host = "https://filecoin-car-storage-cdn.b-cdn.net" + + async def check_url(self, session: aiohttp.ClientSession, url: str) -> bool: + try: + async with session.head(url, allow_redirects=True) as response: + return response.status == 200 + except: + return False + + async def fetch_text(self, session: aiohttp.ClientSession, url: str) -> Optional[str]: + try: + async with session.get(url) as response: + if response.status == 200: + return await response.text() + except: + pass + return None + + async def get_size(self, session: aiohttp.ClientSession, url: str) -> str: + try: + async with session.head(url) as response: + if response.status == 200: + size_bytes = int(response.headers.get('content-length', 0)) + size_gb = round(size_bytes / (1024 * 1024 * 1024)) + return str(size_gb) + except: + pass + return "n/a" + + async def get_indices(self, session: aiohttp.ClientSession, epoch: int) -> str: + cid_url = f"{self.host}/{epoch}/epoch-{epoch}.cid" + + # Get the CID first + bafy = await self.fetch_text(session, cid_url) + if not bafy: + return "n/a" + + # Check all required index files + index_files = [ + f"epoch-{epoch}-{bafy}-mainnet-cid-to-offset-and-size.index", + f"epoch-{epoch}-{bafy}-mainnet-sig-to-cid.index", + f"epoch-{epoch}-{bafy}-mainnet-sig-exists.index", + f"epoch-{epoch}-{bafy}-mainnet-slot-to-cid.index", + f"epoch-{epoch}-gsfa.index.tar.zstd" + ] + + checks = await asyncio.gather(*[ + self.check_url(session, f"{self.host}/{epoch}/{file}") + for file in index_files + ]) + + return f"{self.host}/{epoch}/epoch-{epoch}-indices" if all(checks) else "n/a" + + async def get_deals(self, session: aiohttp.ClientSession, epoch: int) -> str: + deals_url = f"{self.deals_host}/{epoch}/deals.csv" + deals_content = await self.fetch_text(session, deals_url) + + if deals_content and len(deals_content.splitlines()) > 1: + return deals_url + return "n/a" + + async def get_epoch_data(self, session: aiohttp.ClientSession, epoch: int) -> EpochData: + car_url = f"{self.host}/{epoch}/epoch-{epoch}.car" + sha_url = f"{self.host}/{epoch}/epoch-{epoch}.sha256" + poh_url = f"{self.host}/{epoch}/poh-check.log" + txmeta_url = f"{self.host}/{epoch}/tx-metadata-check.log" + + # Check if CAR exists first + car_exists = await self.check_url(session, car_url) + if not car_exists: + return EpochData(epoch=epoch) + + # Gather all data concurrently + sha, size, poh, txmeta, indices, deals = await asyncio.gather( + self.fetch_text(session, sha_url), + self.get_size(session, car_url), + self.fetch_text(session, poh_url), + self.fetch_text(session, txmeta_url), + self.get_indices(session, epoch), + self.get_deals(session, epoch) + ) + + return EpochData( + epoch=epoch, + car=car_url, + sha=sha if sha else "n/a", + sha_url=sha_url, + size=size, + poh=poh if poh else "n/a", + poh_url=poh_url, + txmeta=txmeta if txmeta else "n/a", + txmeta_url=txmeta_url, + deals=deals, + indices=indices + ) + + def format_row(self, data: EpochData) -> str: + car_cell = f"[epoch-{data.epoch}.car]({data.car})" if data.car != "n/a" else "✗" + sha_cell = f"[{data.sha[:7]}]({data.sha_url})" if data.sha != "n/a" else "✗" + size_cell = f"{data.size} GB" if data.size != "n/a" else "✗" + txmeta_cell = f"[✓]({data.txmeta_url})" if validate_txmeta_output(data.txmeta) else "✗" + poh_cell = f"[✓]({data.poh_url})" if validate_poh_output(data.poh) else "✗" + indices_cell = "✓" if data.indices != "n/a" else "✗" + deals_cell = f"[✓]({data.deals})" if data.deals != "n/a" else "✗" + + return f"| {data.epoch} | {car_cell} | {sha_cell} | {size_cell} | {txmeta_cell} | {poh_cell} | {indices_cell} | {deals_cell} |" + + async def get_current_epoch(self) -> int: + async with aiohttp.ClientSession() as session: + async with session.post( + 'https://api.mainnet-beta.solana.com', + json={"jsonrpc":"2.0","id":1, "method":"getEpochInfo"} + ) as response: + data = await response.json() + return int(data['result']['epoch']) + + async def run(self): + current_epoch = await self.get_current_epoch() + epochs = range(current_epoch, -1, -1) # descending order + + print("| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Filecoin Deals |") + print("|---|---|---|---|---|---|---|---|") + + # concurrency levels + chunk_size = 20 + + async with aiohttp.ClientSession() as session: + for i in range(0, len(epochs), chunk_size): + chunk = epochs[i:i + chunk_size] + results = await asyncio.gather( + *[self.get_epoch_data(session, epoch) for epoch in chunk] + ) + + # Print results in order + for result in results: + print(self.format_row(result)) + +def validate_txmeta_output(txmeta_text: str) -> bool: + """ + Validates that txmeta check output shows zero missing and zero parsing errors + Returns True if valid, False otherwise + """ + if txmeta_text == "n/a": + return False + + try: + return 'Transactions with missing metadata: 0' in txmeta_text and \ + 'Transactions with metadata parsing error: 0' in txmeta_text + + except Exception as e: + return False + +def validate_poh_output(poh_text: str) -> bool: + """ + Validates the PoH check output + Returns True if valid, False otherwise + """ + if poh_text == "n/a": + return False + + try: + return 'Successfully checked PoH on CAR file for epoch' in poh_text + + except: + return False + +def main(): + report = FaithfulDataReport() + asyncio.run(report.run()) + +if __name__ == "__main__": + main() \ No newline at end of file From c2903448b95d21d24c82e4b035118793f3bc4326 Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 11 Dec 2024 17:52:12 +0700 Subject: [PATCH 23/33] fix --- .github/faithful-data-report.py | 2 - .github/faithful-data-report.sh | 196 -------------------------------- 2 files changed, 198 deletions(-) delete mode 100644 .github/faithful-data-report.sh diff --git a/.github/faithful-data-report.py b/.github/faithful-data-report.py index 3fca581a..8c4e365a 100644 --- a/.github/faithful-data-report.py +++ b/.github/faithful-data-report.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import asyncio import aiohttp -import json from typing import Dict, Optional from dataclasses import dataclass @@ -168,7 +167,6 @@ def validate_txmeta_output(txmeta_text: str) -> bool: return False try: - print(txmeta_text) return 'Transactions with missing metadata: 0' in txmeta_text and \ 'Transactions with metadata parsing error: 0' in txmeta_text diff --git a/.github/faithful-data-report.sh b/.github/faithful-data-report.sh deleted file mode 100644 index 24b4ef6a..00000000 --- a/.github/faithful-data-report.sh +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python3 -import asyncio -import aiohttp -import json -from typing import Dict, Optional -from dataclasses import dataclass - -@dataclass -class EpochData: - epoch: int - car: str = "n/a" - sha: str = "n/a" - sha_url: str = "n/a" - size: str = "n/a" - poh: str = "n/a" - poh_url: str = "n/a" - txmeta: str = "n/a" - txmeta_url: str = "n/a" - deals: str = "n/a" - indices: str = "n/a" - -class FaithfulDataReport: - def __init__(self): - self.host = "https://files.old-faithful.net" - self.deals_host = "https://filecoin-car-storage-cdn.b-cdn.net" - - async def check_url(self, session: aiohttp.ClientSession, url: str) -> bool: - try: - async with session.head(url, allow_redirects=True) as response: - return response.status == 200 - except: - return False - - async def fetch_text(self, session: aiohttp.ClientSession, url: str) -> Optional[str]: - try: - async with session.get(url) as response: - if response.status == 200: - return await response.text() - except: - pass - return None - - async def get_size(self, session: aiohttp.ClientSession, url: str) -> str: - try: - async with session.head(url) as response: - if response.status == 200: - size_bytes = int(response.headers.get('content-length', 0)) - size_gb = round(size_bytes / (1024 * 1024 * 1024)) - return str(size_gb) - except: - pass - return "n/a" - - async def get_indices(self, session: aiohttp.ClientSession, epoch: int) -> str: - cid_url = f"{self.host}/{epoch}/epoch-{epoch}.cid" - - # Get the CID first - bafy = await self.fetch_text(session, cid_url) - if not bafy: - return "n/a" - - # Check all required index files - index_files = [ - f"epoch-{epoch}-{bafy}-mainnet-cid-to-offset-and-size.index", - f"epoch-{epoch}-{bafy}-mainnet-sig-to-cid.index", - f"epoch-{epoch}-{bafy}-mainnet-sig-exists.index", - f"epoch-{epoch}-{bafy}-mainnet-slot-to-cid.index", - f"epoch-{epoch}-gsfa.index.tar.zstd" - ] - - checks = await asyncio.gather(*[ - self.check_url(session, f"{self.host}/{epoch}/{file}") - for file in index_files - ]) - - return f"{self.host}/{epoch}/epoch-{epoch}-indices" if all(checks) else "n/a" - - async def get_deals(self, session: aiohttp.ClientSession, epoch: int) -> str: - deals_url = f"{self.deals_host}/{epoch}/deals.csv" - deals_content = await self.fetch_text(session, deals_url) - - if deals_content and len(deals_content.splitlines()) > 1: - return deals_url - return "n/a" - - async def get_epoch_data(self, session: aiohttp.ClientSession, epoch: int) -> EpochData: - car_url = f"{self.host}/{epoch}/epoch-{epoch}.car" - sha_url = f"{self.host}/{epoch}/epoch-{epoch}.sha256" - poh_url = f"{self.host}/{epoch}/poh-check.log" - txmeta_url = f"{self.host}/{epoch}/tx-metadata-check.log" - - # Check if CAR exists first - car_exists = await self.check_url(session, car_url) - if not car_exists: - return EpochData(epoch=epoch) - - # Gather all data concurrently - sha, size, poh, txmeta, indices, deals = await asyncio.gather( - self.fetch_text(session, sha_url), - self.get_size(session, car_url), - self.fetch_text(session, poh_url), - self.fetch_text(session, txmeta_url), - self.get_indices(session, epoch), - self.get_deals(session, epoch) - ) - - return EpochData( - epoch=epoch, - car=car_url, - sha=sha if sha else "n/a", - sha_url=sha_url, - size=size, - poh=poh if poh else "n/a", - poh_url=poh_url, - txmeta=txmeta if txmeta else "n/a", - txmeta_url=txmeta_url, - deals=deals, - indices=indices - ) - - def format_row(self, data: EpochData) -> str: - car_cell = f"[epoch-{data.epoch}.car]({data.car})" if data.car != "n/a" else "✗" - sha_cell = f"[{data.sha[:7]}]({data.sha_url})" if data.sha != "n/a" else "✗" - size_cell = f"{data.size} GB" if data.size != "n/a" else "✗" - txmeta_cell = f"[✓]({data.txmeta_url})" if validate_txmeta_output(data.txmeta) else "✗" - poh_cell = f"[✓]({data.poh_url})" if validate_poh_output(data.poh) else "✗" - indices_cell = "✓" if data.indices != "n/a" else "✗" - deals_cell = f"[✓]({data.deals})" if data.deals != "n/a" else "✗" - - return f"| {data.epoch} | {car_cell} | {sha_cell} | {size_cell} | {txmeta_cell} | {poh_cell} | {indices_cell} | {deals_cell} |" - - async def get_current_epoch(self) -> int: - async with aiohttp.ClientSession() as session: - async with session.post( - 'https://api.mainnet-beta.solana.com', - json={"jsonrpc":"2.0","id":1, "method":"getEpochInfo"} - ) as response: - data = await response.json() - return int(data['result']['epoch']) - - async def run(self): - current_epoch = await self.get_current_epoch() - epochs = range(current_epoch, -1, -1) # descending order - - print("| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Filecoin Deals |") - print("|---|---|---|---|---|---|---|---|") - - # concurrency levels - chunk_size = 20 - - async with aiohttp.ClientSession() as session: - for i in range(0, len(epochs), chunk_size): - chunk = epochs[i:i + chunk_size] - results = await asyncio.gather( - *[self.get_epoch_data(session, epoch) for epoch in chunk] - ) - - # Print results in order - for result in results: - print(self.format_row(result)) - -def validate_txmeta_output(txmeta_text: str) -> bool: - """ - Validates that txmeta check output shows zero missing and zero parsing errors - Returns True if valid, False otherwise - """ - if txmeta_text == "n/a": - return False - - try: - return 'Transactions with missing metadata: 0' in txmeta_text and \ - 'Transactions with metadata parsing error: 0' in txmeta_text - - except Exception as e: - return False - -def validate_poh_output(poh_text: str) -> bool: - """ - Validates the PoH check output - Returns True if valid, False otherwise - """ - if poh_text == "n/a": - return False - - try: - return 'Successfully checked PoH on CAR file for epoch' in poh_text - - except: - return False - -def main(): - report = FaithfulDataReport() - asyncio.run(report.run()) - -if __name__ == "__main__": - main() \ No newline at end of file From 06788de53ca3dfdab1f2c4ab5e6ec6541e789359 Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 11 Dec 2024 18:35:27 +0700 Subject: [PATCH 24/33] fix --- .github/faithful-data-report.py | 44 +++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/.github/faithful-data-report.py b/.github/faithful-data-report.py index 8c4e365a..fac6b2cc 100644 --- a/.github/faithful-data-report.py +++ b/.github/faithful-data-report.py @@ -17,6 +17,7 @@ class EpochData: txmeta_url: str = "n/a" deals: str = "n/a" indices: str = "n/a" + indices_size: str = "n/a" class FaithfulDataReport: def __init__(self): @@ -74,6 +75,36 @@ async def get_indices(self, session: aiohttp.ClientSession, epoch: int) -> str: return f"{self.host}/{epoch}/epoch-{epoch}-indices" if all(checks) else "n/a" + async def get_indices_size(self, session: aiohttp.ClientSession, epoch: int) -> str: + cid_url = f"{self.host}/{epoch}/epoch-{epoch}.cid" + + # Get the CID first + bafy = await self.fetch_text(session, cid_url) + if not bafy: + return "n/a" + + # Check all required index files + index_files = [ + f"epoch-{epoch}-{bafy}-mainnet-cid-to-offset-and-size.index", + f"epoch-{epoch}-{bafy}-mainnet-sig-to-cid.index", + f"epoch-{epoch}-{bafy}-mainnet-sig-exists.index", + f"epoch-{epoch}-{bafy}-mainnet-slot-to-cid.index", + f"epoch-{epoch}-gsfa.index.tar.zstd" + ] + + sizes = await asyncio.gather(*[ + self.get_size(session, f"{self.host}/{epoch}/{file}") + for file in index_files + ]) + + # Convert sizes to integers, treating "n/a" as 0 + size_ints = [int(size) if size != "n/a" else 0 for size in sizes] + + # Sum up all sizes + total_size = sum(size_ints) + + return str(total_size) if total_size > 0 else "n/a" + async def get_deals(self, session: aiohttp.ClientSession, epoch: int) -> str: deals_url = f"{self.deals_host}/{epoch}/deals.csv" deals_content = await self.fetch_text(session, deals_url) @@ -94,12 +125,13 @@ async def get_epoch_data(self, session: aiohttp.ClientSession, epoch: int) -> Ep return EpochData(epoch=epoch) # Gather all data concurrently - sha, size, poh, txmeta, indices, deals = await asyncio.gather( + sha, size, poh, txmeta, indices, indices_size, deals = await asyncio.gather( self.fetch_text(session, sha_url), self.get_size(session, car_url), self.fetch_text(session, poh_url), self.fetch_text(session, txmeta_url), self.get_indices(session, epoch), + self.get_indices_size(session, epoch), self.get_deals(session, epoch) ) @@ -114,7 +146,8 @@ async def get_epoch_data(self, session: aiohttp.ClientSession, epoch: int) -> Ep txmeta=txmeta if txmeta else "n/a", txmeta_url=txmeta_url, deals=deals, - indices=indices + indices=indices, + indices_size=indices_size ) def format_row(self, data: EpochData) -> str: @@ -124,9 +157,10 @@ def format_row(self, data: EpochData) -> str: txmeta_cell = f"[✓]({data.txmeta_url})" if validate_txmeta_output(data.txmeta) else "✗" poh_cell = f"[✓]({data.poh_url})" if validate_poh_output(data.poh) else "✗" indices_cell = "✓" if data.indices != "n/a" else "✗" + indices_size_cell = f"{data.indices_size} GB" if data.indices_size != "n/a" else "✗" deals_cell = f"[✓]({data.deals})" if data.deals != "n/a" else "✗" - return f"| {data.epoch} | {car_cell} | {sha_cell} | {size_cell} | {txmeta_cell} | {poh_cell} | {indices_cell} | {deals_cell} |" + return f"| {data.epoch} | {car_cell} | {sha_cell} | {size_cell} | {txmeta_cell} | {poh_cell} | {indices_cell} | {indices_size_cell} | {deals_cell} |" async def get_current_epoch(self) -> int: async with aiohttp.ClientSession() as session: @@ -141,8 +175,8 @@ async def run(self): current_epoch = await self.get_current_epoch() epochs = range(current_epoch, -1, -1) # descending order - print("| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Filecoin Deals |") - print("|---|---|---|---|---|---|---|---|") + print("| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Indices Size | Filecoin Deals |") + print("|---|---|---|---|---|---|---|---|---|") # concurrency levels chunk_size = 20 From f08fbe79256fd309da5a2b05de6adf994a368835 Mon Sep 17 00:00:00 2001 From: Pedro Date: Thu, 12 Dec 2024 10:32:37 +0700 Subject: [PATCH 25/33] remove old file --- docs/CAR-REPORT.md | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 docs/CAR-REPORT.md diff --git a/docs/CAR-REPORT.md b/docs/CAR-REPORT.md deleted file mode 100644 index 2a7f89d8..00000000 --- a/docs/CAR-REPORT.md +++ /dev/null @@ -1,20 +0,0 @@ -| Epoch # | CAR | CAR SHA256 | CAR filesize GB | tx meta check | poh check | CAR data created | Indexes | Filecoin Deals | -|---|---|---|---|---|---|---|---|---| -| 687 | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | -| 686 | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | -| 685 | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | -| 684 | [epoch-684.car](https://files.old-faithful.net/684/epoch-684.car) | n/a | [318](https://files.old-faithful.net/684/epoch-684.car) | | ✓ | 2024-11-29 04:37:58 | ✓ | ✓ | -| 683 | [epoch-683.car](https://files.old-faithful.net/683/epoch-683.car) | n/a | [313](https://files.old-faithful.net/683/epoch-683.car) | | ✓ | 2024-11-29 04:37:59 | ✓ | ✓ | -| 682 | [epoch-682.car](https://files.old-faithful.net/682/epoch-682.car) | n/a | [307](https://files.old-faithful.net/682/epoch-682.car) | | ✓ | 2024-11-29 04:38:01 | ✓ | ✓ | -| 681 | [epoch-681.car](https://files.old-faithful.net/681/epoch-681.car) | n/a | [302](https://files.old-faithful.net/681/epoch-681.car) | | ✓ | 2024-11-29 04:38:03 | ✓ | ✓ | -| 680 | [epoch-680.car](https://files.old-faithful.net/680/epoch-680.car) | n/a | [290](https://files.old-faithful.net/680/epoch-680.car) | | ✓ | 2024-11-29 04:38:04 | ✓ | ✓ | -| 679 | [epoch-679.car](https://files.old-faithful.net/679/epoch-679.car) | n/a | [311](https://files.old-faithful.net/679/epoch-679.car) | | ✓ | 2024-11-29 04:38:06 | ✓ | ✓ | -| 678 | [epoch-678.car](https://files.old-faithful.net/678/epoch-678.car) | n/a | [303](https://files.old-faithful.net/678/epoch-678.car) | | ✓ | 2024-11-29 04:38:08 | ✓ | ✓ | -| 677 | [epoch-677.car](https://files.old-faithful.net/677/epoch-677.car) | n/a | [280](https://files.old-faithful.net/677/epoch-677.car) | | ✓ | 2024-11-29 04:38:09 | ✓ | ✓ | -| 676 | [epoch-676.car](https://files.old-faithful.net/676/epoch-676.car) | n/a | [277](https://files.old-faithful.net/676/epoch-676.car) | | ✓ | 2024-11-29 04:38:11 | ✓ | ✓ | -| 675 | [epoch-675.car](https://files.old-faithful.net/675/epoch-675.car) | n/a | [271](https://files.old-faithful.net/675/epoch-675.car) | | ✓ | 2024-11-29 04:38:13 | ✓ | ✓ | -| 674 | [epoch-674.car](https://files.old-faithful.net/674/epoch-674.car) | n/a | [283](https://files.old-faithful.net/674/epoch-674.car) | | ✓ | 2024-11-29 04:38:14 | ✓ | ✓ | -| 673 | [epoch-673.car](https://files.old-faithful.net/673/epoch-673.car) | n/a | [278](https://files.old-faithful.net/673/epoch-673.car) | | ✓ | 2024-11-29 04:38:16 | ✓ | ✓ | -| 672 | [epoch-672.car](https://files.old-faithful.net/672/epoch-672.car) | n/a | [285](https://files.old-faithful.net/672/epoch-672.car) | | ✓ | 2024-11-29 04:38:18 | ✓ | ✓ | -| 671 | [epoch-671.car](https://files.old-faithful.net/671/epoch-671.car) | n/a | [297](https://files.old-faithful.net/671/epoch-671.car) | | ✓ | 2024-11-29 04:38:19 | ✓ | ✓ | -| 670 | [epoch-670.car](https://files.old-faithful.net/670/epoch-670.car) | n/a | [305](https://files.old-faithful.net/670/epoch-670.car) | | ✓ | 2024-11-29 04:38:21 | ✓ | ✓ | From 3d5294ba5cbdbb74d3de7086c9527e0b75cd7a40 Mon Sep 17 00:00:00 2001 From: Pedro Date: Thu, 16 Jan 2025 12:40:15 +0700 Subject: [PATCH 26/33] show link to failed log when validation fails --- .github/faithful-data-report.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/faithful-data-report.py b/.github/faithful-data-report.py index fac6b2cc..8ada04d7 100644 --- a/.github/faithful-data-report.py +++ b/.github/faithful-data-report.py @@ -154,7 +154,8 @@ def format_row(self, data: EpochData) -> str: car_cell = f"[epoch-{data.epoch}.car]({data.car})" if data.car != "n/a" else "✗" sha_cell = f"[{data.sha[:7]}]({data.sha_url})" if data.sha != "n/a" else "✗" size_cell = f"{data.size} GB" if data.size != "n/a" else "✗" - txmeta_cell = f"[✓]({data.txmeta_url})" if validate_txmeta_output(data.txmeta) else "✗" + txmeta_cell = f"[✗]({data.txmeta_url})" if data.txmeta != "n/a" and not validate_txmeta_output(data.txmeta) else \ + f"[✓]({data.txmeta_url})" if data.txmeta != "n/a" else "✗" poh_cell = f"[✓]({data.poh_url})" if validate_poh_output(data.poh) else "✗" indices_cell = "✓" if data.indices != "n/a" else "✗" indices_size_cell = f"{data.indices_size} GB" if data.indices_size != "n/a" else "✗" From d522f8bc1fcf83e163f362964e5d15ce06376795 Mon Sep 17 00:00:00 2001 From: Pedro Date: Thu, 16 Jan 2025 12:41:09 +0700 Subject: [PATCH 27/33] check for new blocktime index --- .github/faithful-data-report.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/faithful-data-report.py b/.github/faithful-data-report.py index 8ada04d7..c37590b1 100644 --- a/.github/faithful-data-report.py +++ b/.github/faithful-data-report.py @@ -89,6 +89,7 @@ async def get_indices_size(self, session: aiohttp.ClientSession, epoch: int) -> f"epoch-{epoch}-{bafy}-mainnet-sig-to-cid.index", f"epoch-{epoch}-{bafy}-mainnet-sig-exists.index", f"epoch-{epoch}-{bafy}-mainnet-slot-to-cid.index", + f"epoch-{epoch}-{bafy}-mainnet-slot-to-blocktime.index", f"epoch-{epoch}-gsfa.index.tar.zstd" ] From a019d8f8833368dc65d37df22415d14cca49cba0 Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 22 Jan 2025 17:32:30 +0700 Subject: [PATCH 28/33] same for pohh --- .github/faithful-data-report.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/faithful-data-report.py b/.github/faithful-data-report.py index c37590b1..0ff71167 100644 --- a/.github/faithful-data-report.py +++ b/.github/faithful-data-report.py @@ -157,7 +157,8 @@ def format_row(self, data: EpochData) -> str: size_cell = f"{data.size} GB" if data.size != "n/a" else "✗" txmeta_cell = f"[✗]({data.txmeta_url})" if data.txmeta != "n/a" and not validate_txmeta_output(data.txmeta) else \ f"[✓]({data.txmeta_url})" if data.txmeta != "n/a" else "✗" - poh_cell = f"[✓]({data.poh_url})" if validate_poh_output(data.poh) else "✗" + poh_cell = f"[✗]({data.poh_url})" if data.poh != "n/a" and not validate_poh_output(data.poh) else \ + f"[✓]({data.poh_url})" if data.poh != "n/a" else "✗" indices_cell = "✓" if data.indices != "n/a" else "✗" indices_size_cell = f"{data.indices_size} GB" if data.indices_size != "n/a" else "✗" deals_cell = f"[✓]({data.deals})" if data.deals != "n/a" else "✗" From 66cd0ccb494c19e6ad7471165a8cacbc2218d533 Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 22 Jan 2025 17:34:08 +0700 Subject: [PATCH 29/33] as --- .github/workflows/data-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/data-report.yml b/.github/workflows/data-report.yml index 86b649a2..3f53cafd 100644 --- a/.github/workflows/data-report.yml +++ b/.github/workflows/data-report.yml @@ -4,7 +4,7 @@ concurrency: name: Data Report Generator on: - push: + # push: schedule: - cron: '3 */4 * * *' # Run once an hour workflow_dispatch: # Allow manual trigger From 15d816e0ca1ed60a7700fc0625ef208e28052299 Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 5 Mar 2025 17:46:00 +0700 Subject: [PATCH 30/33] updates --- .github/faithful-data-report.py | 134 +++++++++++++++++++++++++------- 1 file changed, 104 insertions(+), 30 deletions(-) diff --git a/.github/faithful-data-report.py b/.github/faithful-data-report.py index 0ff71167..b2913f4e 100644 --- a/.github/faithful-data-report.py +++ b/.github/faithful-data-report.py @@ -23,6 +23,16 @@ class FaithfulDataReport: def __init__(self): self.host = "https://files.old-faithful.net" self.deals_host = "https://filecoin-car-storage-cdn.b-cdn.net" + self.txmeta_first_epoch = 92 + self.issues = [] # Track issues for summary report + self.index_files = [ + "mainnet-cid-to-offset-and-size.index", + "mainnet-sig-to-cid.index", + "mainnet-sig-exists.index", + "mainnet-slot-to-cid.index", + "mainnet-slot-to-blocktime.index", + "gsfa.index.tar.zstd" + ] async def check_url(self, session: aiohttp.ClientSession, url: str) -> bool: try: @@ -45,36 +55,73 @@ async def get_size(self, session: aiohttp.ClientSession, url: str) -> str: async with session.head(url) as response: if response.status == 200: size_bytes = int(response.headers.get('content-length', 0)) - size_gb = round(size_bytes / (1024 * 1024 * 1024)) - return str(size_gb) + if size_bytes > 0: + size_gb = max(1, round(size_bytes / (1024 * 1024 * 1024))) + return str(size_gb) except: pass return "n/a" + async def check_gsfa_magic(self, session: aiohttp.ClientSession, epoch: int) -> bool: + """ + Validates the GSFA index manifest by checking its magic version + Returns True if valid, False otherwise + """ + manifest_url = f"{self.host}/{epoch}/gsfa.manifest" + try: + headers = {'Range': 'bytes=8-15'} + async with session.get(manifest_url, headers=headers) as response: + if response.status == 206: # Partial Content + content = await response.read() + # Convert bytes to hex string + hex_content = ''.join([f'{b:02X}' for b in content]) + return hex_content == '0500000000000000' + except: + pass + return False + async def get_indices(self, session: aiohttp.ClientSession, epoch: int) -> str: cid_url = f"{self.host}/{epoch}/epoch-{epoch}.cid" # Get the CID first bafy = await self.fetch_text(session, cid_url) if not bafy: + self.issues.append((epoch, ["failed to get CID"])) return "n/a" - # Check all required index files - index_files = [ - f"epoch-{epoch}-{bafy}-mainnet-cid-to-offset-and-size.index", - f"epoch-{epoch}-{bafy}-mainnet-sig-to-cid.index", - f"epoch-{epoch}-{bafy}-mainnet-sig-exists.index", - f"epoch-{epoch}-{bafy}-mainnet-slot-to-cid.index", - f"epoch-{epoch}-gsfa.index.tar.zstd" - ] - + # Check all regular index files excluding gsfa + regular_files = self.index_files[:-1] # All files except gsfa checks = await asyncio.gather(*[ - self.check_url(session, f"{self.host}/{epoch}/{file}") - for file in index_files + self.check_url(session, f"{self.host}/{epoch}/epoch-{epoch}-{bafy}-{file}") + for file in regular_files ]) + + # Track which files failed validation + missing_files = [] + for i, exists in enumerate(checks): + if not exists: + missing_files.append(f"missing index file: {regular_files[i]}") + + # Check gsfa file existence and validate its magic version + gsfa_file = self.index_files[-1] + gsfa_exists = await self.check_url(session, f"{self.host}/{epoch}/epoch-{epoch}-{gsfa_file}") + gsfa_valid = True + if not gsfa_exists: + missing_files.append("missing GSFA index file") + else: + gsfa_valid = await self.check_gsfa_magic(session, epoch) + if not gsfa_valid: + missing_files.append("GSFA index file failed magic validation") + + # Add all missing files to issues if any + if missing_files: + self.issues.append((epoch, missing_files)) + + # Add gsfa validation result to checks + checks.append(gsfa_exists and gsfa_valid) return f"{self.host}/{epoch}/epoch-{epoch}-indices" if all(checks) else "n/a" - + async def get_indices_size(self, session: aiohttp.ClientSession, epoch: int) -> str: cid_url = f"{self.host}/{epoch}/epoch-{epoch}.cid" @@ -83,20 +130,15 @@ async def get_indices_size(self, session: aiohttp.ClientSession, epoch: int) -> if not bafy: return "n/a" - # Check all required index files - index_files = [ - f"epoch-{epoch}-{bafy}-mainnet-cid-to-offset-and-size.index", - f"epoch-{epoch}-{bafy}-mainnet-sig-to-cid.index", - f"epoch-{epoch}-{bafy}-mainnet-sig-exists.index", - f"epoch-{epoch}-{bafy}-mainnet-slot-to-cid.index", - f"epoch-{epoch}-{bafy}-mainnet-slot-to-blocktime.index", - f"epoch-{epoch}-gsfa.index.tar.zstd" - ] - + # Get sizes for all regular index files (excluding gsfa which has a different naming pattern) sizes = await asyncio.gather(*[ - self.get_size(session, f"{self.host}/{epoch}/{file}") - for file in index_files + self.get_size(session, f"{self.host}/{epoch}/epoch-{epoch}-{bafy}-{file}") + for file in self.index_files[:-1] # All files except gsfa ]) + + # Get gsfa size separately since it doesn't include the bafy CID in its filename + gsfa_size = await self.get_size(session, f"{self.host}/{epoch}/epoch-{epoch}-{self.index_files[-1]}") + sizes.append(gsfa_size) # Convert sizes to integers, treating "n/a" as 0 size_ints = [int(size) if size != "n/a" else 0 for size in sizes] @@ -155,14 +197,37 @@ def format_row(self, data: EpochData) -> str: car_cell = f"[epoch-{data.epoch}.car]({data.car})" if data.car != "n/a" else "✗" sha_cell = f"[{data.sha[:7]}]({data.sha_url})" if data.sha != "n/a" else "✗" size_cell = f"{data.size} GB" if data.size != "n/a" else "✗" - txmeta_cell = f"[✗]({data.txmeta_url})" if data.txmeta != "n/a" and not validate_txmeta_output(data.txmeta) else \ - f"[✓]({data.txmeta_url})" if data.txmeta != "n/a" else "✗" + + # Special handling for early epochs (0-12) txmeta validation + if 0 <= data.epoch < self.txmeta_first_epoch and data.txmeta != "n/a": + txmeta_cell = f"[★]({data.txmeta_url})" + else: + txmeta_cell = f"[✗]({data.txmeta_url})" if data.txmeta != "n/a" and not validate_txmeta_output(data.txmeta) else \ + f"[✓]({data.txmeta_url})" if data.txmeta != "n/a" else "✗" + poh_cell = f"[✗]({data.poh_url})" if data.poh != "n/a" and not validate_poh_output(data.poh) else \ f"[✓]({data.poh_url})" if data.poh != "n/a" else "✗" indices_cell = "✓" if data.indices != "n/a" else "✗" indices_size_cell = f"{data.indices_size} GB" if data.indices_size != "n/a" else "✗" deals_cell = f"[✓]({data.deals})" if data.deals != "n/a" else "✗" + # Track issues for summary report + issues = [] + if data.car == "n/a": issues.append("missing CAR") + if data.sha == "n/a": issues.append("missing SHA") + if data.size == "n/a": issues.append("missing size") + if data.poh == "n/a": issues.append("missing POH check") + elif not validate_poh_output(data.poh): issues.append("failed POH check") + if data.txmeta == "n/a": issues.append("missing tx meta check") + elif not validate_txmeta_output(data.txmeta) and not (0 <= data.epoch < self.txmeta_first_epoch): + issues.append("failed tx meta check") + if data.indices == "n/a": issues.append("missing indices") + if data.indices_size == "n/a": issues.append("missing indices size") + #if data.deals == "n/a": issues.append("missing deals") + + if issues: + self.issues.append((data.epoch, issues)) + return f"| {data.epoch} | {car_cell} | {sha_cell} | {size_cell} | {txmeta_cell} | {poh_cell} | {indices_cell} | {indices_size_cell} | {deals_cell} |" async def get_current_epoch(self) -> int: @@ -176,10 +241,11 @@ async def get_current_epoch(self) -> int: async def run(self): current_epoch = await self.get_current_epoch() - epochs = range(current_epoch, -1, -1) # descending order - + epochs = range((current_epoch-1), -1, -1) # descending order + print("| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Indices Size | Filecoin Deals |") print("|---|---|---|---|---|---|---|---|---|") + print("|%s|currently ongoing||||||||" % current_epoch) # concurrency levels chunk_size = 20 @@ -195,6 +261,14 @@ async def run(self): for result in results: print(self.format_row(result)) + print("\n★ = tx meta validation skipped (epochs 0-%s where tx meta wasn't enabled yet)" % self.txmeta_first_epoch) + + # Print summary report + if self.issues: + print("\n### Summary of Issues") + for epoch, issues in sorted(self.issues): + print(f"- Epoch {epoch}: {', '.join(issues)}") + def validate_txmeta_output(txmeta_text: str) -> bool: """ Validates that txmeta check output shows zero missing and zero parsing errors From ac7e56e25beda34574ab3869e20237d19402c598 Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 5 Mar 2025 17:48:13 +0700 Subject: [PATCH 31/33] updates --- .github/faithful-data-report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/faithful-data-report.py b/.github/faithful-data-report.py index b2913f4e..f84ffb51 100644 --- a/.github/faithful-data-report.py +++ b/.github/faithful-data-report.py @@ -198,7 +198,7 @@ def format_row(self, data: EpochData) -> str: sha_cell = f"[{data.sha[:7]}]({data.sha_url})" if data.sha != "n/a" else "✗" size_cell = f"{data.size} GB" if data.size != "n/a" else "✗" - # Special handling for early epochs (0-12) txmeta validation + # Special handling for earlier epochs txmeta validation if 0 <= data.epoch < self.txmeta_first_epoch and data.txmeta != "n/a": txmeta_cell = f"[★]({data.txmeta_url})" else: From 42214d5ae47cf49d2db3cd57dbdc3f54dc7905e9 Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 5 Mar 2025 17:51:56 +0700 Subject: [PATCH 32/33] updates --- .github/faithful-data-report.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/.github/faithful-data-report.py b/.github/faithful-data-report.py index f84ffb51..1831479f 100644 --- a/.github/faithful-data-report.py +++ b/.github/faithful-data-report.py @@ -18,6 +18,7 @@ class EpochData: deals: str = "n/a" indices: str = "n/a" indices_size: str = "n/a" + slots_url: str = "n/a" class FaithfulDataReport: def __init__(self): @@ -178,6 +179,10 @@ async def get_epoch_data(self, session: aiohttp.ClientSession, epoch: int) -> Ep self.get_deals(session, epoch) ) + # Construct slots.txt URL + slots_url = f"{self.host}/{epoch}/{epoch}.slots.txt" + + return EpochData( epoch=epoch, car=car_url, @@ -190,7 +195,8 @@ async def get_epoch_data(self, session: aiohttp.ClientSession, epoch: int) -> Ep txmeta_url=txmeta_url, deals=deals, indices=indices, - indices_size=indices_size + indices_size=indices_size, + slots_url=slots_url ) def format_row(self, data: EpochData) -> str: @@ -210,6 +216,7 @@ def format_row(self, data: EpochData) -> str: indices_cell = "✓" if data.indices != "n/a" else "✗" indices_size_cell = f"{data.indices_size} GB" if data.indices_size != "n/a" else "✗" deals_cell = f"[✓]({data.deals})" if data.deals != "n/a" else "✗" + slots_cell = f"[slots]({data.slots_url})" if data.slots_url != "n/a" else "✗" # Track issues for summary report issues = [] @@ -228,7 +235,7 @@ def format_row(self, data: EpochData) -> str: if issues: self.issues.append((data.epoch, issues)) - return f"| {data.epoch} | {car_cell} | {sha_cell} | {size_cell} | {txmeta_cell} | {poh_cell} | {indices_cell} | {indices_size_cell} | {deals_cell} |" + return f"| {data.epoch} | {car_cell} | {sha_cell} | {size_cell} | {txmeta_cell} | {poh_cell} | {indices_cell} | {indices_size_cell} | {deals_cell} | {slots_cell} |" async def get_current_epoch(self) -> int: async with aiohttp.ClientSession() as session: @@ -243,9 +250,9 @@ async def run(self): current_epoch = await self.get_current_epoch() epochs = range((current_epoch-1), -1, -1) # descending order - print("| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Indices Size | Filecoin Deals |") - print("|---|---|---|---|---|---|---|---|---|") - print("|%s|currently ongoing||||||||" % current_epoch) + print("| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Indices Size | Filecoin Deals | Slots") + print("|---|---|---|---|---|---|---|---|---|---|") + print("|%s|currently ongoing|||||||||" % current_epoch) # concurrency levels chunk_size = 20 From b9656027ddc8dd6671b660e0399a0f76928a7584 Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 5 Mar 2025 18:16:38 +0700 Subject: [PATCH 33/33] s --- .github/faithful-data-report.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/faithful-data-report.py b/.github/faithful-data-report.py index ec526501..4dcbb2c1 100644 --- a/.github/faithful-data-report.py +++ b/.github/faithful-data-report.py @@ -216,7 +216,7 @@ def format_row(self, data: EpochData) -> str: indices_cell = "✓" if data.indices != "n/a" else "✗" indices_size_cell = f"{data.indices_size} GB" if data.indices_size != "n/a" else "✗" deals_cell = f"[✓]({data.deals})" if data.deals != "n/a" else "✗" - slots_cell = f"[slots]({data.slots_url})" if data.slots_url != "n/a" else "✗" + slots_cell = f"[{data.epoch}.slots.txt]({data.slots_url})" if data.slots_url != "n/a" else "✗" # Track issues for summary report issues = [] @@ -252,7 +252,7 @@ async def run(self): print("| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Indices Size | Filecoin Deals | Slots") print("|---|---|---|---|---|---|---|---|---|---|") - print("|%s|currently ongoing|||||||||" % current_epoch) + print("|%s|epoch is|ongoing||||||||" % current_epoch) # concurrency levels chunk_size = 20