From 715c8faeade9a25b21b057995b3c3de4a11e6825 Mon Sep 17 00:00:00 2001
From: Sebastian Nagel
Date: Thu, 19 Dec 2024 19:08:05 +0100
Subject: [PATCH] chore: update hostgraph configuration for cc-main-2024-oct-nov-dec

---
 src/script/hostgraph/hostgraph_config.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/script/hostgraph/hostgraph_config.sh b/src/script/hostgraph/hostgraph_config.sh
index b7bd0ad..b67c161 100644
--- a/src/script/hostgraph/hostgraph_config.sh
+++ b/src/script/hostgraph/hostgraph_config.sh
@@ -12,7 +12,7 @@
 ### saved as tuples
 
 # crawls to be processed
-CRAWLS=("CC-MAIN-2024-38" "CC-MAIN-2024-42" "CC-MAIN-2024-46")
+CRAWLS=("CC-MAIN-2024-42" "CC-MAIN-2024-46" "CC-MAIN-2024-51")
 
 INPUT_BASE_URL="s3://commoncrawl/"
 
@@ -42,7 +42,7 @@ S3A_OUTPUT_PREFIX=s3a://commoncrawl-webgraph
 
 ################################################################################
 # construct a merged graph of multiple monthly crawls
-MERGE_NAME=cc-main-2024-sep-oct-nov
+MERGE_NAME=cc-main-2024-oct-nov-dec
 # Naming convention should be the three months' crawls that are
 # used to generate this graph release. In the event of multiple months