diff --git a/src/script/hostgraph/hostgraph_config.sh b/src/script/hostgraph/hostgraph_config.sh index 27a6aa9..46a9672 100644 --- a/src/script/hostgraph/hostgraph_config.sh +++ b/src/script/hostgraph/hostgraph_config.sh @@ -12,7 +12,7 @@ ### saved as tuples # crawls to be processed -CRAWLS=("CC-MAIN-2023-40" "CC-MAIN-2023-50" "CC-MAIN-2024-10") +CRAWLS=("CC-MAIN-2023-50" "CC-MAIN-2024-10" "CC-MAIN-2024-18") INPUT_BASE_URL="s3://commoncrawl/" @@ -42,7 +42,7 @@ S3A_OUTPUT_PREFIX=s3a://commoncrawl-webgraph ################################################################################ # construct a merged graph of multiple monthly crawls -MERGE_NAME=cc-main-2023-24-sep-nov-feb +MERGE_NAME=cc-main-2024-nov-feb-apr # Naming convention should be the three months' crawls that are # used to generate this graph release. In the event of multiple months