diff --git a/ingest/fc/fc-crawl/docker-compose.yml b/ingest/fc/fc-crawl/docker-compose.yml index 577359c..ccabfeb 100644 --- a/ingest/fc/fc-crawl/docker-compose.yml +++ b/ingest/fc/fc-crawl/docker-compose.yml @@ -130,6 +130,7 @@ services: - "PORT=8010" - "NODE_ENV=production" - "PUPPETEER_CLUSTER_SIZE=64" + # The renderer supports substituting the @VERSION@ placeholder - "USER_AGENT_ADDITIONAL=bl.uk_ldfc_renderbot/@VERSION@ (+https://www.bl.uk/legal-deposit/web-archiving)" volumes: - "${STORAGE_PATH}/heritrix/wren:/output/warcs" diff --git a/ingest/fc/prod/create-webrender-network.sh b/ingest/fc/prod/create-webrender-network.sh deleted file mode 100755 index becd4b8..0000000 --- a/ingest/fc/prod/create-webrender-network.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -docker network create --driver overlay --subnet 10.1.0.0/16 --attachable fc_crawl_webrender_network diff --git a/ingest/fc/prod/deploy-fc-crawl.sh b/ingest/fc/prod/deploy-fc-crawl.sh index 445932a..7c8c116 100755 --- a/ingest/fc/prod/deploy-fc-crawl.sh +++ b/ingest/fc/prod/deploy-fc-crawl.sh @@ -1,7 +1,7 @@ #!/bin/sh set -e ENVFILE=$1 -DEBUG=1 +DEBUG= # read environment file @@ -35,7 +35,6 @@ for _d in ${HERITRIX_OUTPUT_PATH} ${HERITRIX_WREN_PATH} ${SURTS_NPLD_PATH} ${SUR } fi done -exit # start FC crawler stack docker stack deploy -c ../fc-crawl/docker-compose.yml fc_crawl diff --git a/ingest/fc/prod/env-aws-fc2023-prod.sh b/ingest/fc/prod/env-aws-fc2023-prod.sh index fbf017e..ea08986 100644 --- a/ingest/fc/prod/env-aws-fc2023-prod.sh +++ b/ingest/fc/prod/env-aws-fc2023-prod.sh @@ -21,6 +21,7 @@ export WARCPROX_PATH=${STORAGE_PATH}/warcprox # crawler details export CRAWL_HOST_LAN_IP=172.31.43.254 export CRAWL_HOST_WAN_IP=18.130.205.6 -export H3_UID=$(id -u) -export HERITRIX_VERSION=2.9.0 +#export H3_UID=$(id -u) +export H3_UID=0 +export HERITRIX_VERSION=2.9.3 export CDXSERVER_ENDPOINT=http://${CRAWL_HOST_LAN_IP}:8081/fc