nftstorage · francardoso93 · Jan 24, 2023 · Jan 25, 2023 · Jan 25, 2023 · Jan 26, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,5 @@
 node_modules
 .env
+out
+dedupe
+pinned
diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,4 @@
-FROM cimg/node:16.14.2
+FROM cimg/node:18.13.0
 USER circleci
 RUN mkdir -p /home/circleci/app
 WORKDIR /home/circleci/app

diff --git a/README.md b/README.md
@@ -1,32 +1,29 @@
 # Backup
 
-A tool to backup all data for disaster recovery!
+A tool to back up data from IPFS to an S3 bucket.
 
 ## Usage
 
 Drop a `.env` file in the project root and populate:
 
 ```sh
-DATABASE_CONNECTION=<value>
-RO_DATABASE_CONNECTION=<value>
-IPFS_ADDRS=<value>
+DATA_URL=<value> # URL of an ndjson file whose objects each have a CID property to back up
 S3_REGION=<value>
-S3_ACCESS_KEY_ID=<value>
-S3_SECRET_ACCESS_KEY=<value>
 S3_BUCKET_NAME=<value>
-REDIS_CONN_STRING=<value> # optional - used to store CIDs that timed out during fetch
+S3_ACCESS_KEY_ID=<value> # optional
+S3_SECRET_ACCESS_KEY=<value> # optional
+CONCURRENCY=<number> # optional
+BATCH_SIZE=<number> # optional
 ```
 
-Replace `DATABASE_CONNECTION` with the connection string for the database you want to write to and `RO_DATABASE_CONNECTION` with the connection string for the database you want to read from, `IPFS_ADDRS` with the multiaddrs of nodes where content can be found and `S3_*` with the relevant S3 bucket details.
-
 Start the backup:
 
 ```sh
 npm start
-# with optional start date:
-npm start -- 2021-12-25
 ```
 
+Use `DEBUG=*` to get detailed debugging info.
+
 The tool writes _complete_ CAR files to the S3 bucket to a path like: `complete/<CID>.car`. Where `CID` is a normalized, v1 base32 encoded CID.
 
 ### Docker

diff --git a/bin.js b/bin.js
@@ -6,17 +6,13 @@ import { startBackup } from './index.js'
 dotenv.config()
 
 startBackup({
-  startDate: process.argv[2] ? new Date(process.argv[2]) : undefined,
-  dbConnString: mustGetEnv('DATABASE_CONNECTION'),
-  roDbConnString: mustGetEnv('RO_DATABASE_CONNECTION'),
-  ipfsAddrs: mustGetEnv('IPFS_ADDRS').split(','),
+  dataURL: mustGetEnv('DATA_URL'),
   s3Region: mustGetEnv('S3_REGION'),
-  s3AccessKeyId: mustGetEnv('S3_ACCESS_KEY_ID'),
-  s3SecretAccessKey: mustGetEnv('S3_SECRET_ACCESS_KEY'),
   s3BucketName: mustGetEnv('S3_BUCKET_NAME'),
-  maxDagSize: process.env.MAX_DAG_SIZE ? parseInt(process.env.MAX_DAG_SIZE) : undefined,
+  s3AccessKeyId: process.env.S3_ACCESS_KEY_ID,
+  s3SecretAccessKey: process.env.S3_SECRET_ACCESS_KEY,
   concurrency: process.env.CONCURRENCY ? parseInt(process.env.CONCURRENCY) : undefined,
-  redisConnString: process.env.REDIS_CONN_STRING
+  batchSize: process.env.BATCH_SIZE ? parseInt(process.env.BATCH_SIZE) : undefined
 })
 
 /**

diff --git a/bindings.d.ts b/bindings.d.ts
diff --git a/candidate.js b/candidate.js
diff --git a/export.js b/export.js