Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save tsibley/e92c67d35491ca9a2c94df27bf934e7f to your computer and use it in GitHub Desktop.
Save tsibley/e92c67d35491ca9a2c94df27bf934e7f to your computer and use it in GitHub Desktop.
From 5de6c904a6be1c775a576f6497e8a7a2c1f2f06b Mon Sep 17 00:00:00 2001
From: James Hadfield <hadfield.james@gmail.com>
Date: Mon, 25 Sep 2023 13:32:49 +1300
Subject: [PATCH] Collect resources from core + staging buckets
This sets out the pattern for reading S3 inventories and turning them
into resource collections. The JSON output will ultimately be used by
nextstrain.org to both provide a listing of available resources and to
be queried by versioned dataset requests (in order to go from a
requested date to the corresponding S3 version IDs of the relevant
objects).
Eventually this flat JSON file may be replaced with a database,
but for now this is a simple way to introduce the functionality. The
collected resources JSON for core + staging is a ~3.2MB JSON file
(gzipped). When naively loaded into Node.js it increases the total size of
the allocated heap (V8) by ~60MB (presumably this would be reduced by
mapping certain string constants to variables).
Currently this only works for the S3 buckets nextstrain-data and
nextstrain-staging. Narratives are not yet considered, in part because
they are not stored on S3.
Run `node resourceIndexer/main.js --help` to see how to run the indexer.
AWS credentials with permission to read s3://nextstrain-inventories will
need to be set in the usual way.
---
.gitignore | 3 +
package-lock.json | 494 ++++++++++++++++++++++++++++++-
package.json | 4 +-
resourceIndexer/constants.js | 35 +++
resourceIndexer/coreStagingS3.js | 294 ++++++++++++++++++
resourceIndexer/errors.js | 1 +
resourceIndexer/inventory.js | 255 ++++++++++++++++
resourceIndexer/logger.js | 10 +
resourceIndexer/main.js | 113 +++++++
9 files changed, 1196 insertions(+), 13 deletions(-)
create mode 100644 resourceIndexer/constants.js
create mode 100644 resourceIndexer/coreStagingS3.js
create mode 100644 resourceIndexer/errors.js
create mode 100644 resourceIndexer/inventory.js
create mode 100644 resourceIndexer/logger.js
create mode 100644 resourceIndexer/main.js
diff --git a/.gitignore b/.gitignore
index 8d0b1a69..564a660f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,9 @@
# local key-value storage (e.g. when Redis isn't configured)
/data/kv.db
+# data caches etc for development purposes
+/devData/
+
# Generated by scripts/collect-datasets.js
/data/datasets_influenza.json
/data/datasets_staging.json
diff --git a/package-lock.json b/package-lock.json
index 0d7be3de..e2d64630 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -34,9 +34,11 @@
"jszip": "^3.10.1",
"keyv": "^4.5.4",
"lodash.partition": "^4.6.0",
+ "luxon": "^3.4.3",
"make-fetch-happen": "^10.0.0",
"marked": "^0.7.0",
"mime": "^2.5.2",
+ "neat-csv": "^7.0.0",
"negotiator": "^0.6.2",
"node-fetch": "^2.6.0",
"passport": "^0.4.0",
@@ -45,6 +47,7 @@
"proxy-agent": "^6.3.1",
"raw-body": "^2.4.2",
"session-file-store": "^1.3.1",
+ "winston": "^3.11.0",
"yaml-front-matter": "^4.0.0"
},
"devDependencies": {
@@ -60,7 +63,6 @@
"http-proxy-middleware": "^1.3.1",
"jest": "^27.5.1",
"jest-extended": "^1.1.0",
- "luxon": "^3.0.4",
"nodemon": "^2.0.22",
"request": "^2.88.2",
"start-server-and-test": "^1.11.4"
@@ -4563,6 +4565,24 @@
"integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==",
"dev": true
},
+ "node_modules/@colors/colors": {
+ "version": "1.6.0",
+ "resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.6.0.tgz",
+ "integrity": "sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA==",
+ "engines": {
+ "node": ">=0.1.90"
+ }
+ },
+ "node_modules/@dabh/diagnostics": {
+ "version": "2.0.3",
+ "resolved": "https://registry.npmjs.org/@dabh/diagnostics/-/diagnostics-2.0.3.tgz",
+ "integrity": "sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==",
+ "dependencies": {
+ "colorspace": "1.1.x",
+ "enabled": "2.0.x",
+ "kuler": "^2.0.0"
+ }
+ },
"node_modules/@eslint/eslintrc": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-1.3.2.tgz",
@@ -8330,6 +8350,11 @@
"integrity": "sha512-Hl219/BT5fLAaz6NDkSuhzasy49dwQS/DSdu4MdggFB8zcXv7vflBI3xp7FEmkmdDkBUI2bPUNeMttp2knYdxw==",
"dev": true
},
+ "node_modules/@types/triple-beam": {
+ "version": "1.3.4",
+ "resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.4.tgz",
+ "integrity": "sha512-HlJjF3wxV4R2VQkFpKe0YqJLilYNgtRtsqqZtby7RkVsSs+i+vbyzjtUwpFEdUCKcrGzCiEJE7F/0mKjh0sunA=="
+ },
"node_modules/@types/ws": {
"version": "8.5.3",
"resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.5.3.tgz",
@@ -8836,6 +8861,11 @@
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
},
+ "node_modules/async": {
+ "version": "3.2.5",
+ "resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz",
+ "integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg=="
+ },
"node_modules/async-limiter": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.1.tgz",
@@ -10250,6 +10280,15 @@
"integrity": "sha512-iBPtljfCNcTKNAto0KEtDfZ3qzjJvqE3aTGZsbhjSBlorqpXJlaWWtPO35D+ZImoC3KWejX64o+yPGxhWSTzfg==",
"dev": true
},
+ "node_modules/color": {
+ "version": "3.2.1",
+ "resolved": "https://registry.npmjs.org/color/-/color-3.2.1.tgz",
+ "integrity": "sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA==",
+ "dependencies": {
+ "color-convert": "^1.9.3",
+ "color-string": "^1.6.0"
+ }
+ },
"node_modules/color-convert": {
"version": "1.9.3",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
@@ -10263,6 +10302,15 @@
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU="
},
+ "node_modules/color-string": {
+ "version": "1.9.1",
+ "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz",
+ "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==",
+ "dependencies": {
+ "color-name": "^1.0.0",
+ "simple-swizzle": "^0.2.2"
+ }
+ },
"node_modules/color-support": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz",
@@ -10271,6 +10319,15 @@
"color-support": "bin.js"
}
},
+ "node_modules/colorspace": {
+ "version": "1.1.4",
+ "resolved": "https://registry.npmjs.org/colorspace/-/colorspace-1.1.4.tgz",
+ "integrity": "sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w==",
+ "dependencies": {
+ "color": "^3.1.3",
+ "text-hex": "1.0.x"
+ }
+ },
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
@@ -10476,6 +10533,28 @@
"integrity": "sha512-b0tGHbfegbhPJpxpiBPU2sCkigAqtM9O121le6bbOlgyV+NyGyCmVfJ6QW9eRjz8CpNfWEOYBIMIGRYkLwsIYg==",
"dev": true
},
+ "node_modules/csv-parser": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.0.0.tgz",
+ "integrity": "sha512-s6OYSXAK3IdKqYO33y09jhypG/bSDHPuyCme/IdEHfWpLf/jKcpitVFyOC6UemgGk8v7Q5u2XE0vvwmanxhGlQ==",
+ "dependencies": {
+ "minimist": "^1.2.0"
+ },
+ "bin": {
+ "csv-parser": "bin/csv-parser"
+ },
+ "engines": {
+ "node": ">= 10"
+ }
+ },
+ "node_modules/csv-parser/node_modules/minimist": {
+ "version": "1.2.8",
+ "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
+ "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
"node_modules/dashdash": {
"version": "1.14.1",
"resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz",
@@ -10802,6 +10881,11 @@
"url": "https://github.com/sindresorhus/emittery?sponsor=1"
}
},
+ "node_modules/enabled": {
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/enabled/-/enabled-2.0.0.tgz",
+ "integrity": "sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ=="
+ },
"node_modules/encodeurl": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
@@ -11858,6 +11942,11 @@
"bser": "2.1.1"
}
},
+ "node_modules/fecha": {
+ "version": "4.2.3",
+ "resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz",
+ "integrity": "sha512-OP2IUU6HeYKJi3i0z4A19kHMQoLVs4Hc+DPqqxI2h/DPZHTm/vjsfC6P0b4jCMy14XizLBqvndQ+UilD7707Jw=="
+ },
"node_modules/file-entry-cache": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz",
@@ -12006,6 +12095,11 @@
"integrity": "sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==",
"dev": true
},
+ "node_modules/fn.name": {
+ "version": "1.1.0",
+ "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz",
+ "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw=="
+ },
"node_modules/follow-redirects": {
"version": "1.5.10",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.5.10.tgz",
@@ -12976,7 +13070,6 @@
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz",
"integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==",
- "dev": true,
"engines": {
"node": ">=8"
},
@@ -15746,6 +15839,11 @@
"node": ">=6"
}
},
+ "node_modules/kuler": {
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz",
+ "integrity": "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A=="
+ },
"node_modules/lazy-ass": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/lazy-ass/-/lazy-ass-1.6.0.tgz",
@@ -15866,6 +15964,27 @@
"resolved": "https://registry.npmjs.org/lodash.partition/-/lodash.partition-4.6.0.tgz",
"integrity": "sha1-o45GtzRp4EILDaEhLmbUFL42S6Q="
},
+ "node_modules/logform": {
+ "version": "2.6.0",
+ "resolved": "https://registry.npmjs.org/logform/-/logform-2.6.0.tgz",
+ "integrity": "sha512-1ulHeNPp6k/LD8H91o7VYFBng5i1BDE7HoKxVbZiGFidS1Rj65qcywLxX+pVfAPoQJEjRdvKcusKwOupHCVOVQ==",
+ "dependencies": {
+ "@colors/colors": "1.6.0",
+ "@types/triple-beam": "^1.3.2",
+ "fecha": "^4.2.0",
+ "ms": "^2.1.1",
+ "safe-stable-stringify": "^2.3.1",
+ "triple-beam": "^1.3.0"
+ },
+ "engines": {
+ "node": ">= 12.0.0"
+ }
+ },
+ "node_modules/logform/node_modules/ms": {
+ "version": "2.1.3",
+ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
+ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
+ },
"node_modules/lowercase-keys": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-1.0.1.tgz",
@@ -15883,10 +16002,9 @@
}
},
"node_modules/luxon": {
- "version": "3.0.4",
- "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.0.4.tgz",
- "integrity": "sha512-aV48rGUwP/Vydn8HT+5cdr26YYQiUZ42NM6ToMoaGKwYfWbfLeRkEu1wXWMHBZT6+KyLfcbbtVcoQFCbbPjKlw==",
- "dev": true,
+ "version": "3.4.3",
+ "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.4.3.tgz",
+ "integrity": "sha512-tFWBiv3h7z+T/tDaoxA8rqTxy1CHV6gHS//QdaH4pulbq/JuBSGgQspQQqcgnwdAx6pNI7cmvz5Sv/addzHmUg==",
"engines": {
"node": ">=12"
}
@@ -16552,6 +16670,32 @@
"integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=",
"dev": true
},
+ "node_modules/neat-csv": {
+ "version": "7.0.0",
+ "resolved": "https://registry.npmjs.org/neat-csv/-/neat-csv-7.0.0.tgz",
+ "integrity": "sha512-ZmiKZNkdqb6hrBU3lDHm52vWXs6CuFPfw6ZoJZNnY7IIpfA1fxM0UPPi+iQpqQo82qcLbsZPwLkQ1cdrMDtwwA==",
+ "dependencies": {
+ "csv-parser": "^3.0.0",
+ "get-stream": "^6.0.1"
+ },
+ "engines": {
+ "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
+ "node_modules/neat-csv/node_modules/get-stream": {
+ "version": "6.0.1",
+ "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
+ "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==",
+ "engines": {
+ "node": ">=10"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
"node_modules/negotiator": {
"version": "0.6.2",
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz",
@@ -17083,6 +17227,14 @@
"wrappy": "1"
}
},
+ "node_modules/one-time": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/one-time/-/one-time-1.0.0.tgz",
+ "integrity": "sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==",
+ "dependencies": {
+ "fn.name": "1.x.x"
+ }
+ },
"node_modules/onetime": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz",
@@ -18199,6 +18351,14 @@
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
},
+ "node_modules/safe-stable-stringify": {
+ "version": "2.4.3",
+ "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.4.3.tgz",
+ "integrity": "sha512-e2bDA2WJT0wxseVd4lsDP4+3ONX6HpMXQa1ZhFQ7SU+GjvORCmShbCMltrtIDfkYhVHrOcPtj+KhmDBdPdZD1g==",
+ "engines": {
+ "node": ">=10"
+ }
+ },
"node_modules/safer-buffer": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
@@ -18396,6 +18556,19 @@
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz",
"integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ=="
},
+ "node_modules/simple-swizzle": {
+ "version": "0.2.2",
+ "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
+ "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==",
+ "dependencies": {
+ "is-arrayish": "^0.3.1"
+ }
+ },
+ "node_modules/simple-swizzle/node_modules/is-arrayish": {
+ "version": "0.3.2",
+ "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz",
+ "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ=="
+ },
"node_modules/simple-update-notifier": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/simple-update-notifier/-/simple-update-notifier-1.1.0.tgz",
@@ -18639,6 +18812,14 @@
"node": ">= 8"
}
},
+ "node_modules/stack-trace": {
+ "version": "0.0.10",
+ "resolved": "https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz",
+ "integrity": "sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg==",
+ "engines": {
+ "node": "*"
+ }
+ },
"node_modules/stack-utils": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-2.0.2.tgz",
@@ -19115,6 +19296,11 @@
"url": "https://github.com/sponsors/isaacs"
}
},
+ "node_modules/text-hex": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/text-hex/-/text-hex-1.0.0.tgz",
+ "integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg=="
+ },
"node_modules/text-table": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
@@ -19234,6 +19420,14 @@
"node": "*"
}
},
+ "node_modules/triple-beam": {
+ "version": "1.4.1",
+ "resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.4.1.tgz",
+ "integrity": "sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==",
+ "engines": {
+ "node": ">= 14.0.0"
+ }
+ },
"node_modules/tslib": {
"version": "1.10.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-1.10.0.tgz",
@@ -19731,6 +19925,66 @@
"node": ">= 0.10.0"
}
},
+ "node_modules/winston": {
+ "version": "3.11.0",
+ "resolved": "https://registry.npmjs.org/winston/-/winston-3.11.0.tgz",
+ "integrity": "sha512-L3yR6/MzZAOl0DsysUXHVjOwv8mKZ71TrA/41EIduGpOOV5LQVodqN+QdQ6BS6PJ/RdIshZhq84P/fStEZkk7g==",
+ "dependencies": {
+ "@colors/colors": "^1.6.0",
+ "@dabh/diagnostics": "^2.0.2",
+ "async": "^3.2.3",
+ "is-stream": "^2.0.0",
+ "logform": "^2.4.0",
+ "one-time": "^1.0.0",
+ "readable-stream": "^3.4.0",
+ "safe-stable-stringify": "^2.3.1",
+ "stack-trace": "0.0.x",
+ "triple-beam": "^1.3.0",
+ "winston-transport": "^4.5.0"
+ },
+ "engines": {
+ "node": ">= 12.0.0"
+ }
+ },
+ "node_modules/winston-transport": {
+ "version": "4.6.0",
+ "resolved": "https://registry.npmjs.org/winston-transport/-/winston-transport-4.6.0.tgz",
+ "integrity": "sha512-wbBA9PbPAHxKiygo7ub7BYRiKxms0tpfU2ljtWzb3SjRjv5yl6Ozuy/TkXf00HTAt+Uylo3gSkNwzc4ME0wiIg==",
+ "dependencies": {
+ "logform": "^2.3.2",
+ "readable-stream": "^3.6.0",
+ "triple-beam": "^1.3.0"
+ },
+ "engines": {
+ "node": ">= 12.0.0"
+ }
+ },
+ "node_modules/winston-transport/node_modules/readable-stream": {
+ "version": "3.6.2",
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
+ "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
+ "dependencies": {
+ "inherits": "^2.0.3",
+ "string_decoder": "^1.1.1",
+ "util-deprecate": "^1.0.1"
+ },
+ "engines": {
+ "node": ">= 6"
+ }
+ },
+ "node_modules/winston/node_modules/readable-stream": {
+ "version": "3.6.2",
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
+ "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
+ "dependencies": {
+ "inherits": "^2.0.3",
+ "string_decoder": "^1.1.1",
+ "util-deprecate": "^1.0.1"
+ },
+ "engines": {
+ "node": ">= 6"
+ }
+ },
"node_modules/word-wrap": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz",
@@ -23780,6 +24034,21 @@
"integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==",
"dev": true
},
+ "@colors/colors": {
+ "version": "1.6.0",
+ "resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.6.0.tgz",
+ "integrity": "sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA=="
+ },
+ "@dabh/diagnostics": {
+ "version": "2.0.3",
+ "resolved": "https://registry.npmjs.org/@dabh/diagnostics/-/diagnostics-2.0.3.tgz",
+ "integrity": "sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==",
+ "requires": {
+ "colorspace": "1.1.x",
+ "enabled": "2.0.x",
+ "kuler": "^2.0.0"
+ }
+ },
"@eslint/eslintrc": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-1.3.2.tgz",
@@ -26886,6 +27155,11 @@
"integrity": "sha512-Hl219/BT5fLAaz6NDkSuhzasy49dwQS/DSdu4MdggFB8zcXv7vflBI3xp7FEmkmdDkBUI2bPUNeMttp2knYdxw==",
"dev": true
},
+ "@types/triple-beam": {
+ "version": "1.3.4",
+ "resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.4.tgz",
+ "integrity": "sha512-HlJjF3wxV4R2VQkFpKe0YqJLilYNgtRtsqqZtby7RkVsSs+i+vbyzjtUwpFEdUCKcrGzCiEJE7F/0mKjh0sunA=="
+ },
"@types/ws": {
"version": "8.5.3",
"resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.5.3.tgz",
@@ -27273,6 +27547,11 @@
}
}
},
+ "async": {
+ "version": "3.2.5",
+ "resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz",
+ "integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg=="
+ },
"async-limiter": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.1.tgz",
@@ -28389,6 +28668,15 @@
"integrity": "sha512-iBPtljfCNcTKNAto0KEtDfZ3qzjJvqE3aTGZsbhjSBlorqpXJlaWWtPO35D+ZImoC3KWejX64o+yPGxhWSTzfg==",
"dev": true
},
+ "color": {
+ "version": "3.2.1",
+ "resolved": "https://registry.npmjs.org/color/-/color-3.2.1.tgz",
+ "integrity": "sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA==",
+ "requires": {
+ "color-convert": "^1.9.3",
+ "color-string": "^1.6.0"
+ }
+ },
"color-convert": {
"version": "1.9.3",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
@@ -28402,11 +28690,29 @@
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU="
},
+ "color-string": {
+ "version": "1.9.1",
+ "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz",
+ "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==",
+ "requires": {
+ "color-name": "^1.0.0",
+ "simple-swizzle": "^0.2.2"
+ }
+ },
"color-support": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz",
"integrity": "sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg=="
},
+ "colorspace": {
+ "version": "1.1.4",
+ "resolved": "https://registry.npmjs.org/colorspace/-/colorspace-1.1.4.tgz",
+ "integrity": "sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w==",
+ "requires": {
+ "color": "^3.1.3",
+ "text-hex": "1.0.x"
+ }
+ },
"combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
@@ -28582,6 +28888,21 @@
}
}
},
+ "csv-parser": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.0.0.tgz",
+ "integrity": "sha512-s6OYSXAK3IdKqYO33y09jhypG/bSDHPuyCme/IdEHfWpLf/jKcpitVFyOC6UemgGk8v7Q5u2XE0vvwmanxhGlQ==",
+ "requires": {
+ "minimist": "^1.2.0"
+ },
+ "dependencies": {
+ "minimist": {
+ "version": "1.2.8",
+ "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
+ "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA=="
+ }
+ }
+ },
"dashdash": {
"version": "1.14.1",
"resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz",
@@ -28835,6 +29156,11 @@
"integrity": "sha512-uDfvUjVrfGJJhymx/kz6prltenw1u7WrCg1oa94zYY8xxVpLLUu045LAT0dhDZdXG58/EpPL/5kA180fQ/qudg==",
"dev": true
},
+ "enabled": {
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/enabled/-/enabled-2.0.0.tgz",
+ "integrity": "sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ=="
+ },
"encodeurl": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
@@ -29622,6 +29948,11 @@
"bser": "2.1.1"
}
},
+ "fecha": {
+ "version": "4.2.3",
+ "resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz",
+ "integrity": "sha512-OP2IUU6HeYKJi3i0z4A19kHMQoLVs4Hc+DPqqxI2h/DPZHTm/vjsfC6P0b4jCMy14XizLBqvndQ+UilD7707Jw=="
+ },
"file-entry-cache": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz",
@@ -29737,6 +30068,11 @@
"integrity": "sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==",
"dev": true
},
+ "fn.name": {
+ "version": "1.1.0",
+ "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz",
+ "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw=="
+ },
"follow-redirects": {
"version": "1.5.10",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.5.10.tgz",
@@ -30498,8 +30834,7 @@
"is-stream": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz",
- "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==",
- "dev": true
+ "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg=="
},
"is-typedarray": {
"version": "1.0.0",
@@ -32599,6 +32934,11 @@
"integrity": "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==",
"dev": true
},
+ "kuler": {
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz",
+ "integrity": "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A=="
+ },
"lazy-ass": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/lazy-ass/-/lazy-ass-1.6.0.tgz",
@@ -32704,6 +33044,26 @@
"resolved": "https://registry.npmjs.org/lodash.partition/-/lodash.partition-4.6.0.tgz",
"integrity": "sha1-o45GtzRp4EILDaEhLmbUFL42S6Q="
},
+ "logform": {
+ "version": "2.6.0",
+ "resolved": "https://registry.npmjs.org/logform/-/logform-2.6.0.tgz",
+ "integrity": "sha512-1ulHeNPp6k/LD8H91o7VYFBng5i1BDE7HoKxVbZiGFidS1Rj65qcywLxX+pVfAPoQJEjRdvKcusKwOupHCVOVQ==",
+ "requires": {
+ "@colors/colors": "1.6.0",
+ "@types/triple-beam": "^1.3.2",
+ "fecha": "^4.2.0",
+ "ms": "^2.1.1",
+ "safe-stable-stringify": "^2.3.1",
+ "triple-beam": "^1.3.0"
+ },
+ "dependencies": {
+ "ms": {
+ "version": "2.1.3",
+ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
+ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
+ }
+ }
+ },
"lowercase-keys": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-1.0.1.tgz",
@@ -32715,10 +33075,9 @@
"integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA=="
},
"luxon": {
- "version": "3.0.4",
- "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.0.4.tgz",
- "integrity": "sha512-aV48rGUwP/Vydn8HT+5cdr26YYQiUZ42NM6ToMoaGKwYfWbfLeRkEu1wXWMHBZT6+KyLfcbbtVcoQFCbbPjKlw==",
- "dev": true
+ "version": "3.4.3",
+ "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.4.3.tgz",
+ "integrity": "sha512-tFWBiv3h7z+T/tDaoxA8rqTxy1CHV6gHS//QdaH4pulbq/JuBSGgQspQQqcgnwdAx6pNI7cmvz5Sv/addzHmUg=="
},
"make-dir": {
"version": "3.1.0",
@@ -33232,6 +33591,22 @@
"integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=",
"dev": true
},
+ "neat-csv": {
+ "version": "7.0.0",
+ "resolved": "https://registry.npmjs.org/neat-csv/-/neat-csv-7.0.0.tgz",
+ "integrity": "sha512-ZmiKZNkdqb6hrBU3lDHm52vWXs6CuFPfw6ZoJZNnY7IIpfA1fxM0UPPi+iQpqQo82qcLbsZPwLkQ1cdrMDtwwA==",
+ "requires": {
+ "csv-parser": "^3.0.0",
+ "get-stream": "^6.0.1"
+ },
+ "dependencies": {
+ "get-stream": {
+ "version": "6.0.1",
+ "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
+ "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg=="
+ }
+ }
+ },
"negotiator": {
"version": "0.6.2",
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz",
@@ -33646,6 +34021,14 @@
"wrappy": "1"
}
},
+ "one-time": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/one-time/-/one-time-1.0.0.tgz",
+ "integrity": "sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==",
+ "requires": {
+ "fn.name": "1.x.x"
+ }
+ },
"onetime": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz",
@@ -34491,6 +34874,11 @@
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
},
+ "safe-stable-stringify": {
+ "version": "2.4.3",
+ "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.4.3.tgz",
+ "integrity": "sha512-e2bDA2WJT0wxseVd4lsDP4+3ONX6HpMXQa1ZhFQ7SU+GjvORCmShbCMltrtIDfkYhVHrOcPtj+KhmDBdPdZD1g=="
+ },
"safer-buffer": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
@@ -34657,6 +35045,21 @@
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz",
"integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ=="
},
+ "simple-swizzle": {
+ "version": "0.2.2",
+ "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
+ "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==",
+ "requires": {
+ "is-arrayish": "^0.3.1"
+ },
+ "dependencies": {
+ "is-arrayish": {
+ "version": "0.3.2",
+ "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz",
+ "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ=="
+ }
+ }
+ },
"simple-update-notifier": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/simple-update-notifier/-/simple-update-notifier-1.1.0.tgz",
@@ -34845,6 +35248,11 @@
"minipass": "^3.1.1"
}
},
+ "stack-trace": {
+ "version": "0.0.10",
+ "resolved": "https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz",
+ "integrity": "sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg=="
+ },
"stack-utils": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-2.0.2.tgz",
@@ -35196,6 +35604,11 @@
}
}
},
+ "text-hex": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/text-hex/-/text-hex-1.0.0.tgz",
+ "integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg=="
+ },
"text-table": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
@@ -35290,6 +35703,11 @@
"optional": true,
"peer": true
},
+ "triple-beam": {
+ "version": "1.4.1",
+ "resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.4.1.tgz",
+ "integrity": "sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg=="
+ },
"tslib": {
"version": "1.10.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-1.10.0.tgz",
@@ -35684,6 +36102,58 @@
"optional": true,
"peer": true
},
+ "winston": {
+ "version": "3.11.0",
+ "resolved": "https://registry.npmjs.org/winston/-/winston-3.11.0.tgz",
+ "integrity": "sha512-L3yR6/MzZAOl0DsysUXHVjOwv8mKZ71TrA/41EIduGpOOV5LQVodqN+QdQ6BS6PJ/RdIshZhq84P/fStEZkk7g==",
+ "requires": {
+ "@colors/colors": "^1.6.0",
+ "@dabh/diagnostics": "^2.0.2",
+ "async": "^3.2.3",
+ "is-stream": "^2.0.0",
+ "logform": "^2.4.0",
+ "one-time": "^1.0.0",
+ "readable-stream": "^3.4.0",
+ "safe-stable-stringify": "^2.3.1",
+ "stack-trace": "0.0.x",
+ "triple-beam": "^1.3.0",
+ "winston-transport": "^4.5.0"
+ },
+ "dependencies": {
+ "readable-stream": {
+ "version": "3.6.2",
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
+ "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
+ "requires": {
+ "inherits": "^2.0.3",
+ "string_decoder": "^1.1.1",
+ "util-deprecate": "^1.0.1"
+ }
+ }
+ }
+ },
+ "winston-transport": {
+ "version": "4.6.0",
+ "resolved": "https://registry.npmjs.org/winston-transport/-/winston-transport-4.6.0.tgz",
+ "integrity": "sha512-wbBA9PbPAHxKiygo7ub7BYRiKxms0tpfU2ljtWzb3SjRjv5yl6Ozuy/TkXf00HTAt+Uylo3gSkNwzc4ME0wiIg==",
+ "requires": {
+ "logform": "^2.3.2",
+ "readable-stream": "^3.6.0",
+ "triple-beam": "^1.3.0"
+ },
+ "dependencies": {
+ "readable-stream": {
+ "version": "3.6.2",
+ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
+ "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
+ "requires": {
+ "inherits": "^2.0.3",
+ "string_decoder": "^1.1.1",
+ "util-deprecate": "^1.0.1"
+ }
+ }
+ }
+ },
"word-wrap": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz",
diff --git a/package.json b/package.json
index 366ec6e2..7e767fc9 100644
--- a/package.json
+++ b/package.json
@@ -49,9 +49,11 @@
"jszip": "^3.10.1",
"keyv": "^4.5.4",
"lodash.partition": "^4.6.0",
+ "luxon": "^3.4.3",
"make-fetch-happen": "^10.0.0",
"marked": "^0.7.0",
"mime": "^2.5.2",
+ "neat-csv": "^7.0.0",
"negotiator": "^0.6.2",
"node-fetch": "^2.6.0",
"passport": "^0.4.0",
@@ -60,6 +62,7 @@
"proxy-agent": "^6.3.1",
"raw-body": "^2.4.2",
"session-file-store": "^1.3.1",
+ "winston": "^3.11.0",
"yaml-front-matter": "^4.0.0"
},
"devDependencies": {
@@ -75,7 +78,6 @@
"http-proxy-middleware": "^1.3.1",
"jest": "^27.5.1",
"jest-extended": "^1.1.0",
- "luxon": "^3.0.4",
"nodemon": "^2.0.22",
"request": "^2.88.2",
"start-server-and-test": "^1.11.4"
diff --git a/resourceIndexer/constants.js b/resourceIndexer/constants.js
new file mode 100644
index 00000000..9ec0404e
--- /dev/null
+++ b/resourceIndexer/constants.js
@@ -0,0 +1,35 @@
+/**
+ * Matches a `_YYYY-MM-DD` datestamp anywhere in a filename. Filenames which
+ * match are rejected by `auspiceFile()` in coreStagingS3.js.
+ */
+export const DATESTAMP_REGEX = /_\d{4}-\d{2}-\d{2}/;
+
+/**
+ * Filename suffixes which end in `.json` but which `auspiceFile()` rejects
+ * before trying VALID_AUSPICE_PATTERNS (whose final catch-all "main" pattern
+ * would otherwise match them).
+ */
+export const INVALID_AUSPICE_PATTERNS = [/_seq\.json$/, /_sequences\.json$/, /_entropy\.json$/, /_titers\.json$/];
+
+/**
+ * These patterns can be used to classify files which are potentially valid
+ * auspice files. They should be checked in order, with the first match winning.
+ *
+ * Each entry is a tuple of [subresource type, regex match pattern].
+ *
+ * The subresource type (string) is the same as that used internally in the
+ * server code (used when instantiating (sub-)classes of `Subresource`)
+ */
+export const VALID_AUSPICE_PATTERNS = [
+ ["root-sequence", /_root-sequence\.json$/],
+ ["tip-frequencies", /_tip-frequencies\.json$/],
+ ["measurements", /_measurements\.json$/],
+ ["meta", /_meta\.json$/],
+ ["tree", /_tree\.json$/],
+ ["main", /\.json$/],
+]
+
+/**
+ * The subresource types from VALID_AUSPICE_PATTERNS other than 'main', 'meta'
+ * and 'tree' (i.e. 'root-sequence', 'tip-frequencies' and 'measurements').
+ */
+export const SIDECAR_TYPES = new Set(
+ VALID_AUSPICE_PATTERNS
+ .map(([subresourceType, ]) => subresourceType)
+ .filter((subresourceType) => !['main', 'meta', 'tree'].includes(subresourceType))
+)
+
+/**
+ * Following values taken to match the server's `sourceNameToClass`.
+ */
+export const SOURCE = {
+ CORE: "core",
+ STAGING: "staging",
+}
diff --git a/resourceIndexer/coreStagingS3.js b/resourceIndexer/coreStagingS3.js
new file mode 100644
index 00000000..1f0f524f
--- /dev/null
+++ b/resourceIndexer/coreStagingS3.js
@@ -0,0 +1,294 @@
+import { SOURCE, VALID_AUSPICE_PATTERNS, INVALID_AUSPICE_PATTERNS,
+ DATESTAMP_REGEX, SIDECAR_TYPES } from './constants.js';
+import { collectInventory } from './inventory.js';
+
+/**
+ * Returns true for keys under `files/` which we never want to index as
+ * intermediate files (archives, tests, per-run outputs etc).
+ */
+function _isExcludedIntermediate(key) {
+  return Boolean(
+    key.includes('/archive/')
+    || key.includes('/test/')
+    || key.includes('/workflows/')
+    || key.includes('/branch/')
+    || key.includes('/trial/')
+    || key.includes('/test-data/')
+    || key.includes('jen_test/')
+    || key.match(/\/nextclade-full-run-[\d-]+--UTC\//)
+    // forecasts-ncov. The dot is escaped so that only a literal "_results.json" matches
+    || key.match(/\/\d{4}-\d{2}-\d{2}_results\.json/)
+    || key.endsWith('.png') // forecasts-ncov
+  );
+}
+
+/**
+ * The inventory of buckets (especially the core bucket) is in some ways a
+ * historical record of work over the years, but this isn't really what we want
+ * to display to users. As some examples:
+ *   - Files which don't match a resource to list are excluded
+ *   - Datestamped files (i.e. _YYYY-MM-DD in the filename) are excluded
+ *     (we use S3 versioning instead)
+ *
+ * If the S3 object is to be excluded we return false.
+ *
+ * Otherwise the object represents (part of) a resource we want to expose and
+ * the item itself is returned with the following properties added:
+ *   - source          (SOURCE.STAGING or SOURCE.CORE)
+ *   - baseUrl         (https URL of the object, without any version ID)
+ *   - resourceType    ('dataset' or 'intermediate')
+ *   - resourcePath    (the path by which objects will be grouped together. For
+ *                      datasets this is the nextstrain.org URL path, without
+ *                      any temporal signifier)
+ *   - subresourceType (only for resourceType='dataset')
+ *
+ * Note that the item is mutated in place, and that `source` and `baseUrl` are
+ * set even when false is returned.
+ *
+ * @param {object} item S3 object; the string properties `key` and `bucket` are read
+ * @param {boolean} staging true for the staging bucket, false for the core bucket
+ * @returns {object|false} the annotated item, or false if it should be skipped
+ */
+function categoriseCoreObjects(item, staging) {
+  const key = item.key;
+  item.source = staging ? SOURCE.STAGING : SOURCE.CORE;
+  item.baseUrl = `https://${item.bucket}.s3.amazonaws.com/${key}`
+  if (key.startsWith('search_')
+    || key.startsWith('manifest_')
+    || key.startsWith('datasets_')
+  ) return false;
+
+  // On the core bucket, directory-like hierarchies are used for intermediate
+  // files. These intermediate files may include files which auspice can
+  // display, but nextstrain.org cannot map URLs to directory-like hierarchies.
+  // There are other resourceTypes here we may consider in the future -- e.g.
+  // model output JSONs
+  if (key.includes("/")) {
+    if (staging===true) return false;
+    if (!key.startsWith('files/')) return false;
+    if (_isExcludedIntermediate(key)) return false;
+
+    item.resourceType = 'intermediate';
+    /* The resourcePath is used for grouping. For a nextstrain.org dataset this
+    would be combined with the source to form a nextstrain URL, however that's
+    not applicable here. Instead we use the filepath information without the
+    leading 'files/' and without the (trailing) filename so that different
+    files in the same directory structure get grouped together. For instance,
+    files/ncov/open/x.json -> ncov/open */
+    const fields = key.split('/')
+    item.resourcePath = fields.slice(1, fields.length-1).join('/')
+    return item;
+  }
+
+  // Some filenames have a double underscore (presumably by mistake)
+  if (key.includes('__')) return false;
+
+  // We don't have narratives on the core/staging buckets, so all that's left is
+  // to check if the key looks like a valid auspice file
+  const auspiceFileInfo = auspiceFile(key);
+  if (!auspiceFileInfo) return false
+  item.resourceType = 'dataset';
+  item.subresourceType = auspiceFileInfo.subresourceType;
+
+  /**
+   * Currently the resourcePath is based completely off the key name,
+   * paralleling how the nextstrain.org URLs of datasets are mapped to resource
+   * paths and then to S3 keys. In the future we may change this in order to
+   * group together files with different S3 key names but which we want to
+   * associate with the same nextstrain.org URL. For example, we may wish to
+   * combine the auspice datasets behind `ncov/gisaid/africa` and
+   * `ncov/gisaid/africa/all-time`.
+   */
+  item.resourcePath = auspiceFileInfo.urlPath;
+
+  return item;
+}
+
+/**
+ * Classify a filename as a (potential) auspice dataset / sidecar file.
+ *
+ * Datestamped filenames and those matching INVALID_AUSPICE_PATTERNS are
+ * rejected; otherwise the first matching entry of VALID_AUSPICE_PATTERNS wins.
+ *
+ * @param {string} filename
+ * @returns {false|{subresourceType: string, urlPath: string}} false if the
+ *   filename doesn't look like an auspice file. Otherwise the subresource type
+ *   and the URL path, which is the filename with the matched suffix removed
+ *   and every underscore replaced by a slash.
+ */
+function auspiceFile(filename) {
+  if (filename.match(DATESTAMP_REGEX)) return false;
+
+  const isInvalid = INVALID_AUSPICE_PATTERNS.some((invalid) => filename.match(invalid));
+  if (isInvalid) return false;
+
+  const firstMatch = VALID_AUSPICE_PATTERNS.find(([, regex]) => filename.match(regex));
+  if (!firstMatch) return false;
+
+  const [subresourceType, suffix] = firstMatch;
+  const urlPath = filename.replace(suffix, '').replace(/_/g, '/');
+  return {subresourceType, urlPath};
+}
+
+
+/**
+ * Given a list of items (i.e. files) which appear to be valid components of a
+ * resource, group them by day and turn each day into (at most) one version of
+ * the resource. As an example, we may have
+ *   - date: A, files: X_tree.json, X_meta.json
+ *   - date: B, files: X_meta.json (invalid)
+ *   - date: C, files: X.json
+ *   - date: D, files: X.json, X_root-sequence.json
+ *   - date: E, files: X_root-sequence.json (invalid)
+ *   - date: F, files: X.json, X.json, X_root-sequence.json (valid, pick the newest X.json)
+ * and we want to produce a structure like:
+ *   {versions: [
+ *     {date: F, fileUrls: {main: url, root-sequence: url}},
+ *     {date: D, fileUrls: {main: url, root-sequence: url}},
+ *     {date: C, fileUrls: {main: url}},
+ *     {date: A, fileUrls: {meta: url, tree: url}}
+ *   ]}
+ *
+ * The maximum temporal resolution is per-day, in other words if a resource was
+ * uploaded multiple times in a single day then only the last one is used. This
+ * matches our (implicit) expectation when we started using datestamped datasets
+ * during the ncov pandemic. It also covers the (somewhat common, I think) case
+ * where datasets were re-uploaded after an error / omission was noticed.
+ *
+ * @param {string} resourceType 'dataset' or 'intermediate'
+ * @param {string} id passed through to the per-day validation function
+ * @param {object[]} items each with a `date` (day) and a `timestamp`
+ * @returns {{versions: object[]}} versions are sorted most recent first and
+ *   may be an empty array if no valid versions are found
+ * @throws {Error} if items are provided for an unknown resourceType
+ */
+function createVersionedResources(resourceType, id, items) {
+  // Bucket the items by day
+  const itemsByDay = {};
+  for (const item of items) {
+    const day = item.date;
+    if (Object.prototype.hasOwnProperty.call(itemsByDay, day)) {
+      itemsByDay[day].push(item);
+    } else {
+      itemsByDay[day] = [item];
+    }
+  }
+
+  // Days in descending order (first entry: most recent)
+  const days = Object.keys(itemsByDay).sort((dayA, dayB) => {
+    if (dayA < dayB) return 1;
+    if (dayA > dayB) return -1;
+    return 0;
+  });
+
+  // (re-)sort the objects within each day (first entry: most recent)
+  for (const day of days) {
+    itemsByDay[day].sort((a, b) => b.timestamp - a.timestamp);
+  }
+
+  // Convert the objects for each day into a resource version, dropping days
+  // which have files but no valid resource
+  const versions = [];
+  for (const day of days) {
+    const objects = itemsByDay[day];
+    let version;
+    if (resourceType==='dataset') {
+      version = validDataset(id, day, objects);
+    } else if (resourceType==='intermediate') {
+      version = validIntermediate(id, day, objects);
+    } else {
+      throw new Error(`Unknown resourceType '${resourceType}' to create versioned resource from`)
+    }
+    if (version) versions.push(version);
+  }
+
+  return {versions};
+}
+
+
+/**
+ * Given a set of files from the same _day_ (S3 keys) return the subset such
+ * that, taken together, they represent a dataset. Often a dataset will be
+ * uploaded multiple times in a single day (often to fix minor mistakes) and we
+ * only want to surface the last-updated dataset on the day. Note that each
+ * individual object provided here is a valid dataset-related file in its own right,
+ * but taken together the objects may not represent a valid dataset, or only a
+ * subset may represent a valid dataset.
+ *
+ * We take the first (i.e. most recent) occurrence of valid files. In theory we
+ * could have a situation where we take a sidecar file that wasn't intended to
+ * be grouped with the auspice json, but I think that's worth the
+ * simplifications it allows here.
+ *
+ * @param {string} id currently unused
+ * @param {string} date the day the objects belong to
+ * @param {object[]} objects S3 objects for the day, most recent first. Each
+ *   has `subresourceType`, `baseUrl` and (for versioned buckets) `versionId`
+ * @returns {false|{date: string, fileUrls: Object<string, string>}} false if
+ *   the objects don't form a valid dataset. `fileUrls` only has keys for the
+ *   subresources which exist.
+ */
+function validDataset(id, date, objects) {
+  // The `subresources` object represents the maximal possible collection of
+  // subresources for this dataset. The keys are the subresource types, and the
+  // values are false (subresource doesn't exist) or the relevant S3 object.
+  const subresources = Object.fromEntries(
+    VALID_AUSPICE_PATTERNS.map(([subresourceType, ]) => [subresourceType, false])
+  );
+
+  const _firstItem = (type) => objects.find((o) => o.subresourceType===type);
+
+  /**
+   * Take a v2 dataset over a v1 dataset _even if_ the v1 dataset was uploaded
+   * more recently. (This is not hypothetical - it is the case for /zika as of
+   * 2023-11-01.) This is almost certainly an unintentional situation, and the
+   * behaviour of the nextstrain.org server is to look for a v2 dataset and use
+   * that, regardless of whether a v1 dataset exists.
+   */
+  const types = new Set(objects.map((o) => o.subresourceType));
+  if (types.has('main')) {
+    subresources.main = _firstItem('main');
+  } else if (types.has('meta') && types.has('tree')) {
+    subresources.meta = _firstItem('meta');
+    subresources.tree = _firstItem('tree');
+  } else {
+    /* It isn't unexpected to encounter days with auspice-like files but no
+    valid dataset. Looking at the core bucket in late 2023 identified ~2700 such
+    days. It seems this is (mostly?) due to delete markers being added for
+    certain files and so looking at dates prior to the delete marker(s) we only
+    see a subset of the files which were actually uploaded on that day. */
+    return false;
+  }
+
+  for (const subresourceType of types) {
+    if (SIDECAR_TYPES.has(subresourceType)) {
+      subresources[subresourceType] = _firstItem(subresourceType);
+    }
+  }
+
+  return {
+    date,
+    fileUrls: Object.fromEntries(
+      Object.entries(subresources)
+        // Skip subresources which don't exist rather than emitting `undefined` URLs for them
+        .filter(([, s3object]) => Boolean(s3object))
+        .map(([subresourceType, s3object]) => {
+          if (!s3object.versionId) { // (bucket unversioned)
+            return [subresourceType, s3object.baseUrl]
+          }
+          return [subresourceType, `${s3object.baseUrl}?versionId=${encodeURIComponent(s3object.versionId)}`]
+        })
+    )
+  };
+}
+
+/**
+ * For a set of intermediate files (on a given day), return the subset to be
+ * represented by the resource. We don't perform any filename-based pruning at
+ * the moment, so the files for the resource are everything on the bucket which
+ * was assigned the same ID - this includes the same "file" under different
+ * compression schemes (etc), as that results in a different filename (key).
+ * If the same key exists multiple times on the same day the first (most
+ * recent) is taken.
+ *
+ * @param {string} id currently unused
+ * @param {string} date the day the objects belong to
+ * @param {object[]} objects S3 objects for the day, most recent first. Each
+ *   has `key`, `baseUrl` and (for versioned buckets) `versionId`
+ * @returns {{date: string, fileUrls: Object<string, string>}} `fileUrls` maps
+ *   the filename (last part of the key) to its URL
+ */
+function validIntermediate(id, date, objects) {
+  const seenKeys = new Set();
+  return {
+    date,
+    fileUrls: Object.fromEntries(
+      objects
+        .filter((o) => {
+          if (seenKeys.has(o.key)) return false;
+          seenKeys.add(o.key)
+          return true;
+        })
+        .map((s3object) => {
+          const filename = s3object.key.split('/').pop();
+          // Only pin the URL to a version when the object actually has one
+          // (objects in unversioned buckets have no versionId)
+          const url = s3object.versionId ?
+            `${s3object.baseUrl}?versionId=${encodeURIComponent(s3object.versionId)}` :
+            s3object.baseUrl;
+          return [filename, url]
+        })
+    )
+  };
+}
+
+
+/**
+ * Collection definition for the core source, backed by the S3 inventory
+ * under the "nextstrain-data/config-v1/" prefix of the inventories bucket.
+ */
+export const coreS3Data = {
+  name: 'core',
+  /** Fetch (or read locally, if `local`) and parse the inventory. */
+  async collect({local}) {
+    const inventoryLocation = {
+      inventoryBucket: "nextstrain-inventories",
+      inventoryPrefix: "nextstrain-data/config-v1/",
+    };
+    const objects = await collectInventory({name: this.name, local, ...inventoryLocation});
+    return objects;
+  },
+  /** Annotate an item as belonging to the core bucket (or return false). */
+  categorise(item) {
+    return categoriseCoreObjects(item, false);
+  },
+  createResource: createVersionedResources,
+};
+
+/**
+ * Collection definition for the staging source, backed by the S3 inventory
+ * under the "nextstrain-staging/config-v1/" prefix of the inventories bucket.
+ */
+export const stagingS3Data = {
+  name: 'staging',
+  /** Fetch (or read locally, if `local`) and parse the inventory. */
+  async collect({local}) {
+    const inventoryLocation = {
+      inventoryBucket: "nextstrain-inventories",
+      inventoryPrefix: "nextstrain-staging/config-v1/",
+    };
+    const objects = await collectInventory({name: this.name, local, ...inventoryLocation});
+    return objects;
+  },
+  /** Annotate an item as belonging to the staging bucket (or return false). */
+  categorise(item) {
+    return categoriseCoreObjects(item, true);
+  },
+  createResource: createVersionedResources,
+};
diff --git a/resourceIndexer/errors.js b/resourceIndexer/errors.js
new file mode 100644
index 00000000..7d5bff2c
--- /dev/null
+++ b/resourceIndexer/errors.js
@@ -0,0 +1 @@
+/**
+ * Error thrown by the resource indexer for problems it detects itself (e.g.
+ * inconsistent inventory contents). The entry point in main.js logs the
+ * message of every error but only prints a trace for errors which are _not_
+ * instances of this class.
+ */
+export class ResourceIndexerError extends Error {}
diff --git a/resourceIndexer/inventory.js b/resourceIndexer/inventory.js
new file mode 100644
index 00000000..c1c0ac74
--- /dev/null
+++ b/resourceIndexer/inventory.js
@@ -0,0 +1,255 @@
+import * as fs from 'node:fs/promises';
+import neatCsv from 'neat-csv';
+import zlib from 'zlib';
+import { promisify } from 'util';
+import AWS from 'aws-sdk';
+import {logger} from './logger.js';
+import { DateTime } from 'luxon';
+import escapeStringRegexp from 'escape-string-regexp';
+import { ResourceIndexerError } from './errors.js';
+const gunzip = promisify(zlib.gunzip)
+
+/**
+ * Fetches and reads the latest inventory from the provided bucket/prefix:
+ *   - finds the most recent manifest.json via comparison of timestamps in keys
+ *   - uses this manifest.json to get the schema + key of the actual inventory
+ *   - gets the actual inventory & returns the data as an object[] with keys from the schema
+ *
+ * Note that we only read a maximum of 999 keys from the provided bucket+prefix. A typical inventory
+ * update adds ~4 keys, so this should allow for ~8 months of inventories. The bucket where inventories
+ * are stored should use lifecycles to expire objects. A warning is logged if the listing was truncated.
+ *
+ * @param {object} params
+ * @param {string} params.bucket bucket which holds the inventories
+ * @param {string} params.prefix key prefix under which the inventories are stored
+ * @param {string} params.name collection name, only used for logging
+ * @returns {Promise<{inventory: object[], versionsExist: boolean}>}
+ *   - inventory: list of entries in the inventory, using the schema to define keys
+ *   - versionsExist: are key versions present within the bucket?
+ * @throws {ResourceIndexerError} if no manifest.json key is found
+ */
+const fetchInventoryRemote = async ({bucket, prefix, name}) => {
+  const S3 = new AWS.S3();
+  const _prefix = escapeStringRegexp(prefix.replace(/\/*$/, "/"));
+  const manifestKeyPattern = new RegExp(`^${_prefix}\\d{4}-\\d{2}-\\d{2}T\\d{2}-\\d{2}Z/manifest\\.json$`);
+
+  const listing = await S3.listObjectsV2({Bucket: bucket, Prefix: prefix, MaxKeys: 999}).promise();
+  if (listing.IsTruncated) {
+    // NOTE(review): if S3 lists keys in ascending order then a truncated
+    // listing omits the _newest_ inventories - confirm & paginate if this is hit
+    logger.warn(`inventory for ${name} - the key listing was truncated, the most recent manifest may have been missed`);
+  }
+  const orderedKeys = listing.Contents
+    .map((object) => object.Key)
+    .filter((key) => key.match(manifestKeyPattern))
+    .sort() // keys are identical except for a YYYY-MM-DDTHH-MMZ timestamp within the key itself
+    .reverse(); // now sorted most recent object first
+  if (orderedKeys.length===0) {
+    throw new ResourceIndexerError("No valid inventory manifest.json found");
+  }
+  const manifestKey = orderedKeys[0];
+  logger.info(`inventory for ${name} - manifest key: ${manifestKey}`)
+
+  const {schema, inventoryKey, versionsExist} = await S3.getObject({Bucket: bucket, Key: manifestKey})
+    .promise()
+    .then((response) => _parseManifest(JSON.parse(response.Body.toString('utf-8'))));
+
+  logger.info(`inventory for ${name} - parsed manifest JSON`)
+
+  const inventory = await S3.getObject({Bucket: bucket, Key: inventoryKey})
+    .promise()
+    .then((response) => gunzip(response.Body))
+    .then((data) => neatCsv(data, schema));
+
+  logger.info(`inventory for ${name} - fetched ${inventory.length} rows`)
+  return {inventory, versionsExist};
+}
+
+/**
+ * Read an inventory from disk instead of from S3. The following files must be
+ * present (relative to the working directory):
+ *   - `./devData/${name}.manifest.json`
+ *   - `./devData/${name}.inventory.csv.gz`
+ *
+ * @returns {Promise<{inventory: object[], versionsExist: boolean}>}
+ *   - inventory: list of entries in the inventory, using the schema to define keys
+ *   - versionsExist: are key versions present within the bucket?
+ */
+const fetchInventoryLocal = async ({name}) => {
+  const manifestPath = `./devData/${name}.manifest.json`;
+  const inventoryPath = `./devData/${name}.inventory.csv.gz`;
+  logger.info(`inventory for ${name} -- reading S3 inventories from ${manifestPath} and ${inventoryPath}`);
+
+  // The manifest defines the CSV column names
+  const manifestContents = await fs.readFile(manifestPath);
+  const {schema, versionsExist} = _parseManifest(JSON.parse(manifestContents));
+
+  const compressedCsv = await fs.readFile(inventoryPath);
+  const csv = await gunzip(compressedCsv);
+  const inventory = await neatCsv(csv, schema);
+
+  logger.info(`inventory for ${name} - read ${inventory.length} rows from the local file`)
+  return {inventory, versionsExist};
+}
+
+
+/**
+ * Returns a list of objects in the requested S3 inventory, which itself represents a list of
+ * objects + versions within a specific bucket+prefix. Objects at or before the most recent
+ * delete marker for their key are excluded, as are the delete markers themselves.
+ *
+ * @param {object} params
+ * @param {object[]} params.objects inventory rows. A `timestamp` property is added to each row.
+ * @param {boolean} params.versionsExist whether the inventory includes version IDs
+ * @returns {Promise<object[]>} objects sorted most recent first, each with properties
+ *   timestamp, date, key, bucket, versionId and latest
+ */
+const parseInventory = async ({objects, versionsExist}) => {
+  // Sort the objects most recent first. Array.map returns a new array and
+  // Array.sort sorts in place, so the sorted result must be captured in
+  // order for the checks below to see the sorted objects.
+  const sorted = objects
+    .map((item) => {
+      item.timestamp = DateTime.fromISO(item.LastModifiedDate)
+      return item;
+    })
+    .sort((a, b) => b.timestamp - a.timestamp);
+
+  const checked = versionsExist ? _checkVersionedObjects(sorted) : _checkNonVersionedObjects(sorted);
+  const existing = _removeDeletedObjects(checked);
+
+  /* rename / prune / add properties as I find the default S3 properties /
+  values awkward to work with */
+  return existing.map((item) => {
+    return {
+      timestamp: item.timestamp,
+      date: item.LastModifiedDate.split("T")[0],
+      key: item.Key,
+      bucket: item.Bucket,
+      versionId: item.VersionId, // will be undefined if bucket is not versioned
+      latest: versionsExist ? item.IsLatest==='true' : true,
+    }
+  });
+}
+
+
+/**
+ * Obtain the latest inventory - from the inventoryBucket / inventoryPrefix on
+ * S3 or, if `local` is set, from local files (useful for dev purposes to avoid
+ * constant downloads from S3) - and parse it.
+ * Errors while fetching are logged and then re-thrown.
+ * @returns S3Object[]
+ */
+export const collectInventory = async ({name, local, inventoryBucket, inventoryPrefix}) => {
+  const fetchInventory = local ? fetchInventoryLocal : fetchInventoryRemote;
+  let objects;
+  let versionsExist;
+  try {
+    const fetched = await fetchInventory({bucket: inventoryBucket, prefix: inventoryPrefix, name});
+    objects = fetched.inventory;
+    versionsExist = fetched.versionsExist;
+  } catch (e) {
+    logger.error(`There was an error while fetching the S3 inventory for ${name}. This is fatal.`)
+    throw e;
+  }
+  const parsed = await parseInventory({objects, versionsExist});
+  return parsed;
+}
+
+/**
+ * Sanity checks for the objects of a versioned bucket.
+ *
+ * Objects without a version ID or without the IsLatest property are dropped
+ * (and logged at the verbose level). For instance,
+ * s3://nextstrain-data/WNV_NA_tree.json from 2018-05-09 has an empty-string version ID.
+ * These may represent objects from before versioning was enabled.
+ *
+ * For the remaining objects, the first occurrence of each key must be the one
+ * S3 flags as the latest, and no later occurrence may be flagged as such.
+ *
+ * @param {S3Item[]} objects chronologically sorted, latest first
+ * @returns {S3Item[]} the objects which weren't dropped, in the same order
+ * @throws {ResourceIndexerError} if the IsLatest flags disagree with the sort order
+ */
+function _checkVersionedObjects(objects) {
+  const usable = objects.filter((item) => {
+    if (!item.VersionId) {
+      logger.verbose(`Object ${item.Bucket}/${item.Key} is missing a versionId but the bucket is versioned. The item will be ignored.`);
+      return false;
+    }
+    if (!Object.prototype.hasOwnProperty.call(item, 'IsLatest')) {
+      logger.verbose(`Object ${item.Bucket}/${item.Key} is (unexpectedly) missing the IsLatest property. The item will be ignored.`);
+      return false;
+    }
+    return true;
+  });
+
+  // Keys for which the object flagged as latest has already been encountered
+  const keysSeen = new Set();
+  for (const item of usable) {
+    const flaggedLatest = item.IsLatest === 'true';
+    const alreadySeen = keysSeen.has(item.Key);
+    if (flaggedLatest && alreadySeen) {
+      throw new ResourceIndexerError(`
+        These appears to be something amiss for S3 objects ${item.Bucket}/${item.Key}.
+        Specifically, the version ${item.VersionId} is considered by S3 to be the latest,
+        however it is not the most recent after sorting on LastModified.
+        This may result in an invalid index and so this is a fatal error.
+      `.replace(/\s+/g, ' '))
+    }
+    if (!flaggedLatest && !alreadySeen) {
+      throw new ResourceIndexerError(`
+        These appears to be something amiss for S3 objects ${item.Bucket}/${item.Key}.
+        Specifically, the most recent object (via sorting on LastModified, version ID:
+        ${item.VersionId}) is not classified by S3 as the latest.
+        This may result in an invalid index and so this is a fatal error.
+      `.replace(/\s+/g, ' '))
+    }
+    if (flaggedLatest) keysSeen.add(item.Key);
+  }
+  return usable;
+}
+
+/**
+ * Sanity checks for the objects of a non-versioned bucket: objects which
+ * (unexpectedly) have a VersionId property are dropped (and logged at the
+ * verbose level), and keys must never be duplicated.
+ *
+ * @param {S3Item[]} objects
+ * @returns {S3Item[]} the objects which weren't dropped, in the same order
+ * @throws {ResourceIndexerError} if a key appears more than once
+ */
+function _checkNonVersionedObjects(objects) {
+  const keys = new Set();
+  // A filter (rather than forEach) is needed so that the items we log as
+  // ignored are actually removed from the returned list
+  return objects.filter((item) => {
+    if (Object.prototype.hasOwnProperty.call(item, 'VersionId')) {
+      logger.verbose(`Object ${item.Bucket}/${item.Key} has a versionId ('${item.VersionId}') but the bucket is not versioned! The item will be ignored.`);
+      return false;
+    }
+    if (keys.has(item.Key)) {
+      throw new ResourceIndexerError(`
+        The S3 Object for ${item.Bucket}/${item.Key} (unexpectedly) appears multiple times in an un-versioned bucket.
+        This may result in a corrupted index and so is a fatal error.
+      `.replace(/\s+/g, ' '))
+    }
+    keys.add(item.Key);
+    return true;
+  });
+}
+
+/**
+ * Remove 'deleted' objects in a (versioned) bucket. Versions more recent than
+ * the most recent delete marker will _not_ be removed. Delete markers
+ * themselves will be removed.
+ *
+ * Objects without delete markers are untouched, so it's safe to run this
+ * function for non-versioned buckets.
+ *
+ * @param {object[]} objects each with `Key`, `timestamp` and (optionally)
+ *   `IsDeleteMarker`, which is the string "true" for delete markers
+ * @returns {object[]} the surviving objects, in their original order
+ */
+function _removeDeletedObjects(objects) {
+
+  // Store the most recent delete markers. Keys are S3 keys, values are the
+  // timestamp of the delete marker. A Map is used so that S3 keys which
+  // happen to be named like Object.prototype properties (e.g. "constructor")
+  // are handled like any other key.
+  const deleteMarkers = new Map();
+
+  const withoutMarkers = objects.filter((item) => {
+    if (item.IsDeleteMarker !== "true") return true;
+    if (!deleteMarkers.has(item.Key) || deleteMarkers.get(item.Key) < item.timestamp) {
+      deleteMarkers.set(item.Key, item.timestamp);
+    }
+    return false;
+  });
+
+  // Drop everything at or before the most recent delete marker for its key
+  return withoutMarkers.filter((item) => {
+    if (!deleteMarkers.has(item.Key)) return true;
+    return !(item.timestamp <= deleteMarkers.get(item.Key));
+  });
+}
+
+/**
+ * Parses a S3 inventory manifest JSON file
+ * @param {object} manifest must have `fileSchema` (comma-separated column
+ *   names) and `files` (list with exactly one entry, which has a `key`)
+ * @returns {object} object.schema = string[]
+ *                   object.inventoryKey = string
+ *                   object.versionsExist = boolean
+ * @throws {ResourceIndexerError} if the manifest doesn't list exactly one inventory file
+ */
+function _parseManifest(manifest) {
+  if (!Array.isArray(manifest.files) || manifest.files.length===0) {
+    throw new ResourceIndexerError(`
+      The manifest file for the S3 inventory for bucket ${manifest.sourceBucket}
+      does not list any inventory files.
+    `.replace(/\s+/g, ' '))
+  }
+  if (manifest.files.length>1) {
+    throw new ResourceIndexerError(`
+      The manifest file for the S3 inventory for bucket ${manifest.sourceBucket}
+      includes more than one inventory file. This situation was not encountered
+      during development, but this is presumably caused by the inventory size
+      exceeding some threshold and being chunked into multiple files. Please check
+      this is indeed the case and, if so, amend the code to parse and join each file.
+    `.replace(/\s+/g, ' '))
+  }
+  const schema = manifest.fileSchema.split(",").map((f) => f.trim());
+  return {
+    schema,
+    inventoryKey: manifest.files[0].key,
+    // Buckets without versioning cannot produce inventories with VersionId
+    versionsExist: schema.includes('VersionId'),
+  }
+}
+
diff --git a/resourceIndexer/logger.js b/resourceIndexer/logger.js
new file mode 100644
index 00000000..1f307713
--- /dev/null
+++ b/resourceIndexer/logger.js
@@ -0,0 +1,10 @@
+import { createLogger, transports } from 'winston';
+
+/**
+ * Logger shared by the resource indexer modules. Messages are written to the
+ * console, at the 'info' level unless a transport's level is changed.
+ */
+export const logger = createLogger({
+  transports: [new transports.Console()],
+  level: 'info',
+});
\ No newline at end of file
diff --git a/resourceIndexer/main.js b/resourceIndexer/main.js
new file mode 100644
index 00000000..5f234ef8
--- /dev/null
+++ b/resourceIndexer/main.js
@@ -0,0 +1,113 @@
+
+import { ArgumentParser } from 'argparse';
+import fs from 'fs';
+import { coreS3Data, stagingS3Data } from "./coreStagingS3.js";
+import {logger} from './logger.js';
+import zlib from 'zlib';
+import { promisify } from 'util';
+import { ResourceIndexerError } from './errors.js';
+
+const gzip = promisify(zlib.gzip)
+
+/**
+ * We define a number of collections which each represent some listing of
+ * nextstrain resources. The actual details are deferred to the provided
+ * collection objects - e.g. they may represent a GitHub repo listing or an S3
+ * inventory. Each of these collections provides functions which allow items
+ * (files) across collections to be collected into a master list of resources
+ * using three identifiers: source, resourceType and resourcePath. The intention
+ * is for source to parallel the information in the corresponding Source
+ * (sub-)class and resourcePath to parallel the information in the Resource
+ * (sub-)class.
+ *
+ * Currently only core & staging sources + datasets & intermediates are part of
+ * the index. For instance, the core WNV/NA (nextstrain.org/WNV/NA) dataset is
+ * indexed like so:
+ *
+ * core → dataset → WNV/NA → versions → [
+ *   {date: "2021-04-08", fileUrls: {main: ...}},
+ *   {date: "2019-08-30", fileUrls: {meta: ..., tree: ...}}
+ * ]
+ *
+ */
+const COLLECTIONS = [
+ coreS3Data,
+ stagingS3Data,
+];
+
+/**
+ * Define and parse the command line arguments.
+ * `--output` is the only required argument; `--local`, `--indent`, `--gzip`
+ * and `--verbose` are flags, and `--collections` takes one or more names.
+ * @returns {object} the parsed arguments
+ */
+function parseArgs() {
+ const argparser = new ArgumentParser({
+ description: `
+ Fetch file lists from a number of provided collections (e.g. S3 inventories) and collect them into
+ resources. Resources are organised in a hierarchical fashion via source → resourceType → resourcePath.
+ Each resource contains a list of available versions, where applicable.
+ The output JSON is intended for consumption by the nextstrain.org server.
+ `,
+ });
+ argparser.addArgument("--local", {action: 'storeTrue',
+ help: 'Access a local copy of S3 inventories within ./devData/. See docstring of fetchInventoryLocal() for expected filenames.'})
+ argparser.addArgument("--collections", {metavar: "<name>", type: "string", nargs: '+',
+ help: "Only fetch data from a subset of collections. Source names are those defined in COLLECTIONS"});
+ argparser.addArgument("--output", {metavar: "<json>", required: true})
+ argparser.addArgument("--indent", {action: 'storeTrue', help: 'Indent the output JSON'})
+ argparser.addArgument("--gzip", {action: 'storeTrue', help: 'GZip the output JSON'})
+ argparser.addArgument("--verbose", {action: 'storeTrue', help: 'Verbose logging'})
+
+ return argparser.parseArgs();
+}
+
+
+// Run the indexer. Failures are logged (with a trace for unexpected errors)
+// and reported to the caller of the script via a non-zero exit code.
+main(parseArgs())
+  .catch((err) => {
+    logger.error(err.message);
+    if (!(err instanceof ResourceIndexerError)) {
+      console.trace(err);
+    }
+    // Set the exit code rather than calling process.exit() so that pending
+    // log output can still be flushed
+    process.exitCode = 1;
+  })
+
+
+/**
+ * Collect the items of each (requested) collection, group them into resources
+ * via source → resourceType → resourcePath, and write the resulting index as
+ * (optionally indented and/or gzipped) JSON to `args.output`.
+ * Resources without any versions are left out of the index.
+ * @param {object} args parsed command line arguments
+ */
+async function main(args) {
+
+  if (args.verbose) {
+    for (const transport of logger.transports) {
+      transport.level = 'verbose';
+    }
+  }
+
+  const resources = {};
+
+  const requested = COLLECTIONS.filter(
+    (collection) => !args.collections || args.collections.includes(collection.name)
+  );
+
+  for (const collection of requested) {
+    const collected = await collection.collect({local: args.local});
+
+    // Collect together all items ("files") based on their assigned resourceType & resourcePath
+    const groupedObjects = {};
+    for (const item of collected.map(collection.categorise)) {
+      if (!item) continue;
+      const {resourceType, resourcePath, source} = item;
+      if (!groupedObjects[source]) groupedObjects[source]={}
+      const byType = groupedObjects[source];
+      if (!byType[resourceType]) byType[resourceType]={}
+      const byPath = byType[resourceType];
+      if (!byPath[resourcePath]) byPath[resourcePath]=[]
+      byPath[resourcePath].push(item);
+    }
+
+    for (const [source, byType] of Object.entries(groupedObjects)) {
+      for (const [resourceType, byPath] of Object.entries(byType)) {
+        for (const [resourcePath, items] of Object.entries(byPath)) {
+          const resource = collection.createResource(resourceType, resourcePath, items);
+          if (resource.versions.length===0) continue;
+          if (!resources[source]) resources[source]={}
+          if (!resources[source][resourceType]) resources[source][resourceType]={}
+          resources[source][resourceType][resourcePath] = resource;
+        }
+      }
+    }
+  }
+
+  const json = JSON.stringify(resources, null, args.indent ? 2 : null);
+  const output = args.gzip ? await gzip(json) : json;
+  fs.writeFileSync(args.output, output);
+}
\ No newline at end of file
--
2.42.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment