Skip to content

Instantly share code, notes, and snippets.

@tsibley
Created October 31, 2023 23:59
Show Gist options
  • Save tsibley/3fd84b5bae812d50ed73caa471bbd036 to your computer and use it in GitHub Desktop.
Save tsibley/3fd84b5bae812d50ed73caa471bbd036 to your computer and use it in GitHub Desktop.
diff --git a/resourceIndexer/constants.js b/resourceIndexer/constants.js
index f22e209e..ce67d5f8 100644
--- a/resourceIndexer/constants.js
+++ b/resourceIndexer/constants.js
@@ -7,16 +7,21 @@ export const INVALID_AUSPICE_PATTERNS = [/_seq\.json$/, /_sequences\.json$/, /_e
* The keys here represent the subresource type of these files used internally
* in the server code (encoded there as the 2nd argument when instantiating
* (sub-)classes of `Subresource`)
+ *
+ * Patterns are matched against filenames in insertion order and the first
+ * match "wins", so the catch-all "main" pattern must stay last.
+ *
+ * Any filename that doesn't match a pattern here will not be classified as an
+ * Auspice file.
*/
-export const VALID_AUSPICE_PATTERNS = {
- "root-sequence": /_root-sequence\.json$/,
- "tip-frequencies": /_tip-frequencies\.json$/,
- measurements: /_measurements\.json$/,
- meta: /_meta\.json$/,
- tree: /_tree\.json$/,
-}
-
-export const MAIN_DATASET_JSON = "main";
+export const VALID_AUSPICE_PATTERNS = new Map([
+ ["root-sequence", /_root-sequence\.json$/],
+ ["tip-frequencies", /_tip-frequencies\.json$/],
+ ["measurements", /_measurements\.json$/],
+ ["meta", /_meta\.json$/],
+ ["tree", /_tree\.json$/],
+ ["main", /\.json$/],
+]);
/**
* Following values taken to match the server's `sourceNameToClass`.
diff --git a/resourceIndexer/coreStagingS3.js b/resourceIndexer/coreStagingS3.js
index ad1cce9d..5beae18e 100644
--- a/resourceIndexer/coreStagingS3.js
+++ b/resourceIndexer/coreStagingS3.js
@@ -1,6 +1,6 @@
import {logger} from './logger.js';
import { SOURCE, VALID_AUSPICE_PATTERNS, INVALID_AUSPICE_PATTERNS,
- DATESTAMP_REGEX, MAIN_DATASET_JSON } from './constants.js';
+ DATESTAMP_REGEX } from './constants.js';
/**
* The inventory of buckets (especially the core bucket) is in some ways a
@@ -89,20 +89,20 @@ function categoriseCoreObjects(item, staging) {
* Otherwise returns an object with properties resourceType, subresourceType
*/
function auspiceFile(filename) {
- if (!filename.endsWith('.json')) return false;
if (filename.match(DATESTAMP_REGEX)) return false;
for (const pattern of INVALID_AUSPICE_PATTERNS) {
if (filename.match(pattern)) return false;
}
- let subresourceType = MAIN_DATASET_JSON; // this is the default _unless_ it looks like something else
- let urlPath = filename.replace('.json', '').replace(/_/g, '/');
- for (const [type, pattern] of Object.entries(VALID_AUSPICE_PATTERNS)) {
+ // First match wins; the catch-all "main" pattern is last, so it acts as the default
+ for (const [type, pattern] of VALID_AUSPICE_PATTERNS) {
if (filename.match(pattern)) {
- subresourceType = type;
- urlPath = filename.replace(pattern, '').replace(/_/g, '/');
+ return {
+ subresourceType: type,
+ urlPath: filename.replace(pattern, '').replace(/_/g, '/'),
+ };
}
}
- return {subresourceType, urlPath};
+ return false;
}
@@ -188,16 +188,16 @@ function validDataset(id, date, objects) {
// in a most-recent wins approach.
// Properties: subresource types
// Values: false if not present else the S3 object
- const subresources = Object.fromEntries(Object.keys(VALID_AUSPICE_PATTERNS)
- .map((subresourceType) => [subresourceType, false]));
- subresources[MAIN_DATASET_JSON] = false;
+ const subresources = Object.fromEntries(
+ Array.from(VALID_AUSPICE_PATTERNS.keys())
+ .map((subresourceType) => [subresourceType, false]));
// most recent first (they're all the same day, BTW)
objects.sort((a, b) => a.timestamp < b.timestamp ? 1 : a.timestamp > b.timestamp ? -1 : 0)
let valid = false;
for (const o of objects) {
const type = o.subresourceType;
- if (type===MAIN_DATASET_JSON) {
+ if (type==="main") {
subresources[type] = o;
// Ensure a single v1 JSON (i.e. either meta OR tree) is removed
subresources.meta = false;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment