Created
October 31, 2023 23:59
-
-
Save tsibley/3fd84b5bae812d50ed73caa471bbd036 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/resourceIndexer/constants.js b/resourceIndexer/constants.js | |
index f22e209e..ce67d5f8 100644 | |
--- a/resourceIndexer/constants.js | |
+++ b/resourceIndexer/constants.js | |
@@ -7,16 +7,21 @@ export const INVALID_AUSPICE_PATTERNS = [/_seq\.json$/, /_sequences\.json$/, /_e | |
* The keys here represent the subresource type of these files used internally | |
* in the server code (encoded there as the 2nd argument when instantiating | |
* (sub-)classes of `Subresource`) | |
+ * | |
+ * Matched against filenames to classify them, in order, with the first match | |
+ * "winning". | |
+ * | |
+ * Any filename that doesn't match a pattern here will not be classified as an | |
+ * Auspice file. | |
*/ | |
-export const VALID_AUSPICE_PATTERNS = { | |
- "root-sequence": /_root-sequence\.json$/, | |
- "tip-frequencies": /_tip-frequencies\.json$/, | |
- measurements: /_measurements\.json$/, | |
- meta: /_meta\.json$/, | |
- tree: /_tree\.json$/, | |
-} | |
- | |
-export const MAIN_DATASET_JSON = "main"; | |
+export const VALID_AUSPICE_PATTERNS = new Map([ | |
+ ["root-sequence", /_root-sequence\.json$/], | |
+ ["tip-frequencies", /_tip-frequencies\.json$/], | |
+ ["measurements", /_measurements\.json$/], | |
+ ["meta", /_meta\.json$/], | |
+ ["tree", /_tree\.json$/], | |
+ ["main", /\.json$/], | |
+]); | |
/** | |
* Following values taken to match the server's `sourceNameToClass`. | |
diff --git a/resourceIndexer/coreStagingS3.js b/resourceIndexer/coreStagingS3.js | |
index ad1cce9d..5beae18e 100644 | |
--- a/resourceIndexer/coreStagingS3.js | |
+++ b/resourceIndexer/coreStagingS3.js | |
@@ -1,6 +1,6 @@ | |
import {logger} from './logger.js'; | |
import { SOURCE, VALID_AUSPICE_PATTERNS, INVALID_AUSPICE_PATTERNS, | |
- DATESTAMP_REGEX, MAIN_DATASET_JSON } from './constants.js'; | |
+ DATESTAMP_REGEX } from './constants.js'; | |
/** | |
* The inventory of buckets (especially the core bucket) is in some ways a | |
@@ -89,20 +89,20 @@ function categoriseCoreObjects(item, staging) { | |
* Otherwise returns an object with properties resourceType, subresourceType | |
*/ | |
function auspiceFile(filename) { | |
- if (!filename.endsWith('.json')) return false; | |
if (filename.match(DATESTAMP_REGEX)) return false; | |
for (const pattern of INVALID_AUSPICE_PATTERNS) { | |
if (filename.match(pattern)) return false; | |
} | |
- let subresourceType = MAIN_DATASET_JSON; // this is the default _unless_ it looks like something else | |
- let urlPath = filename.replace('.json', '').replace(/_/g, '/'); | |
- for (const [type, pattern] of Object.entries(VALID_AUSPICE_PATTERNS)) { | |
+ // First match wins: patterns are tried in Map insertion order, so the catch-all "main" entry must stay last | |
+ for (const [type, pattern] of VALID_AUSPICE_PATTERNS) { | |
if (filename.match(pattern)) { | |
- subresourceType = type; | |
- urlPath = filename.replace(pattern, '').replace(/_/g, '/'); | |
+ return { | |
+ subresourceType: type, | |
+ urlPath: filename.replace(pattern, '').replace(/_/g, '/'), | |
+ }; | |
} | |
} | |
- return {subresourceType, urlPath}; | |
+ return false; | |
} | |
@@ -188,16 +188,16 @@ function validDataset(id, date, objects) { | |
// in a most-recent wins approach. | |
// Properties: subresource types | |
// Values: false if not present else the S3 object | |
- const subresources = Object.fromEntries(Object.keys(VALID_AUSPICE_PATTERNS) | |
- .map((subresourceType) => [subresourceType, false])); | |
- subresources[MAIN_DATASET_JSON] = false; | |
+ const subresources = Object.fromEntries( | |
+ Array.from(VALID_AUSPICE_PATTERNS.keys()) | |
+ .map((subresourceType) => [subresourceType, false])); | |
// most recent first (they're all the same day, BTW) | |
objects.sort((a, b) => a.timestamp < b.timestamp ? 1 : a.timestamp > b.timestamp ? -1 : 0) | |
let valid = false; | |
for (const o of objects) { | |
const type = o.subresourceType; | |
- if (type===MAIN_DATASET_JSON) { | |
+ if (type==="main") { | |
subresources[type] = o; | |
// Ensure a single v1 JSON (i.e. either meta OR tree) is removed | |
subresources.meta = false; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment