Skip to content

Instantly share code, notes, and snippets.

@yuanzhou
Created May 17, 2024 19:10
Show Gist options
  • Save yuanzhou/165d11f593718afbb7b58e4389057998 to your computer and use it in GitHub Desktop.
Save yuanzhou/165d11f593718afbb7b58e4389057998 to your computer and use it in GitHub Desktop.
Modified call_indexer() for debugging
def call_indexer(self, entity, reindex=False, document=None, target_index=None):
    """Debug variant of call_indexer(): build and transform documents, logging each step.

    When ``target_index`` is given, or the entity is an ``Upload``, the
    document is still written through ``self.indexer.index()``.  For every
    other entity the per-index public/private documents are generated and
    transformed, but the ``self.indexer.index()`` calls are commented out,
    so nothing is written on that path.

    Args:
        entity: Mapping that provides at least the ``'uuid'`` and
            ``'entity_type'`` keys.
        reindex: Forwarded unchanged to ``self.indexer.index()``.
        document: Document to index.  When ``None`` it is produced by
            ``self.generate_doc(entity, 'json')``.  It is passed to
            ``json.loads()`` when a transformer exists, so it is presumably
            a JSON string -- confirm against ``generate_doc()``.
        target_index: When truthy, the document is written to this index
            only and the per-index loop is skipped.

    Returns:
        None.

    Note:
        Every ``Exception`` raised inside the outer ``try`` is logged and
        swallowed here; it does not reach the caller.  Only the first
        log statement (which reads ``entity['uuid']`` and
        ``entity['entity_type']``) runs outside that ``try``.
    """
    logger.info(f"Start executing call_indexer() on uuid: {entity['uuid']}, entity_type: {entity['entity_type']}")

    try:
        if document is None:
            try:
                document = self.generate_doc(entity, 'json')
            except Exception as e:
                logger.exception(e)
                logger.error(f"Failed to execute generate_doc() on {entity['entity_type']} {entity['uuid']} during executing call_indexer()")
                # Re-raise the original exception unchanged (type and traceback
                # intact) so the outer handler below logs the real failure
                # instead of a generic Exception wrapper.
                raise

        if target_index:
            self.indexer.index(entity['uuid'], document, target_index, reindex)
        elif entity['entity_type'] == 'Upload':
            target_index = self.INDICES['indices'][self.DEFAULT_INDEX_WITHOUT_PREFIX]['private']
            self.indexer.index(entity['uuid'], document, target_index, reindex)
        else:
            # Walk every configured index and prepare its public/private documents.
            for index in self.indices.keys():
                public_index = self.INDICES['indices'][index]['public']
                private_index = self.INDICES['indices'][index]['private']

                # check to see if the index has a transformer, default to None if not found
                transformer = self.TRANSFORMERS.get(index, None)

                if self.is_public(entity):
                    try:
                        public_doc = self.generate_public_doc(entity)

                        if transformer is not None:
                            public_transformed = transformer.transform(json.loads(public_doc), self.transformation_resources)

                            # DEBUG ONLY: run the transformer on a fixed sample
                            # document and log the result so it can be compared
                            # with the output produced elsewhere.  The dict is
                            # rebuilt on each pass on purpose, in case
                            # transform() mutates its input.
                            # NOTE(review): remove before merging.
                            test_json = {
                                "created_by_user_displayname": "Amanda Knoten",
                                "created_by_user_email": "aknoten@wustl.edu",
                                "created_timestamp": 1642630383146,
                                "data_access_level": "public",
                                "description": "cryosections on dry ice",
                                "entity_type": "Sample",
                                "group_name": "University of California San Diego TMC",
                                "group_uuid": "03b3d854-ed44-11e8-8bce-0e368f3075e8",
                                "hubmap_id": "HBM477.XBRN.629",
                                "lab_tissue_sample_id": "HBM564_L_D3",
                                "last_modified_timestamp": 1642630383146,
                                "protocol_url": "dx.doi.org/10.17504/protocols.io.568g9hw",
                                "sample_category": "section",
                                "submission_id": "CALT0001-HT-2-3-2",
                                "uuid": "e18f88add6de4b5abc0c07d148076390"
                            }
                            test_public_transformed = transformer.transform(test_json, self.transformation_resources)

                            logger.info("========test_public_transformed=========")
                            logger.info(test_public_transformed)
                            logger.info("========test_public_transformed=========")

                            public_transformed_doc = json.dumps(public_transformed)
                            target_doc = public_transformed_doc
                        else:
                            target_doc = public_doc

                        # DEBUG: the write is disabled, so the "Finished" log
                        # line below is emitted without any document being indexed.
                        #self.indexer.index(entity['uuid'], target_doc, public_index, reindex)
                        logger.info(f"Finished executing indexer.index({entity['uuid']}, 'target_doc', {public_index}, {reindex}) for entity_type: {entity['entity_type']}")
                    except Exception:
                        msg = f"Exception encountered during executing generate_public_doc() inside call_indexer() for uuid: {entity['uuid']}, entity_type: {entity['entity_type']}"
                        # Log the full stack trace, prepend a line with our message
                        logger.exception(msg)

                # add it to private
                # NOTE(review): placed at loop level (runs for public and
                # non-public entities alike); the original indentation was
                # lost, so confirm against the upstream method.
                if transformer is not None:
                    private_transformed = transformer.transform(json.loads(document), self.transformation_resources)
                    target_doc = json.dumps(private_transformed)
                else:
                    target_doc = document

                # DEBUG: write disabled here as well; only the log line runs.
                #self.indexer.index(entity['uuid'], target_doc, private_index, reindex)
                logger.info(f"Finished executing indexer.index({entity['uuid']}, 'target_doc', {private_index}, {reindex}) for entity_type: {entity['entity_type']}")

        logger.info(f"Finished executing call_indexer() on uuid: {entity['uuid']}, entity_type: {entity['entity_type']}")
    except Exception as e:
        # First record: the exception itself with its full stack trace.
        logger.exception(e)
        msg = f"Exception encountered during executing call_indexer() for uuid: {entity['uuid']}, entity_type: {entity['entity_type']}"
        # Second record: a plain error line identifying the entity.  The
        # exception is deliberately not re-raised.
        logger.error(msg)
@yuanzhou
Copy link
Author

yuanzhou commented May 17, 2024

Calling transformer.transform() directly on this test JSON returns two more fields, mapper_metadata and mapped_consortium, than the portal result:

Screenshot 2024-05-17 at 3 11 40 PM

@yuanzhou
Copy link
Author

yuanzhou commented May 17, 2024

The portal transform adds extra mapped fields on top of the entity-api /documents/<id> result

Screenshot 2024-05-17 at 3 13 29 PM

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment