Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jsstevenson/e37dac93eb3b5ec134f25eb9b0ad4a89 to your computer and use it in GitHub Desktop.
Save jsstevenson/e37dac93eb3b5ec134f25eb9b0ad4a89 to your computer and use it in GitHub Desktop.
(gene-normalization) ~/code/gene-normalization (issue-112) % ipython
Python 3.9.12 (main, Mar 26 2022, 15:51:15)
Type 'copyright', 'credits' or 'license' for more information
IPython 8.2.0 -- An enhanced Interactive Python. Type '?' for help.
In [2]: import boto3
In [3]: g = boto3.resource("dynamodb").Table("gene_concepts")
In [4]: from boto3.dynamodb.conditions import Key
In [7]: g.query(KeyConditionExpression=Key("label_and_type").eq("tpx2##alias"))
Out[7]:
{'Items': [],
'Count': 0,
'ScannedCount': 0,
'ResponseMetadata': {'RequestId': '28VF2LC1A3MG3KKV2GGEB3GSURVV4KQNSO5AEMVJF66Q9ASUAAJG',
'HTTPStatusCode': 200,
'HTTPHeaders': {'server': 'Server',
'date': 'Thu, 07 Apr 2022 12:11:36 GMT',
'content-type': 'application/x-amz-json-1.0',
'content-length': '39',
'connection': 'keep-alive',
'x-amzn-requestid': '28VF2LC1A3MG3KKV2GGEB3GSURVV4KQNSO5AEMVJF66Q9ASUAAJG',
'x-amz-crc32': '3413411624'},
'RetryAttempts': 0}}
In [8]: g.query(KeyConditionExpression=Key("label_and_type").eq("tpx2##xref"))
Out[8]:
{'Items': [],
'Count': 0,
'ScannedCount': 0,
'ResponseMetadata': {'RequestId': 'E9QHMLJQCUSUR9J80PHGJQN09BVV4KQNSO5AEMVJF66Q9ASUAAJG',
'HTTPStatusCode': 200,
'HTTPHeaders': {'server': 'Server',
'date': 'Thu, 07 Apr 2022 12:11:41 GMT',
'content-type': 'application/x-amz-json-1.0',
'content-length': '39',
'connection': 'keep-alive',
'x-amzn-requestid': 'E9QHMLJQCUSUR9J80PHGJQN09BVV4KQNSO5AEMVJF66Q9ASUAAJG',
'x-amz-crc32': '3413411624'},
'RetryAttempts': 0}}
In [9]: g.query(KeyConditionExpression=Key("label_and_type").eq("tpx2##symbol"))
Out[9]:
{'Items': [{'src_name': 'Ensembl',
'concept_id': 'ensembl:ensg00000088325',
'label_and_type': 'tpx2##symbol',
'item_type': 'symbol'},
{'src_name': 'HGNC',
'concept_id': 'hgnc:1249',
'label_and_type': 'tpx2##symbol',
'item_type': 'symbol'},
{'src_name': 'NCBI',
'concept_id': 'ncbigene:22974',
'label_and_type': 'tpx2##symbol',
'item_type': 'symbol'}],
'Count': 3,
'ScannedCount': 3,
'ResponseMetadata': {'RequestId': '65I8FMTPOOKQK1CV6MTBB7JLV7VV4KQNSO5AEMVJF66Q9ASUAAJG',
'HTTPStatusCode': 200,
'HTTPHeaders': {'server': 'Server',
'date': 'Thu, 07 Apr 2022 12:11:55 GMT',
'content-type': 'application/x-amz-json-1.0',
'content-length': '426',
'connection': 'keep-alive',
'x-amzn-requestid': '65I8FMTPOOKQK1CV6MTBB7JLV7VV4KQNSO5AEMVJF66Q9ASUAAJG',
'x-amz-crc32': '2227394646'},
'RetryAttempts': 0}}
In [10]: def query(string):
...:     return g.query(KeyConditionExpression=Key("label_and_type").eq(string))
...:
In [12]: query("ensembl:ensg00000088325##identity")
Out[12]:
{'Items': [{'locations': [{'sequence_id': 'ga4gh:SQ.-A1QmD_MatoqxvgVxBLZTONHz9-c7nQo',
'interval': {'type': 'SequenceInterval',
'start': {'type': 'Number', 'value': Decimal('31739270')},
'end': {'type': 'Number', 'value': Decimal('31801805')}},
'_id': 'ga4gh:VSL.QUMWyN4wVps8vwXoNJCmhT_b1Vw4XLK_',
'type': 'SequenceLocation'}],
'merge_ref': 'hgnc:1249',
'xrefs': ['hgnc:1249'],
'symbol': 'TPX2',
'label': 'TPX2 microtubule nucleation factor',
'src_name': 'Ensembl',
'concept_id': 'ensembl:ENSG00000088325',
'label_and_type': 'ensembl:ensg00000088325##identity',
'strand': '+',
'item_type': 'identity'}],
'Count': 1,
'ScannedCount': 1,
'ResponseMetadata': {'RequestId': '0A4IOA7NRH97KN15CFPBJF536FVV4KQNSO5AEMVJF66Q9ASUAAJG',
'HTTPStatusCode': 200,
'HTTPHeaders': {'server': 'Server',
'date': 'Thu, 07 Apr 2022 12:12:52 GMT',
'content-type': 'application/x-amz-json-1.0',
'content-length': '712',
'connection': 'keep-alive',
'x-amzn-requestid': '0A4IOA7NRH97KN15CFPBJF536FVV4KQNSO5AEMVJF66Q9ASUAAJG',
'x-amz-crc32': '1424710172'},
'RetryAttempts': 0}}
In [13]: query("ncbigene:22974##identity")
Out[13]:
{'Items': [{'merge_ref': 'hgnc:1249',
'aliases': ['HCA519',
'REPP86',
'GD:C20orf1',
'C20orf2',
'HCTP4',
'C20orf1',
'DIL2',
'p100',
'FLS353',
'DIL-2'],
'symbol': 'TPX2',
'label': 'TPX2 microtubule nucleation factor',
'src_name': 'NCBI',
'concept_id': 'ncbigene:22974',
'label_and_type': 'ncbigene:22974##identity',
'item_type': 'identity',
'locations': [{'species_id': 'taxonomy:9606',
'interval': {'type': 'CytobandInterval',
'start': 'q11.21',
'end': 'q11.21'},
'_id': 'ga4gh:VCL.lrZVfHmHSb77QV5ukJ6RM933cuNlRyQT',
'type': 'ChromosomeLocation',
'chr': '20'},
{'sequence_id': 'ga4gh:SQ.-A1QmD_MatoqxvgVxBLZTONHz9-c7nQo',
'interval': {'type': 'SequenceInterval',
'start': {'type': 'Number', 'value': Decimal('31739247')},
'end': {'type': 'Number', 'value': Decimal('31801802')}},
'_id': 'ga4gh:VSL.fHZf326JNndK-FvuRT17QetuZphxn-pf',
'type': 'SequenceLocation'}],
'associated_with': ['omim:605917'],
'xrefs': ['ensembl:ENSG00000088325', 'hgnc:1249'],
'previous_symbols': ['C20ORF2', 'FLS353'],
'strand': '+'}],
'Count': 1,
'ScannedCount': 1,
'ResponseMetadata': {'RequestId': 'KR04GUL8QN5BVJ7ISS2HEFLFMJVV4KQNSO5AEMVJF66Q9ASUAAJG',
'HTTPStatusCode': 200,
'HTTPHeaders': {'server': 'Server',
'date': 'Thu, 07 Apr 2022 12:13:04 GMT',
'content-type': 'application/x-amz-json-1.0',
'content-length': '1241',
'connection': 'keep-alive',
'x-amzn-requestid': 'KR04GUL8QN5BVJ7ISS2HEFLFMJVV4KQNSO5AEMVJF66Q9ASUAAJG',
'x-amz-crc32': '3921033789'},
'RetryAttempts': 0}}
In [14]: query("hgnc:1249##identity")
Out[14]:
{'Items': [],
'Count': 0,
'ScannedCount': 0,
'ResponseMetadata': {'RequestId': 'EPONTI3B9SMU0FVFAG4N28SSVFVV4KQNSO5AEMVJF66Q9ASUAAJG',
'HTTPStatusCode': 200,
'HTTPHeaders': {'server': 'Server',
'date': 'Thu, 07 Apr 2022 12:13:38 GMT',
'content-type': 'application/x-amz-json-1.0',
'content-length': '39',
'connection': 'keep-alive',
'x-amzn-requestid': 'EPONTI3B9SMU0FVFAG4N28SSVFVV4KQNSO5AEMVJF66Q9ASUAAJG',
'x-amz-crc32': '3413411624'},
'RetryAttempts': 0}}
In [15]: exit()
(gene-normalization) ~/code/gene-normalization (issue-112) % ipython
Python 3.9.12 (main, Mar 26 2022, 15:51:15)
Type 'copyright', 'credits' or 'license' for more information
IPython 8.2.0 -- An enhanced Interactive Python. Type '?' for help.
In [1]: from boto3.dynamodb.conditions import Key
In [2]: import boto3
In [3]: g = boto3.resource("dynamodb").Table("gene_concepts")
In [5]: def query(string):
...:     return g.query(KeyConditionExpression=Key("label_and_type").eq(string))
...:
In [6]: query("hgnc:1249##identity")
Out[6]:
{'Items': [{'merge_ref': 'hgnc:1249',
'aliases': ['HCA519', 'p100', 'FLS353', 'DIL-2'],
'symbol': 'TPX2',
'label': 'TPX2 microtubule nucleation factor',
'src_name': 'HGNC',
'concept_id': 'hgnc:1249',
'label_and_type': 'hgnc:1249##identity',
'symbol_status': 'approved',
'item_type': 'identity',
'locations': [{'species_id': 'taxonomy:9606',
'interval': {'type': 'CytobandInterval',
'start': 'q11.21',
'end': 'q11.21'},
'_id': 'ga4gh:VCL.lrZVfHmHSb77QV5ukJ6RM933cuNlRyQT',
'type': 'ChromosomeLocation',
'chr': '20'}],
'associated_with': ['vega:OTTHUMG00000032190',
'omim:605917',
'ccds:CCDS13190',
'ucsc:uc002wwp.2',
'uniprot:Q9ULW0',
'refseq:NM_012112',
'pubmed:10393424',
'pubmed:12177045',
'pubmed:9207457',
'pubmed:12389033',
'ena.embl:AF098158'],
'xrefs': ['ncbigene:22974', 'ensembl:ENSG00000088325'],
'previous_symbols': ['C20orf2', 'C20orf1']}],
'Count': 1,
'ScannedCount': 1,
'ResponseMetadata': {'RequestId': 'QK1UR22ST3B4TCIDINGJDN40GVVV4KQNSO5AEMVJF66Q9ASUAAJG',
'HTTPStatusCode': 200,
'HTTPHeaders': {'server': 'Server',
'date': 'Thu, 07 Apr 2022 13:29:44 GMT',
'content-type': 'application/x-amz-json-1.0',
'content-length': '1070',
'connection': 'keep-alive',
'x-amzn-requestid': 'QK1UR22ST3B4TCIDINGJDN40GVVV4KQNSO5AEMVJF66Q9ASUAAJG',
'x-amz-crc32': '3728575485'},
'RetryAttempts': 0}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment