Last active
November 9, 2018 19:08
-
-
Save domarps/09b83b3d712f95f046e429f29a49337c to your computer and use it in GitHub Desktop.
extracting url from cid using elasticsearch API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Requirements:
    pip3 install elasticsearch
    pip3 install certifi
'''
from elasticsearch import Elasticsearch | |
import certifi | |
import re | |
def image_id_to_url(cid, size):
    """
    Extract the watermarked image url from cid.

    Queries the staging Elasticsearch endpoint for a document whose ``cid``
    term matches, reads the ``id32`` field of the first hit, and assembles
    the URL from it. Warning: ugly and costly (a new client and one search
    request per call), but works.

    :param cid : content id of the image
    :param size : image_thumbnail_size (inserted verbatim into the file name)
    :return url : watermarked image url, or the string 'NaN' if the lookup
                  fails (request error, no hit, or missing ``id32`` field)
    """
    es = Elasticsearch(
        ['https://similarityes.stage.adobesearch.io'],
        use_ssl=True,
        ca_certs=certifi.where(),
    )  # use staging index

    try:
        res = es.search(body={"query": {"term": {"cid": {"value": cid}}}})
        id32 = res['hits']['hits'][0]['_source']['id32']
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate; every ordinary
        # failure keeps the original best-effort contract of returning 'NaN'.
        print('url extract fail: ', cid)
        return 'NaN'

    # Left-pad the id with zeros to 10 characters, take the first four and
    # split them into two-character directory components,
    # e.g. 110559232 -> '0110559232' -> '0110' -> '01/10'.
    chop_id = '/'.join(re.findall('..', '{:0>10}'.format(str(cid))[0:4]))
    return 'https://t3.ftcdn.net/jpg/{}/{}_F_{}_{}_NW.jpg'.format(chop_id, str(size), cid, id32)
# Example run: look up one sample content id at thumbnail size 240 and print the result.
print(image_id_to_url(cid=110559232, size=240))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment