Skip to content

Instantly share code, notes, and snippets.

@domarps
Last active November 9, 2018 19:08
Show Gist options
  • Save domarps/09b83b3d712f95f046e429f29a49337c to your computer and use it in GitHub Desktop.
Save domarps/09b83b3d712f95f046e429f29a49337c to your computer and use it in GitHub Desktop.
Extracting the watermarked image URL from a content id (cid) using the Elasticsearch API
'''
Requirements:
pip3 install elasticsearch
pip3 install certifi
'''
from elasticsearch import Elasticsearch
import certifi
import re
def image_id_to_url(cid, size):
    """
    Build the watermarked image URL for a content id (cid).

    Looks the cid up through the staging Elasticsearch endpoint, takes the
    ``id32`` field of the first hit and formats the URL from it.
    Warning: ugly and costly (a new client and one search per call), but works.

    :param cid: content id of the image
    :param size: image thumbnail size, inserted verbatim into the URL
    :return: the watermarked image URL, or the string 'NaN' when the lookup
        or the URL construction fails (a failure message is printed)
    """
    # use staging index
    es = Elasticsearch(
        ['https://similarityes.stage.adobesearch.io'],
        use_ssl=True,
        ca_certs=certifi.where(),
    )
    try:
        # NOTE(review): no index is passed to search(), so the query is
        # presumably run against every index on the cluster -- confirm.
        res = es.search(body={"query": {"term": {"cid": {"value": cid}}}})
        # Only the first hit is used; a missing hit or field raises and is
        # reported by the handler below.
        id32 = res['hits']['hits'][0]['_source']['id32']
        # Left-pad cid with zeros to 10 characters, keep the first four and
        # split them into two-character path segments,
        # e.g. 110559232 -> '0110559232' -> '0110' -> '01/10'.
        chop_id = '/'.join(re.findall(r'..', '{:0>10}'.format(str(cid))[0:4]))
        url = 'https://t3.ftcdn.net/jpg/{}/{}_F_{}_{}_NW.jpg'.format(chop_id, str(size), cid, id32)
        return url
    except Exception:
        # Deliberately best-effort, but catch Exception rather than using a
        # bare except so KeyboardInterrupt / SystemExit still propagate.
        print('url extract fail: ', cid)
        return 'NaN'
# Demo: look up a sample content id at thumbnail size 240 and print the
# resulting URL (or 'NaN' if the lookup fails).
print(image_id_to_url(cid=110559232, size=240))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment