Skip to content

Instantly share code, notes, and snippets.

@rishi-raj-jain
Last active May 3, 2023 07:06
Show Gist options
  • Save rishi-raj-jain/ad01c4635b49427a05428136e3053b64 to your computer and use it in GitHub Desktop.
Save rishi-raj-jain/ad01c4635b49427a05428136e3053b64 to your computer and use it in GitHub Desktop.
import os, re, requests, shutil, asyncio, time, aiohttp
# Names of the directories directly inside the current working directory;
# each one is walked later when searching files for image URLs.
# The default keeps next() from raising StopIteration if the walk is empty.
root = next(os.walk('.'), ('.', [], []))[1]
# Leave Git's internal directory out of the scan. The membership test matters:
# list.remove() raises ValueError when there is no '.git' entry, e.g. when the
# script is started outside a Git checkout.
if '.git' in root:
    root.remove('.git')
# Matches http(s) URLs ending in .jpg/.jpeg/.gif/.png. The trailing negative
# lookahead rejects a candidate when a ")" follows it before any whitespace.
pattern = r'https?://(?:[^\s()<>{}\[\]]+\.(?:jpg|jpeg|gif|png))(?![^\s]*\))'
async def get(url, session):
    """Download one image and save it under ``assets-www/img/cloudinary/``.

    Args:
        url: Absolute URL of the image. The text after its last ``/`` is used
            as the local file name.
        session: Object whose ``get(url=...)`` returns an async context
            manager yielding a response with ``raise_for_status()`` and an
            awaitable ``read()`` (an ``aiohttp.ClientSession`` in this script).

    Returns:
        None. Any failure is printed to stdout instead of being raised.
    """
    target_dir = os.path.join('assets-www', 'img', 'cloudinary')
    try:
        async with session.get(url=url) as response:
            # Reject 4xx/5xx answers; without this an error page body would be
            # written to disk under an image file name.
            response.raise_for_status()
            resp = await response.read()
        print("Successfully got url {} with resp of length {}.".format(url, len(resp)))
        # Create the destination on demand so the write does not fail when the
        # directory is missing.
        os.makedirs(target_dir, exist_ok=True)
        file_name = os.path.join(target_dir, url[url.rfind('/') + 1:])
        with open(file_name, 'wb') as f:
            f.write(resp)
    except Exception as e:
        # Best-effort download: report the problem and carry on.
        print(e)
        print("Unable to get url {} due to {}.".format(url, e.__class__))
async def main(urls):
    """Download all of *urls* concurrently through one shared HTTP session.

    Args:
        urls: Iterable of image URLs. A URL that appears more than once is
            fetched a single time.

    Returns:
        None. Prints how many downloads were awaited.
    """
    # The scan records every occurrence of a URL, so the same address can be
    # listed many times. Drop repeats (keeping first-seen order) so each file
    # is downloaded and written only once.
    unique_urls = list(dict.fromkeys(urls))
    async with aiohttp.ClientSession() as session:
        ret = await asyncio.gather(*(get(url, session) for url in unique_urls))
    print("Finalized all. Return is a list of len {} outputs.".format(len(ret)))
# Collects every image URL matched while scanning files; the scan loop appends
# to it and the finished list is handed to main() at the end of the script.
urls = []
def replaceFn(match):
    """Return the local asset path that replaces a matched image URL.

    Args:
        match: A regex match object whose full match (group 0) is the URL.

    Returns:
        ``'/assets-www/img/cloudinary/'`` followed by the text after the last
        ``/`` of the match (the whole match if it contains no ``/``).
    """
    _, _, file_name = match.group(0).rpartition('/')
    return '/assets-www/img/cloudinary/' + file_name
# Walk every file below each top-level directory, record the image URLs it
# contains, and rewrite those URLs in place to the local asset path.
for eachRoot in root:
    for path, _subdirs, files in os.walk(eachRoot):
        for name in files:
            file_path = os.path.join(path, name)
            try:
                # Close the read handle before the same path is reopened for
                # writing.
                with open(file_path, 'r') as source:
                    tmp = source.read()
                cloudinaryImages = re.findall(pattern, tmp)
                if not cloudinaryImages:
                    continue
                urls.extend(cloudinaryImages)
                with open(file_path, 'w') as tmpFile:
                    tmpFile.write(re.sub(pattern, replaceFn, tmp))
            except UnicodeError:
                # The file is not decodable as text (e.g. binary content), so
                # there is nothing to rewrite; skip it quietly.
                continue
            except OSError as e:
                # Unreadable or unwritable file: report it and keep scanning
                # instead of aborting the whole run.
                print("Skipping {} due to {}.".format(file_path, e))
# Time the whole download phase and report how long it took.
started_at = time.time()
asyncio.run(main(urls))
elapsed = time.time() - started_at
print("Took {} seconds to pull {} websites.".format(elapsed, len(urls)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment