Skip to content

Instantly share code, notes, and snippets.

@internetimagery
Last active April 21, 2023 21:43
Show Gist options
  • Save internetimagery/037e057af6c1555f48e20f4e553f3824 to your computer and use it in GitHub Desktop.
Save internetimagery/037e057af6c1555f48e20f4e553f3824 to your computer and use it in GitHub Desktop.
Extract thumbnail images from GPX files for GPS usage.
import re
import os
import html
import shutil
import logging
import tempfile
from functools import partial
from threading import Lock
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
from concurrent.futures import ThreadPoolExecutor, Future
from PIL import Image
# Module-level logger; handlers and levels are left to the application.
LOG = logging.getLogger(__name__)

# Matches the start of an ``<img src="...">`` / ``<img src='...'>`` tag whose
# ``src`` is the first attribute and holds an absolute http(s) URL.
#   group(1): the quote character that opens the attribute value
#   group(2): the URL itself (callers replace exactly this span)
# The lazy ``.+?`` stops at the first repeat of the opening quote.  A
# back-reference cannot be negated inside a character class (there ``\1`` is
# read as the character chr(1)), so the URL body is an explicit DOTALL
# wildcard instead of ``[^\1]``.
URL = re.compile(r"""<\s*img\s+src=(['"])(https?://.+?)\1""", re.DOTALL)

# Accepted response content types, mapped to the file extension used for the
# saved thumbnail.  Any other content type is skipped.
EXT = {
    "image/jpeg": "jpg",
    "image/png": "png",
    "image/gif": "gif",
}
def _save_thumbnail(url, size, store_dir, basename):
    """Download *url* and write a thumbnail of it into *store_dir*.

    Args:
        url: Absolute http(s) address of the image.
        size: ``(max_width, max_height)`` handed to ``Image.thumbnail``.
        store_dir: Directory the thumbnail is written to.
        basename: File name without extension; the extension is picked from
            the response content type through ``EXT``.

    Returns:
        The path of the written thumbnail, or ``""`` when the response
        content type is not listed in ``EXT``.

    Raises:
        OSError: The download, the image decoding or the write failed
            (``URLError`` and ``HTTPError`` are ``OSError`` subclasses).
    """
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'})
    with urlopen(req) as resp:
        mimetype = resp.headers.get_content_type()
        ext = EXT.get(mimetype)
        if not ext:
            LOG.warning('Skipped: unsupported content type "%s" "%s"', mimetype, url)
            return ""
        output = os.path.join(store_dir, "{}.{}".format(basename, ext))
        # Buffer the whole response first: the image decoder needs a
        # seekable file object, which the HTTP response is not.
        with tempfile.SpooledTemporaryFile() as tmp:
            shutil.copyfileobj(resp, tmp)
            img = Image.open(tmp)
            img.thumbnail(size)
            img.save(output)
    return output


def generate_replacement(size, store_dir, identifier, cache, lock, item):
    """Localize one matched image URL and describe the text replacement.

    Intended to run on worker threads.  Each distinct URL is downloaded once:
    the first caller for a URL registers a ``Future`` in *cache* and does the
    work, later callers for the same URL wait on that future.

    Args:
        size: ``(max_width, max_height)`` of the thumbnail.
        store_dir: Directory thumbnails are written to.
        identifier: Prefix for thumbnail file names.
        cache: Dict shared between workers, mapping URL -> ``Future`` that
            resolves to the thumbnail path (``""`` on failure).
        lock: Lock guarding access to *cache*.
        item: ``(index, match)`` pair where *match* is a regex match whose
            group 2 is the URL; *index* numbers the output file.

    Returns:
        ``(start, end, path)`` with the span of the URL inside the searched
        text and the thumbnail path, or ``(0, 0, "")`` when no thumbnail
        could be produced.
    """
    index, match = item
    url = match.group(2)
    span = (match.start(2), match.end(2))

    # Hold the lock only for the dictionary access.  Waiting on another
    # worker's download happens below, outside the lock, so unrelated URLs
    # are never serialized behind it.
    with lock:
        future = cache.get(url)
        is_owner = future is None
        if is_owner:
            future = cache[url] = Future()

    if not is_owner:
        path = future.result()
        return span + (path,) if path else (0, 0, "")

    result = ""
    try:
        result = _save_thumbnail(
            url, size, store_dir, "{}_{:03d}".format(identifier, index + 1)
        )
    except OSError as err:
        # Best effort: one broken link or undecodable image must not abort
        # the whole batch.
        LOG.warning('Failed: %s "%s"', err, url)
    finally:
        # Always resolve the future, even when an unexpected exception
        # propagates, so workers waiting on this URL are never left blocked.
        future.set_result(result)

    if not result:
        return (0, 0, "")
    LOG.info('Downloaded: "%s"', url)
    return span + (result,)
def main(path, size):
    """Localize the images linked from a GPX file.

    Every URL matched by ``URL`` in the file is handed to
    ``generate_replacement``, which stores thumbnails under
    ``<dir of path>/images/<file stem>/``.  A copy of the file with the
    successfully localized links rewritten is written next to the original
    as ``<file stem>-updated.gpx``; the original file is not modified.

    Args:
        path: Path to the ``.gpx`` file.
        size: Maximum width and height of the thumbnails (one value is used
            for both).
    """
    identifier, _ = os.path.splitext(os.path.basename(path))
    path_dir = os.path.dirname(path)
    img_path = os.path.abspath(os.path.join(path_dir, "images", identifier))
    os.makedirs(img_path, exist_ok=True)

    # GPX is XML, whose default encoding is UTF-8; do not depend on the
    # platform's locale encoding.
    with open(path, encoding="utf-8") as h:
        data = h.read()

    cache = {}
    lock = Lock()
    worker = partial(
        generate_replacement, (size, size), img_path, identifier, cache, lock
    )
    with ThreadPoolExecutor() as pool:
        # Sorted from the last span to the first so that rewriting one link
        # never shifts the offsets of the links still to be rewritten.
        replacements = sorted(
            pool.map(worker, enumerate(URL.finditer(data))),
            reverse=True,
        )

    for start, stop, replacement_path in replacements:
        if not replacement_path:
            # Nothing was saved for this link; leave the original URL.
            continue
        # NOTE(review): the link is made relative to the image directory,
        # not to the directory of the updated GPX file -- confirm this is
        # what the consuming device/application expects.
        relative_path = os.path.relpath(replacement_path, img_path)
        data = data[:start] + html.escape(relative_path) + data[stop:]

    output = os.path.join(path_dir, "{}-updated.gpx".format(identifier))
    with open(output, "w", encoding="utf-8") as h:
        h.write(data)
if __name__ == "__main__":
    import argparse

    # The text must be passed as ``description``: the first positional
    # parameter of ``ArgumentParser`` is ``prog``, which would put the whole
    # sentence in the usage line as the program name.
    parser = argparse.ArgumentParser(
        description="Collect linked images from gpx file and localize them."
    )
    parser.add_argument("path", help="Path to the .gpx file")
    parser.add_argument(
        "-s", "--size", type=int, default=200, help="Size for thumbnails"
    )
    args = parser.parse_args()
    main(args.path, args.size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment