# sport4minus/deckset_collect.py

## deckset_collect.py
#!/bin/python

# deckset_collect.py
# by jens wunderling
# this script collects all assets used in a deckset presentation into one folder and creates an updated .md file
# useful if you wildly use image files all over your local file system
# and for archiving (if online sources go offline)
# i'd like this to be a feature of the main app though :D

import os, argparse, shutil
import re
import urllib

## all the paths and filenames
working_dir = os.getcwd()
assets_dir_name = "assets"
assets_dir_path = None
in_file_name = None
out_file_name = None

# filled by retreive_links(): every asset href found in the infile
asset_list = []
# filled by collect_assets(): original href -> new location relative to working_dir
asset_location_map = {}

argparser = argparse.ArgumentParser()
argparser.add_argument("-i", "--infile", help="the infile, format .md", required=True)
args = argparser.parse_args()

## build paths to in and outfile and assets directory ##
if not args.infile:
	# argparse already rejects a missing -i (required=True), but an empty
	# value such as `-i ""` still gets here. SystemExit with a message
	# writes it to stderr and ends the script with a non-zero status.
	raise SystemExit("no infile specified. you should do this.")

in_file_name = args.infile
# prefix only the file name: prefixing the whole argument would turn
# "slides/talk.md" into the non-existent "collected_slides/talk.md".
# the outfile lives in working_dir, next to the assets directory its links point to.
out_file_name = "collected_" + os.path.basename(in_file_name)
in_file_path = os.path.join(working_dir, in_file_name)
out_file_path = os.path.join(working_dir, out_file_name)
assets_dir_path = os.path.join(working_dir, assets_dir_name)
if not os.path.exists(assets_dir_path):
	os.mkdir(assets_dir_path)
	print("assets directory called %s created" % assets_dir_name)


def retreive_links():
	'''Read the input file and collect the href of every image it references.

	Looks for markdown image syntax ``![alt](href)`` in the file at
	in_file_path. Each match is printed as an (alt, href) tuple and its href
	is appended to the module-level asset_list, in document order.
	Returns nothing.
	'''
	# the context manager closes the file even if read() raises
	with open(in_file_path) as in_file:
		in_file_contents = in_file.read()
	# raw string: the backslashes belong to the regex, not to python string escapes
	assetmatch = re.compile(r'!\[([^\[]*)\]\(([^\)]+)\)')
	for ahref in assetmatch.findall(in_file_contents):
		print(ahref)
		# group 1 is the alt text, group 2 the path or url
		asset_list.append(ahref[1])

def collect_assets():
	'''Copy or download every asset in asset_list into the assets directory.

	Hrefs whose first path component is the assets directory name are left
	alone. Hrefs starting with http:// or https:// are downloaded; everything
	else is copied as a local file. Each collected href is recorded in
	asset_location_map (original href -> assets_dir_name/file name).
	A failure on one asset is printed and does not stop the remaining ones.
	'''
	# iterate links, put in folder named assets
	for a in asset_list:
		# the last path component becomes the file name inside the assets folder
		asset_name = a.rsplit("/", 1)[-1]
		print("%s %s" % (a, asset_name))
		# compare the whole first path component, so that e.g. "assets_old/x.png"
		# is not mistaken for something that already lives in "assets/"
		if a.split("/", 1)[0] == assets_dir_name:
			print("already in assets folder")
			continue
		try:
			# require the scheme separator, so a local path like "httpd/x.png" is still copied
			if a.startswith(("http://", "https://")):
				print("is url. downloading to assets folder")
				dl = urllib.URLopener()
				#some images on the web have urlencoded chars in filename. deckset doesnt handle these names well on the file system
				asset_name = urllib.unquote(asset_name)
				dl.retrieve(a, os.path.join(assets_dir_path, asset_name))
			else:
				print("is file. copying")
				shutil.copy(a, os.path.join(assets_dir_path, asset_name))
			print("done.")
			asset_location_map[a] = os.path.join(assets_dir_name, asset_name)
		except Exception as e:
			# best effort: report the failure and carry on with the next asset
			print(e)
			print("something went wrong collecting %s" % a)

def replace_refs():
	'''Write a copy of the input file whose collected hrefs point into the assets folder.

	Reads in_file_path, replaces every occurrence of a key of
	asset_location_map with its mapped value and writes the result to
	out_file_path. The input file itself is not modified.
	'''
	#TODO shutil.copy2 to copy file first then overwrite. maybe metadata(theme) stays put..
	with open(in_file_path) as in_file:
		in_file_contents = in_file.read()
	if asset_location_map:
		# substitute in ONE pass with the longest hrefs tried first. replacing
		# key by key would let a short href ("pic.png") rewrite part of a longer
		# one ("img/pic.png") or part of an already inserted replacement.
		hrefs = sorted(asset_location_map, key=len, reverse=True)
		href_match = re.compile("|".join(re.escape(h) for h in hrefs))
		in_file_contents = href_match.sub(
			lambda m: asset_location_map[m.group(0)], in_file_contents)
	with open(out_file_path, "w") as outfile:
		outfile.write(in_file_contents)

if __name__ == "__main__":
    # the three stages, in order: find the hrefs, gather the files, rewrite the document
    for stage in (retreive_links, collect_assets, replace_refs):
        stage()
	#!/bin/python

	# deckset_collect.py
	# by jens wunderling
	# this script collects all assets used in a deckset presentation into one folder and creates an updated .md file
	# useful if you wildly use image files all over your local file system
	# and for archiving (if online sources go offline)
	# i'd like this to be a feature of the main app though :D

	import os, argparse, shutil
	import re
	import urllib

	## all the paths and filenames
	working_dir = os.getcwd()
	assets_dir_name = "assets"
	assets_dir_path = None
	in_file_name = None
	out_file_name = None

	# filled by retreive_links(): every asset href found in the infile
	asset_list = []
	# filled by collect_assets(): original href -> new location relative to working_dir
	asset_location_map = {}

	argparser = argparse.ArgumentParser()
	argparser.add_argument("-i", "--infile", help="the infile, format .md", required=True)
	args = argparser.parse_args()

	## build paths to in and outfile and assets directory ##
	if not args.infile:
		# argparse already rejects a missing -i (required=True), but an empty
		# value such as `-i ""` still gets here. SystemExit with a message
		# writes it to stderr and ends the script with a non-zero status.
		raise SystemExit("no infile specified. you should do this.")

	in_file_name = args.infile
	# prefix only the file name: prefixing the whole argument would turn
	# "slides/talk.md" into the non-existent "collected_slides/talk.md".
	# the outfile lives in working_dir, next to the assets directory its links point to.
	out_file_name = "collected_" + os.path.basename(in_file_name)
	in_file_path = os.path.join(working_dir, in_file_name)
	out_file_path = os.path.join(working_dir, out_file_name)
	assets_dir_path = os.path.join(working_dir, assets_dir_name)
	if not os.path.exists(assets_dir_path):
		os.mkdir(assets_dir_path)
		print("assets directory called %s created" % assets_dir_name)


	def retreive_links():
		'''Read the input file and collect the href of every image it references.

		Looks for markdown image syntax ``![alt](href)`` in the file at
		in_file_path. Each match is printed as an (alt, href) tuple and its href
		is appended to asset_list, in document order. Returns nothing.
		'''
		# the context manager closes the file even if read() raises
		with open(in_file_path) as in_file:
			in_file_contents = in_file.read()
		# raw string: the backslashes belong to the regex, not to python string escapes
		assetmatch = re.compile(r'!\[([^\[]*)\]\(([^\)]+)\)')
		for ahref in assetmatch.findall(in_file_contents):
			print(ahref)
			# group 1 is the alt text, group 2 the path or url
			asset_list.append(ahref[1])

	def collect_assets():
		'''Copy or download every asset in asset_list into the assets directory.

		Hrefs whose first path component is the assets directory name are left
		alone. Hrefs starting with http:// or https:// are downloaded; everything
		else is copied as a local file. Each collected href is recorded in
		asset_location_map (original href -> assets_dir_name/file name).
		A failure on one asset is printed and does not stop the remaining ones.
		'''
		# iterate links, put in folder named assets
		for a in asset_list:
			# the last path component becomes the file name inside the assets folder
			asset_name = a.rsplit("/", 1)[-1]
			print("%s %s" % (a, asset_name))
			# compare the whole first path component, so that e.g. "assets_old/x.png"
			# is not mistaken for something that already lives in "assets/"
			if a.split("/", 1)[0] == assets_dir_name:
				print("already in assets folder")
				continue
			try:
				# require the scheme separator, so a local path like "httpd/x.png" is still copied
				if a.startswith(("http://", "https://")):
					print("is url. downloading to assets folder")
					dl = urllib.URLopener()
					#some images on the web have urlencoded chars in filename. deckset doesnt handle these names well on the file system
					asset_name = urllib.unquote(asset_name)
					dl.retrieve(a, os.path.join(assets_dir_path, asset_name))
				else:
					print("is file. copying")
					shutil.copy(a, os.path.join(assets_dir_path, asset_name))
				print("done.")
				asset_location_map[a] = os.path.join(assets_dir_name, asset_name)
			except Exception as e:
				# best effort: report the failure and carry on with the next asset
				print(e)
				print("something went wrong collecting %s" % a)

	def replace_refs():
		'''Write a copy of the input file whose collected hrefs point into the assets folder.

		Reads in_file_path, replaces every occurrence of a key of
		asset_location_map with its mapped value and writes the result to
		out_file_path. The input file itself is not modified.
		'''
		#TODO shutil.copy2 to copy file first then overwrite. maybe metadata(theme) stays put..
		with open(in_file_path) as in_file:
			in_file_contents = in_file.read()
		if asset_location_map:
			# substitute in ONE pass with the longest hrefs tried first. replacing
			# key by key would let a short href ("pic.png") rewrite part of a longer
			# one ("img/pic.png") or part of an already inserted replacement.
			hrefs = sorted(asset_location_map, key=len, reverse=True)
			href_match = re.compile("|".join(re.escape(h) for h in hrefs))
			in_file_contents = href_match.sub(
				lambda m: asset_location_map[m.group(0)], in_file_contents)
		with open(out_file_path, "w") as outfile:
			outfile.write(in_file_contents)

	if __name__ == "__main__":
		# the three stages, in order: find the hrefs, gather the files, rewrite the document
		retreive_links()
		collect_assets()
		replace_refs()