TensorFlow Dependencies Repository Fetcher for Offline Build
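
"""TensorFlow dependency fetcher for offline builds.

Scans the WORKSPACE and tensorflow/workspace.bzl files of a TensorFlow
source tree for http_archive / mkl_repository / java_import_external /
filegroup_external rules, downloads each referenced archive into a local
repository directory with sha256 verification, and then patches the Bazel
files so every dependency's URL list is fronted by a file:// URL pointing
at the local copy.
"""
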
import os
import sys
import re
import pprint
import hashlib
import time
import urllib.request as urlreq
verbose_debug = True
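

# Progress callback for urllib.request.urlretrieve(): prints percentage,
# downloaded size, transfer speed, and elapsed time on one line.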
def reporthook(count, block_size, total_size):
    # reference: https://blog.shichao.io/2012/10/04/progress_speed_indicator_for_urlretrieve_in_python.html
    global start_time
    if count == 0:
        start_time = time.time()
        return
    duration = time.time() - start_time
    if duration == 0:
        # guard against division by zero on the very first data callback
        return
    progress_size = int(count * block_size)
    speed = int(progress_size / (1024 * duration))
    if total_size > 0:
        percent = min(int(count * block_size * 100 / total_size), 100)
    else:
        # total size unknown (server sent no Content-Length)
        percent = 0
    sys.stdout.write("\r ...%d%%, %d MB, %d KB/s, %d seconds passed" %
                     (percent, progress_size / (1024 * 1024), speed, duration))
    sys.stdout.flush()
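

# Scan each target Bazel file and buffer every uncommented dependency block
# (http_archive / mkl_repository / java_import_external / filegroup_external);
# returns {filename: {'path_full': <path>, 'candidates': <parsed dict>}}.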
def fetch_candidate_dict(targets):
    global verbose_debug
    fetch_candid_dict = dict()
    # rule macros that introduce an external dependency block
    openers = ('http_archive(', 'mkl_repository(',
               'java_import_external(', 'filegroup_external(')
    for target in targets:
        # for debugging
        print('target:', target)
        fetch_candidate = list()
        filename_target = os.path.split(target)
        # print('filename:', filename_target)
        buffer = str()
        sw_buffering = False
        with open(file=target, mode='r') as file_target:
            for line in file_target:
                # print(line.rstrip())
                # buffer switch trigger
                if line.strip().endswith(openers) and not line.lstrip().startswith('#'):
                    sw_buffering = True
                elif sw_buffering and line.strip() == ')':
                    sw_buffering = False
                    buffer += ")"
                    fetch_candidate.append(buffer)
                    buffer = str()
                    continue
                # check buffering
                if sw_buffering:
                    buffer += line.lstrip()
        if verbose_debug:
            print('>>:', len(fetch_candidate), fetch_candidate)
        if len(fetch_candidate) > 0:
            dict_setup = dict({
                'path_full': target,
                'candidates': parse_web_archive(fetch_candidate)
            })
            fetch_candid_dict.update({
                filename_target[1]: dict_setup
            })
    # fin
    return fetch_candid_dict
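

# Flatten each buffered rule block to a single comment-free line and extract
# its fields with regexes; returns {name: {'type', 'sha256', 'urls', 'topurl'}}.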
def parse_web_archive(_plain_list):
    global verbose_debug
    parse_dict = dict()
    for item in _plain_list:
        # flatten the buffered block into one line, dropping comments
        inline = str()
        for line in item.split('\n'):
            # print('>', line)
            if not line.strip().startswith('#'):
                inline += line.strip().split('#')[0]
        # print('inline:', inline)
        m = re.match(pattern=r'([A-Za-z._]+)\((.*)\)', string=inline)
        item_type = m.group(1)
        item_contents = m.group(2)
        if verbose_debug:
            print(item_contents)
        item_name = re.findall(r'name = "(.+?)",', item_contents)[-1]
        # print('item_contents:', item_contents, 'item_type:', item_type)
        try:
            if item_type != 'filegroup_external':
                item_sha256 = re.findall(r'(jar_sha256|sha256)(\s)*=(\s)*"(.+?)",', item_contents)[-1][-1]
                item_urls_set = re.findall(r'(jar_urls|urls)(\s)*=(\s)*\[(.+?)\],', item_contents)[-1][-1]
            else:
                # 'filegroup_external' nests sha256 and urls inside a dict literal
                sha256_urls = re.findall(r'sha256_urls(\s)*=(\s)*{"(.+?)"(\s)*:(\s)*\[(.+?)\},', item_contents)[-1]
                item_sha256 = sha256_urls[2]
                item_urls_set = sha256_urls[5].replace('],', '').replace(']', '')
            item_urls = list()
            for url in item_urls_set.split(','):
                url = url.replace('"', '').strip()
                if url.startswith('http'):
                    item_urls.append(url)
            parse_dict.update({
                item_name: dict({
                    'type': item_type,
                    'sha256': item_sha256,
                    'urls': item_urls,
                    'topurl': item_urls[0],
                })
            })
        except Exception as err:
            print(err, inline, file=sys.stderr)
    # fin
    return parse_dict
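

# Download every candidate archive into <path_root>/tf_localrepo/<name>/,
# validating each file against its declared sha256 checksum and falling back
# to the next mirror URL on failure.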
def download_dependencies(path_root, dicts):
    path_local_repo = os.path.join(path_root, 'tf_localrepo')
    try:
        os.makedirs(path_local_repo, exist_ok=True)
    except FileExistsError:
        # only raised here if the path exists but is not a directory
        print('[E] \'tf_localrepo\' already exists in the tensorflow directory as a non-directory; please remove it first.')
        sys.exit(-1)
    # loop through target files
    for key_target, value_target in dicts.items():
        # target_name = key_target
        # path_full = value_target['path_full']
        candidates = value_target['candidates']
        for key_item in candidates.keys():
            print('[I] fetching {:s}...'.format(key_item))
            key_sha256 = candidates[key_item]['sha256']
            urls = candidates[key_item]['urls']
            for i in range(len(urls)):
                print(' [{:02d}] trying at {:s} ...'.format(i + 1, urls[i]))
                path_local_repo_key = os.path.join(path_local_repo, key_item)
                os.makedirs(path_local_repo_key, exist_ok=True)
                try:
                    filename = urls[i].split('/')[-1]
                    path_local_repo_key_full = os.path.join(path_local_repo_key, filename)
                    if os.path.exists(path_local_repo_key_full):
                        os.remove(path_local_repo_key_full)
                    urlreq.urlretrieve(url=urls[i], filename=path_local_repo_key_full, reporthook=reporthook)
                    print()
                except Exception as e:
                    print('[E] {:s}'.format(str(e)))
                    continue
                if os.path.exists(path_local_repo_key_full):
                    with open(path_local_repo_key_full, mode='rb') as f_test:
                        if hashlib.sha256(f_test.read()).hexdigest() == key_sha256:
                            print('[I] download complete ({:s}) as {:s} with valid sha256 checksum.'.format(
                                key_item, path_local_repo_key_full
                            ))
                            break
                        # checksum mismatch: fall through and try the next mirror
    # fin
    return
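

# Back up each Bazel file as <file>.bak, then inject a file:// URL pointing at
# the local copy in front of each dependency's original top URL.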
def insert_localized_deps(dicts, localrepo='/tmp/tf_localrepo'):
    global verbose_debug
    for key_target, value_target in dicts.items():
        target_name = key_target
        path_full = value_target['path_full']
        with open(path_full, mode='r') as file_check:
            path_target_contents = file_check.read()
        with open(path_full + ".bak", mode='w') as file_backup:
            file_backup.write(path_target_contents)
        print('[I] backup of {:s} complete as {:s}.'.format(target_name, path_full + ".bak"))
        if verbose_debug:
            print('[ORIGINAL]:{:s}'.format(target_name))
            print('Contents:', path_target_contents)
        candidates = value_target['candidates']
        for key_item in candidates.keys():
            key_rendered = "name = \"{:s}\"".format(key_item)
            # match the first occurrence of topurl after the name declaration;
            # re.escape() keeps regex metacharacters in the URL literal
            topurl = "\n(.*)\"{:s}\"".format(re.escape(candidates[key_item]['topurl']))
            # print(key_item, path_full, candidates[key_item]['sha256'])
            m_key = re.search(re.escape(key_rendered), path_target_contents)
            if m_key is None:
                continue
            m_url = None
            for _m_url in re.finditer(topurl, path_target_contents):
                if m_key.end() < _m_url.start():
                    m_url = _m_url
                    break
            if m_url is None:
                # no URL found after the name declaration; skip this item
                continue
            m_url_fetched = path_target_contents[m_url.start():m_url.end()]
            whitespace = m_url_fetched.replace('"' + candidates[key_item]['topurl'] + '"', '')
            # print('whitespace:', len(whitespace), whitespace)
            local_url_join = os.path.join(localrepo, key_item, candidates[key_item]['topurl'].split('/')[-1])
            inject_local_url = "{:s}\"file://{:s}\",".format(whitespace, local_url_join)
            path_target_contents = \
                path_target_contents[:m_url.start()] + inject_local_url + path_target_contents[m_url.start():]
        with open(path_full, mode='w') as file_edit:
            file_edit.write(path_target_contents)
        print('[I] Local Repository ({:s}) update done.'.format(target_name))
        if verbose_debug:
            print('[UPDATED]:{:s}'.format(target_name))
            print(path_target_contents)
    # fin
    return
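

# Entry point. Expects an extracted tensorflow-1.10.0 tree in the current
# working directory. Note: archives are downloaded into <tree>/tf_localrepo,
# while the rewritten URLs reference /tmp/tf_localrepo, so the fetched repo
# apparently has to end up at /tmp/tf_localrepo before building.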
def main():
    # tensorflow/tensorflow commit 6b6d843ccab78f9f91c3b98a43ca09ffecad4747 requires 2 modifications:
    # - WORKSPACE
    # - tensorflow/workspace.bzl
    global verbose_debug
    tf_rel = 'tensorflow-1.10.0'
    path_root = os.path.join(os.getcwd(), tf_rel)
    path_localrepo = '/tmp/tf_localrepo'
    # path_cmake_external = 'tensorflow/contrib/cmake/external'
    targets = list()
    targets.append(os.path.join(path_root, 'WORKSPACE'))
    targets.append(os.path.join(path_root, 'tensorflow/workspace.bzl'))
    # for _path, dirs, files in os.walk(top=os.path.join(path_root, path_cmake_external)):
    #     for _file in files:
    #         targets.append(os.path.join(_path, _file))
    fetch_candid_dict = fetch_candidate_dict(targets=targets)
    if verbose_debug:
        pp = pprint.PrettyPrinter()
        pp.pprint(fetch_candid_dict)
    # download dependencies
    download_dependencies(path_root=path_root, dicts=fetch_candid_dict)
    # insert local dependencies
    insert_localized_deps(dicts=fetch_candid_dict, localrepo=path_localrepo)
    # fin
    return


if __name__ == '__main__':
    main()
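
# Usage sketch (assumed workflow; the script filename below is hypothetical):
#   $ tar xzf v1.10.0.tar.gz                    # e.g. the GitHub v1.10.0 tarball -> ./tensorflow-1.10.0/
#   $ python3 tf_dep_fetcher.py                 # fetch archives and patch the Bazel files
#   $ mv tensorflow-1.10.0/tf_localrepo /tmp/   # match the injected file:// URLs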