TensorFlow Dependencies Repository Fetcher for Offline Build
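The script below scans a TensorFlow source tree's WORKSPACE and tensorflow/workspace.bzl for http_archive, mkl_repository, java_import_external, and filegroup_external rules, downloads every referenced archive into a tf_localrepo/ directory while verifying its sha256 checksum, and then rewrites each rule's urls list to try a local file:// mirror first. To use it, unpack tensorflow-1.10.0 next to the script and run it with Python 3.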
import os
import sys
import re
import pprint
import hashlib
import time
import urllib.request as urlreq

verbose_debug = True
def reporthook(count, block_size, total_size):
    # reference: https://blog.shichao.io/2012/10/04/progress_speed_indicator_for_urlretrieve_in_python.html
    global start_time
    if count == 0:
        start_time = time.time()
        return
    # guard against division by zero when the first chunk arrives very quickly
    duration = max(time.time() - start_time, 1e-6)
    progress_size = int(count * block_size)
    speed = int(progress_size / (1024 * duration))
    percent = int(count * block_size * 100 / total_size)
    sys.stdout.write("\r ...%d%%, %d MB, %d KB/s, %d seconds passed" %
                     (percent, progress_size / (1024 * 1024), speed, duration))
    sys.stdout.flush()
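# reporthook plugs directly into urllib.request.urlretrieve, which calls it as
# reporthook(block_count, block_size, total_size) after every chunk; a minimal,
# hypothetical usage sketch (URL and filename are placeholders):
#
#   urlreq.urlretrieve(url='https://example.com/dep.tar.gz',
#                      filename='/tmp/dep.tar.gz', reporthook=reporthook)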
def fetch_candidate_dict(targets):
    global verbose_debug
    fetch_candid_dict = dict()
    # rule calls whose blocks carry downloadable dependencies
    fetch_rules = ('http_archive(', 'mkl_repository(', 'java_import_external(', 'filegroup_external(')
    for target in targets:
        # for debugging
        print('target:', target)
        fetch_candidate = list()
        filename_target = os.path.split(target)
        buffer = str()
        sw_buffering = False
        with open(file=target, mode='r') as file_target:
            for line in file_target:
                stripped = line.strip()
                # buffer switch trigger: start collecting at an uncommented dependency
                # rule call, stop at the bare closing parenthesis
                if stripped.endswith(fetch_rules) and not stripped.startswith('#'):
                    sw_buffering = True
                elif sw_buffering and stripped == ')':
                    sw_buffering = False
                    buffer += ")"
                    fetch_candidate.append(buffer)
                    buffer = str()
                    continue
                if sw_buffering:
                    buffer += line.lstrip()
        if verbose_debug:
            print('>>:', len(fetch_candidate), fetch_candidate)
        if len(fetch_candidate) > 0:
            dict_setup = dict({
                'path_full': target,
                'candidates': parse_web_archive(fetch_candidate)
            })
            fetch_candid_dict.update({
                filename_target[1]: dict_setup
            })
    # fin
    return fetch_candid_dict
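# The returned mapping is keyed by file basename; an abridged, illustrative shape
# (names and values are hypothetical):
#
#   {'workspace.bzl': {
#        'path_full': '/path/to/tensorflow-1.10.0/tensorflow/workspace.bzl',
#        'candidates': {
#            'com_googlesource_code_re2': {
#                'type': 'tf_http_archive',
#                'sha256': '...',
#                'urls': ['https://mirror.bazel.build/...', 'https://github.com/...'],
#                'topurl': 'https://mirror.bazel.build/...'}}}}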
def parse_web_archive(_plain_list):
    global verbose_debug
    parse_dict = dict()
    for item in _plain_list:
        # flatten the buffered rule call into a single line, dropping comments
        inline = str()
        for line in item.split('\n'):
            if not line.strip().startswith('#'):
                inline += line.strip().split('#')[0]
        m = re.match(pattern=r'([A-Za-z._]+)\((.*)\)', string=inline)
        item_type = m.group(1)
        item_contents = m.group(2)
        if verbose_debug:
            print(item_contents)
        item_name = re.findall(r'name = \"(.+?)\",', item_contents)[-1]
        try:
            if item_type != 'filegroup_external':
                item_sha256 = re.findall(r'(jar_sha256|sha256)(\s)*=(\s)*\"(.+?)\",', item_contents)[-1][-1]
                item_urls_set = re.findall(r'(jar_urls|urls)(\s)*=(\s)*\[(.+?)\],', item_contents)[-1][-1]
            else:
                # 'filegroup_external' keeps its checksum and URLs in a sha256_urls dict
                sha256_urls = re.findall(r'sha256_urls(\s)*=(\s)*{\"(.+?)\"(\s)*:(\s)*\[(.+?)\},', item_contents)[-1]
                item_sha256 = sha256_urls[2]
                item_urls_set = sha256_urls[5].replace('],', '').replace(']', '')
            # keep only proper http(s) URLs
            item_urls_ = [x.replace('"', '') for x in item_urls_set.split(',')]
            item_urls = [url for url in item_urls_ if url.startswith('http')]
            parse_dict.update({
                item_name: dict({
                    'type': item_type,
                    'sha256': item_sha256,
                    'urls': item_urls,
                    'topurl': item_urls[0],
                })
            })
        except Exception as err:
            print(err, inline, file=sys.stderr)
    # fin
    return parse_dict
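# Illustrative excerpt of the kind of rule block the regexes above parse
# (abridged; the sha256 and archive paths are placeholders):
#
#   tf_http_archive(
#       name = "com_googlesource_code_re2",
#       urls = [
#           "https://mirror.bazel.build/github.com/google/re2/archive/....tar.gz",
#           "https://github.com/google/re2/archive/....tar.gz",
#       ],
#       sha256 = "...",
#   )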
def download_dependencies(path_root, dicts):
    path_local_repo = os.path.join(path_root, 'tf_localrepo')
    try:
        # fail fast if a stale local repository is already present
        os.makedirs(path_local_repo)
    except FileExistsError:
        print('[E] Directory \'tf_localrepo\' already exists in the tensorflow directory; please remove it first.')
        sys.exit(-1)
    # loop through target files
    for key_target, value_target in dicts.items():
        candidates = value_target['candidates']
        for key_item in candidates.keys():
            print('[I] fetching {:s}...'.format(key_item))
            key_sha256 = candidates[key_item]['sha256']
            urls = candidates[key_item]['urls']
            for i in range(len(urls)):
                print(' [{:02d}] trying at {:s} ...'.format(i + 1, urls[i]))
                path_local_repo_key = os.path.join(path_local_repo, key_item)
                os.makedirs(path_local_repo_key, exist_ok=True)
                try:
                    filename = urls[i].split('/')[-1]
                    path_local_repo_key_full = os.path.join(path_local_repo_key, filename)
                    if os.path.exists(path_local_repo_key_full):
                        os.remove(path_local_repo_key_full)
                    urlreq.urlretrieve(url=urls[i], filename=path_local_repo_key_full, reporthook=reporthook)
                    print()
                except Exception as e:
                    print('[E] {:s}'.format(str(e)))
                    continue
                # accept the first mirror whose sha256 checksum matches
                if os.path.exists(path_local_repo_key_full):
                    with open(path_local_repo_key_full, mode='rb') as f_test:
                        if hashlib.sha256(f_test.read()).hexdigest() == key_sha256:
                            print('[I] download complete ({:s}) as {:s} with valid sha256 checksum.'.format(
                                key_item, path_local_repo_key_full))
                            break
    # fin
    return
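# Resulting on-disk layout (illustrative; the archive name is a placeholder):
#
#   <path_root>/tf_localrepo/<dependency_name>/<archive_filename>
#   e.g. tensorflow-1.10.0/tf_localrepo/com_googlesource_code_re2/<release>.tar.gz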
def insert_localized_deps(dicts, localrepo='/tmp/tf_localrepo'):
    global verbose_debug
    for key_target, value_target in dicts.items():
        target_name = key_target
        path_full = value_target['path_full']
        with open(path_full, mode='r') as file_check:
            path_target_contents = file_check.read()
        # keep a backup of the original file before editing it in place
        with open(path_full + ".bak", mode='w') as file_backup:
            file_backup.write(path_target_contents)
        print('[I] backup of {:s} complete as {:s}.'.format(target_name, path_full + ".bak"))
        if verbose_debug:
            print('[ORIGINAL]:{:s}'.format(target_name))
            print('Contents:', path_target_contents)
        candidates = value_target['candidates']
        for key_item in candidates.keys():
            key_rendered = "name = \"{:s}\"".format(key_item)
            # locate the first occurrence of the top URL after the name declaration;
            # re.escape keeps regex metacharacters in the URL from being interpreted
            topurl = "\n(.*)\"{:s}\"".format(re.escape(candidates[key_item]['topurl']))
            m_key = re.search(key_rendered, path_target_contents)
            m_url = None
            for _m_url in re.finditer(topurl, path_target_contents):
                if m_key.start() < _m_url.start() and m_key.end() < _m_url.start():
                    m_url = _m_url
                    break
            m_url_fetched = path_target_contents[m_url.start():m_url.end()]
            whitespace = m_url_fetched.replace('"' + candidates[key_item]['topurl'] + '"', '')
            local_url_join = os.path.join(localrepo, key_item, candidates[key_item]['topurl'].split('/')[-1])
            # prepend a file:// mirror on its own line, reusing the original indentation
            inject_local_url = "{:s}\"file://{:s}\",".format(whitespace, local_url_join)
            path_target_contents = \
                path_target_contents[:m_url.start()] + inject_local_url + path_target_contents[m_url.start():]
        with open(path_full, mode='w') as file_edit:
            file_edit.write(path_target_contents)
        print('[I] Local Repository ({:s}) update done.'.format(target_name))
        if verbose_debug:
            print('[UPDATED]:{:s}'.format(target_name))
            print(path_target_contents)
    # fin
    return
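# Effect on the edited workspace file (illustrative): each matched urls list gains
# a local file:// mirror immediately before its original top URL, e.g.:
#
#   urls = [
#       "file:///tmp/tf_localrepo/com_googlesource_code_re2/<release>.tar.gz",
#       "https://mirror.bazel.build/github.com/google/re2/archive/<release>.tar.gz",
#       ...
#   ],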
def main():
    # tensorflow/tensorflow commit 6b6d843ccab78f9f91c3b98a43ca09ffecad4747 requires 2 modifications:
    # WORKSPACE
    # tensorflow/workspace.bzl
    global verbose_debug
    tf_rel = 'tensorflow-1.10.0'
    path_root = os.path.join(os.getcwd(), tf_rel)
    path_localrepo = '/tmp/tf_localrepo'
    # path_cmake_external = 'tensorflow/contrib/cmake/external'
    targets = list()
    targets.append(os.path.join(path_root, 'WORKSPACE'))
    targets.append(os.path.join(path_root, 'tensorflow/workspace.bzl'))
    # for _path, dirs, files in os.walk(top=os.path.join(path_root, path_cmake_external)):
    #     for _file in files:
    #         targets.append(os.path.join(_path, _file))
    fetch_candid_dict = fetch_candidate_dict(targets=targets)
    if verbose_debug:
        pp = pprint.PrettyPrinter()
        pp.pprint(fetch_candid_dict)
    # download dependencies
    download_dependencies(path_root=path_root, dicts=fetch_candid_dict)
    # insert local dependencies
    insert_localized_deps(dicts=fetch_candid_dict, localrepo=path_localrepo)
    # fin
    return


if __name__ == '__main__':
    main()
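Once the script finishes, the offline build should work as usual: Bazel tries the entries of each urls list in order, so the injected file:// mirrors are resolved first and the standard ./configure and bazel build steps can proceed without network access, assuming every dependency was downloaded with a valid checksum.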