Skip to content

Instantly share code, notes, and snippets.

@zurk
Created September 13, 2017 17:27
Show Gist options
  • Save zurk/ad464aa73ad244980457dd2f09ff3abd to your computer and use it in GitHub Desktop.
Save zurk/ad464aa73ad244980457dd2f09ff3abd to your computer and use it in GitHub Desktop.
import multiprocessing
import threading
import time
import logging
from bblfsh import BblfshClient
# Address of the Babelfish server every client connects to.
# Alternative endpoint kept for reference: "0.0.0.0:9432"
BBLFSH_ENDPOINT = "172.17.0.1:9434"

# Language name sent with each parse request, and the file that gets parsed
# (it is filled with copies of this script further down).
LANG = "Python"
FILEPATH = "bblfsh_hang_ex.py"

# Number of clients and of worker threads (one client per thread).
NUM_THREADS = 32
# Passed as `timeout` to every parse call (presumably seconds - confirm
# against the bblfsh client).
TIMEOUT = 10
# Seconds the main thread sleeps before asking the workers to stop.
STOP_TIME = 20 * 60
# Seed the file that will be parsed: truncate FILEPATH, then write a newline
# followed by a copy of this script. FILEPATH is opened for writing before
# this script is opened for reading, exactly as in the original nesting.
with open(FILEPATH, 'w') as f, open(__file__) as f2:
    f.write('\n')
    f.write(f2.read())

# One client per worker thread; worker i uses bblfsh[i].
bblfsh = [BblfshClient(BBLFSH_ENDPOINT) for _ in range(NUM_THREADS)]

# Flag polled by thread_loop; the main thread sets it to True to end the run.
stop = False
def thread_loop(thread_idx, filename):
    """Parse ``filename`` repeatedly until the module-level ``stop`` flag is set.

    Every pass sends ``filename`` to the client stored at
    ``bblfsh[thread_idx]``. On every 10th pass, up to and including pass
    1000, a newline plus another copy of this script is appended to
    ``FILEPATH`` (the caller in this file passes ``FILEPATH`` as
    ``filename``, so the parsed input keeps growing).

    Args:
        thread_idx: Index into the module-level ``bblfsh`` client list.
        filename: Path handed to the client's ``parse`` call.
    """
    log = logging.getLogger('hang_test')
    log.setLevel(logging.INFO)
    last_res = None
    k = 0
    # `stop` is only checked between parse calls, so a worker notices the
    # request to stop once its current parse call has returned.
    while not stop:
        k += 1
        res = bblfsh[thread_idx].parse(filename, language=LANG, timeout=TIMEOUT)
        # Debugging aid, left disabled: report when the UAST differs from the
        # one returned on the previous pass.
        # if last_res != res.uast:
        #     log.warning("{}: {}".format(thread_idx, res))
        last_res = res.uast
        if k % 10 == 0 and k <= 1000:
            # Grow the file; the target is opened before this script is read.
            with open(FILEPATH, 'a') as f, open(__file__) as f2:
                f.write('\n')
                f.write(f2.read())
# Timestamp taken just before the workers are launched (not read again in the
# visible part of this script).
start_time = time.time()

# One worker per client, each named after its index and parsing FILEPATH.
pool = [threading.Thread(target=thread_loop, args=(i, FILEPATH),
                         name=str(i))
        for i in range(NUM_THREADS)]
for thread in pool:
    thread.start()

# Let the workers run for STOP_TIME seconds, then raise the stop flag that
# thread_loop polls.
time.sleep(STOP_TIME)
stop = True

# join() waits for every worker to return from thread_loop.
for thread in pool:
    thread.join()
import logging
import os
from ast2vec import install_enry, ensure_bblfsh_is_running_noexc
from ast2vec.__main__ import setup_logging
from ast2vec.repo2.source import Repo2SourceTransformer
from collections import defaultdict
# Filesystem layout: every path below is derived from BASE_DIR.
BASE_DIR = "/storage/konstantin/"
REPOS_DIR = os.path.join(BASE_DIR, "data/repos/")
SOURCES_DIR = os.path.join(BASE_DIR, "data/sources/")
ENRY_PATH = os.path.join(BASE_DIR, "enry")

# Values passed to the Repo2SourceTransformer constructor further down.
bblfsh_endpoint = "172.17.0.1:9434"
linguist = ENRY_PATH
timeout = 120
# Also used as the level given to setup_logging().
log_level = "WARNING"

# Passed as `num_processes` to the transform() call.
num_processes = 2
def subdirs_name(dir):
    """Return the names of the entries of ``dir`` that are directories.

    Args:
        dir: Path of the directory to list. (The name shadows the builtin
            ``dir``; it is kept so keyword callers keep working.)

    Returns:
        A list of entry names (not full paths), in the order produced by
        ``os.listdir``.
    """
    return [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
def subdirs(dir):
    """Return the paths of the directories located directly inside ``dir``.

    Args:
        dir: Path of the directory to list.

    Returns:
        A list with ``dir`` joined to each name returned by
        ``subdirs_name(dir)``.
    """
    return [os.path.join(dir, d) for d in subdirs_name(dir)]
# preparations
setup_logging(level=log_level)
log = logging.getLogger("dependency_filter")
libnames = ['matplotlib']
log.info("Libs to process: {}".format(', '.join(libnames)))
r2cc = Repo2SourceTransformer(timeout=timeout, log_level=log_level,
                              linguist=linguist, bblfsh_endpoint=bblfsh_endpoint,
                              organize_files=1, overwrite_existing=False)

for libname in libnames:
    log.warning("Start to process {} lib".format(libname))
    # Input repositories and output folder for this library. os.path.join
    # yields the same paths as plain concatenation while REPOS_DIR and
    # SOURCES_DIR end with a separator, and stays correct if they do not.
    repos_dir = os.path.join(REPOS_DIR, libname)
    sources_folder = os.path.join(SOURCES_DIR, libname)
    repos_path = subdirs(repos_dir)
    log.warning("{} reps found in {}".format(len(repos_path), repos_dir))

    # repos -> code&uast
    # NOTE(review): the count logged above covers directories only, while
    # every entry of repos_dir (files included) is handed to transform().
    # Behaviour kept as is - confirm which of the two is intended.
    repos = [os.path.join(repos_dir, repo_dir)
             for repo_dir in os.listdir(repos_dir)]
    r2cc.transform(repos, output=sources_folder, num_processes=num_processes)
    log.warning("Finish to process {} lib. {} model files in {}".format(
        libname, len(os.listdir(sources_folder)), sources_folder))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment