-
-
Save zurk/ad464aa73ad244980457dd2f09ff3abd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import multiprocessing | |
import threading | |
import time | |
import logging | |
from bblfsh import BblfshClient | |
# --- Stress-test configuration ---------------------------------------------
# Endpoint handed to every BblfshClient created below.
BBLFSH_ENDPOINT = "172.17.0.1:9434"
#BBLFSH_ENDPOINT = "0.0.0.0:9432"
# File that the worker threads ask the server to parse.
FILEPATH = "bblfsh_hang_ex.py"
LANG = "Python"
# Number of worker threads; one client is created per thread.
NUM_THREADS = 32
# Seconds the main thread sleeps before asking the workers to stop.
STOP_TIME = 1200
# Value passed as ``timeout`` to every parse call.
TIMEOUT = 10

# Seed FILEPATH with a blank line followed by this script's own source.
# NOTE(review): if this script is itself saved as FILEPATH in the working
# directory, the 'w' open truncates it before it is read back -- confirm.
with open(FILEPATH, 'w') as target, open(__file__) as script:
    target.write('\n')
    target.write(script.read())

# One client per worker thread, looked up by thread index.
bblfsh = [BblfshClient(BBLFSH_ENDPOINT) for _ in range(NUM_THREADS)]

# Flag polled by the worker loops; setting it to True ends them.
stop = False
def thread_loop(thread_idx, filename):
    """Parse *filename* over and over until the module-level ``stop`` is set.

    On every 10th iteration, up to and including iteration 1000, another
    copy of this script's source (preceded by a newline) is appended to
    ``FILEPATH``.

    :param thread_idx: index into the module-level ``bblfsh`` client list.
    :param filename: path passed to the client's ``parse`` call.
    """
    logger = logging.getLogger('hang_test')
    logger.setLevel(logging.INFO)
    previous_uast = None
    iteration = 0
    while not stop:
        iteration += 1
        client = bblfsh[thread_idx]
        response = client.parse(filename, language=LANG, timeout=TIMEOUT)
        # Disabled debugging aid: compare against the previous result and
        # log a warning when it differs.
        #if previous_uast != response.uast:
        #    logger.warning("{}: {}".format(thread_idx, response))
        previous_uast = response.uast
        if iteration <= 1000 and iteration % 10 == 0:
            with open(FILEPATH, 'a') as target, open(__file__) as script:
                target.write('\n')
                target.write(script.read())
# Launch one worker per client, let them run for STOP_TIME seconds, then
# signal them to finish and wait for every one of them.
start_time = time.time()  # NOTE(review): never read in the visible code
pool = [
    threading.Thread(target=thread_loop, args=(i, FILEPATH), name=str(i))
    for i in range(NUM_THREADS)
]
for thread in pool:
    thread.start()
try:
    time.sleep(STOP_TIME)
finally:
    # Always raise the stop flag, even when the sleep is interrupted
    # (e.g. Ctrl-C). The workers are non-daemon threads, so without this
    # they would keep looping and hold the interpreter open.
    stop = True
    for thread in pool:
        thread.join()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import os | |
from ast2vec import install_enry, ensure_bblfsh_is_running_noexc | |
from ast2vec.__main__ import setup_logging | |
from ast2vec.repo2.source import Repo2SourceTransformer | |
from collections import defaultdict | |
# --- Filesystem layout -------------------------------------------------------
BASE_DIR = "/storage/konstantin/"
ENRY_PATH = os.path.join(BASE_DIR, "enry")
REPOS_DIR = os.path.join(BASE_DIR, "data/repos/")
SOURCES_DIR = os.path.join(BASE_DIR, "data/sources/")

# --- Settings passed to Repo2SourceTransformer / transform() -----------------
bblfsh_endpoint = "172.17.0.1:9434"
linguist = ENRY_PATH
timeout = 120
num_processes = 2
# Also used as the level for setup_logging().
log_level = "WARNING"
def subdirs_name(dir):
    """Return the names of the entries of *dir* that are directories.

    :param dir: path of the directory to list.
    :return: list of entry names (not joined with *dir*), in the order
             ``os.listdir`` yields them.
    """
    def is_directory(entry):
        return os.path.isdir(os.path.join(dir, entry))

    return list(filter(is_directory, os.listdir(dir)))
def subdirs(dir):
    """Return the paths of the directories located directly inside *dir*.

    :param dir: path of the directory to list.
    :return: list with each sub-directory name joined onto *dir*.
    """
    names = subdirs_name(dir)
    return [os.path.join(dir, name) for name in names]
# preparations
setup_logging(level=log_level)
log = logging.getLogger("dependency_filter")
libnames = ['matplotlib']
log.info("Libs to process: {}".format(', '.join(libnames)))
r2cc = Repo2SourceTransformer(
    timeout=timeout, log_level=log_level, linguist=linguist,
    bblfsh_endpoint=bblfsh_endpoint, organize_files=1,
    overwrite_existing=False)

# For every library: gather the entries of its repos directory and hand them
# to the transformer, which writes its output into the library's sources
# folder.
for libname in libnames:
    log.warning("Start to process {} lib".format(libname))
    # os.path.join keeps the paths correct whether or not REPOS_DIR and
    # SOURCES_DIR end with a separator.
    repos_dir = os.path.join(REPOS_DIR, libname)
    sources_folder = os.path.join(SOURCES_DIR, libname)
    repos_path = subdirs(repos_dir)
    log.warning("{} reps found in {}".format(len(repos_path), repos_dir))

    # repos -> code&uast
    # NOTE(review): the count logged above covers directories only, while
    # the list passed to transform() contains every entry of repos_dir --
    # confirm that this difference is intended.
    repos = [os.path.join(repos_dir, entry) for entry in os.listdir(repos_dir)]
    r2cc.transform(repos, output=sources_folder, num_processes=num_processes)
    log.warning("Finish to process {} lib. {} model files in {}".format(
        libname, len(os.listdir(sources_folder)), sources_folder))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment