Skip to content

Instantly share code, notes, and snippets.

@Eh2406
Created June 26, 2018 03:36
Show Gist options
  • Save Eh2406/91dc9bed999bf3d1ec64cfb6f98d62c9 to your computer and use it in GitHub Desktop.
Save Eh2406/91dc9bed999bf3d1ec64cfb6f98d62c9 to your computer and use it in GitHub Desktop.
fuzz cargo generate-lockfile
from __future__ import print_function
import os
import subprocess
import time
import json
import csv
from threading import Timer
import pandas as pd
def run(folder, deps, timeout_sec=1.0):
    """Run ``cargo generate-lockfile`` on a scratch crate and time it.

    An empty ``main.rs`` and a ``Cargo.toml`` listing ``deps`` are written
    into ``folder``; then ``cargo_path + ["generate-lockfile",
    "-Zno-index-update"]`` is executed there with its output discarded.
    A watchdog timer kills the process if it is still running after
    ``timeout_sec`` seconds.

    Args:
        folder: Directory that holds the scratch crate (created if missing).
        deps: Mapping of crate name -> version requirement string.
        timeout_sec: Seconds to wait before the process is killed.

    Returns:
        A tuple ``(exit_code, elapsed_seconds, trans_deps)``.  ``trans_deps``
        is the sorted list of the second whitespace-separated token of every
        row (longer than 5 characters) in the ``[metadata]`` section of the
        generated ``Cargo.lock`` -- presumably the crate names of the
        checksum rows (TODO confirm against the lock file format in use).
        It is empty when the exit code is non-zero or when the lock file has
        no ``[metadata]`` section.
    """
    if not os.path.isdir(folder):
        os.makedirs(folder)

    # Only the existence of the (empty) library source matters; close the
    # handle right away instead of leaking it.
    with open(os.path.join(folder, 'main.rs'), 'w'):
        pass

    manifest = [
        '',
        '[package]',
        'name = "cargo_speed_test"',
        'version = "0.1.0"',
        '[lib]',
        'path = "main.rs"',
        '[dependencies]',
    ]
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    manifest.extend('{} = "{}"'.format(name, ver) for name, ver in deps.items())
    with open(os.path.join(folder, 'Cargo.toml'), 'w') as f:
        f.write('\n'.join(manifest) + '\n')

    # time.clock() is gone since Python 3.8 and, on Unix, counted this
    # process's CPU time rather than the wall time spent waiting for cargo.
    clock = getattr(time, 'perf_counter', None) or time.time

    start = clock()
    with open(os.devnull, "w") as devnull:
        proc = subprocess.Popen(cargo_path + ["generate-lockfile", "-Zno-index-update"],
                                cwd=folder, stdout=devnull, stderr=devnull)
        watchdog = Timer(timeout_sec, proc.kill)
        try:
            watchdog.start()
            out_code = proc.wait()
        finally:
            # Never leave the timer armed once the process has been reaped.
            watchdog.cancel()
    out_time = clock() - start

    out_trans_deps = []
    if out_code == 0:
        with open(os.path.join(folder, 'Cargo.lock'), 'r') as f:
            lock = f.read()
        sections = lock.split("[metadata]")
        # A lock file without a [metadata] section yields no rows instead of
        # raising IndexError.
        metadata = sections[1] if len(sections) > 1 else ""
        out_trans_deps = sorted(row.split()[1]
                                for row in metadata.splitlines()
                                if len(row) > 5)
    return out_code, out_time, out_trans_deps
def read_index(folder):
    """Yield one record per non-yanked version found under ``folder``.

    Every file below ``folder`` except those named ``config.json`` is read as
    JSON lines.  Directories whose name starts with ``.`` are not descended
    into.  Blank lines are skipped.

    Args:
        folder: Root directory of the index checkout.

    Yields:
        ``(file_name, vers, dep_count)`` tuples, where ``file_name`` is the
        name of the file the line came from, ``vers`` is the line's ``vers``
        field and ``dep_count`` is the length of its ``deps`` list.
    """
    for root, dirnames, filenames in os.walk(folder):
        # Prune hidden directories in place so os.walk does not enter them.
        dirnames[:] = [d for d in dirnames if not d.startswith(".")]
        for name in filenames:
            if name == 'config.json':
                continue
            with open(os.path.join(root, name)) as f:
                # Stream the file instead of loading it whole via readlines().
                for raw in f:
                    if not raw.strip():
                        continue
                    entry = json.loads(raw)
                    if entry['yanked']:
                        continue
                    yield name, entry['vers'], len(entry['deps'])
def logged_run(deps, log_writer, timeout_sec=1.0):
    """Call ``run`` in the ``temp`` folder, log the outcome and echo it.

    Args:
        deps: Mapping of crate name -> version requirement, passed to ``run``.
        log_writer: Object with a ``writerow`` method (a ``csv.writer`` in
            this file); receives ``[code, time, json(deps), "a,b,c"]``.
        timeout_sec: Timeout forwarded to ``run``.

    Returns:
        The ``(code, time, trans_deps)`` tuple returned by ``run``, unchanged.
    """
    result = run("temp", deps, timeout_sec)
    code, elapsed, trans_deps = result
    record = [code, elapsed, json.dumps(deps), ','.join(trans_deps)]
    log_writer.writerow(record)
    print(code, elapsed)
    return result
def save_index():
    """Walk the plain-file checkout of the index and dump it to ``index.csv``.

    The records produced by ``read_index(index_path)`` are sorted and written
    with the columns ``crate``, ``ver`` and ``num_debs``.
    """
    records = list(read_index(index_path))
    records.sort()
    frame = pd.DataFrame(records, columns=['crate', 'ver', 'num_debs'])
    frame.to_csv("index.csv")
def save_straight_pass():
    """Read ``index.csv`` and check that each version locks on its own.

    Every row of ``index.csv`` with more than one dependency is run through
    ``logged_run`` as a single-entry ``{crate: ver}`` mapping, and the
    results are appended to ``straight_pass.csv``.

    The header row is written only when the log file is new or empty, so
    re-running the pass appends data rows without planting a second header
    in the middle of the file.
    """
    import sys

    log_path = "straight_pass.csv"
    index = pd.read_csv("index.csv", index_col=0)
    # if num_debs == 0 then the lock file is empty
    # if num_debs == 1 then the lock file is redundant with that dep
    index = index[index.num_debs > 1]

    # Decide before opening: the reported position of a freshly opened
    # append-mode file is not a reliable "is it empty" signal everywhere.
    needs_header = not os.path.exists(log_path) or os.path.getsize(log_path) == 0

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] >= 3:
        log_file = open(log_path, "a", newline="")
    else:
        log_file = open(log_path, "ab")

    with log_file:
        log_writer = csv.writer(log_file)
        if needs_header:
            log_writer.writerow(["code", "time", "dumps", "trans_deps"])
        for _, row in index.iterrows():
            logged_run({row.crate: row.ver}, log_writer)
def save_second_pass():
    """Read the versions that lock on their own and fuzz combinations of them.

    This is to find cases like:
    - https://github.com/rust-lang/cargo/issues/4810#issuecomment-357553286
    where having two pinned deps causes cargo to hang.

    First I was going to try all pairs of deps, but .1 sec * 0.5 (6k ^ 2) is a long time.
    Then I was going to do all pairs with overlap in lock files (hence the `trans_deps` column)
    but even that would take years.
    Then I realized that as long as the problematic deps are in the list then:
    1. something else will unhelpfully cause it to fail fast.
    2. it will hang.
    so the plan is to add random deps one at a time.
    if it fails fast:
    -> then it will be hard to learn anything from extensions so back out the one just added.
    if it passes fast:
    -> add the next one. Hopefully it will have a bad reaction to anyone already in the list.
    Thanks to the birthday paradox this will test a lot of combinations fast.
    if it takes a long time:
    -> it is probably just something random, like antivirus trying to understand what is going on
    So retry with a longer time out. (in practice I never hit this.)

    Results are written to ``second_pass.csv`` (overwritten on each call).
    The function loops until interrupted; it only returns on its own when no
    candidate rows survive the filters.
    """
    import sys

    def timed_out(code):
        # Popen.kill() leaves exit code 1 on Windows and a negative signal
        # number on POSIX; accept both as "the watchdog killed it".
        return code == 1 or code < 0

    straight_pass = pd.read_csv("straight_pass.csv",
                                converters={
                                    'dumps': json.loads,
                                    'trans_deps': lambda x: set(x.split(","))
                                })
    # Each "dumps" cell is the single-entry {crate: version} mapping that was
    # logged by the straight pass.  dict views are not indexable on Python 3.
    straight_pass['crate'] = straight_pass.dumps.apply(lambda x: next(iter(x)))
    straight_pass['ver'] = straight_pass.dumps.apply(lambda x: next(iter(x.values())))
    del straight_pass['dumps']
    straight_pass = straight_pass.sort_values(["crate", "ver"])
    straight_pass = straight_pass[straight_pass.code == 0]
    # NOTE(review): this is a plain string comparison, so e.g. "0.0.10" is
    # dropped as well -- confirm that is the intended cut-off.
    straight_pass = straight_pass[straight_pass.ver > "0.0.9"]
    print(len(straight_pass))

    # DataFrame.sample refuses to draw more rows than exist.
    batch_size = min(1000, len(straight_pass))

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] >= 3:
        log_file = open("second_pass.csv", "w", newline="")
    else:
        log_file = open("second_pass.csv", "wb")

    with log_file:
        log_writer = csv.writer(log_file)
        log_writer.writerow(["code", "time", "dumps", "trans_deps"])
        if batch_size == 0:
            # Nothing to combine; avoid spinning forever on empty samples.
            return
        while True:
            pinned = {}
            for _, row1 in straight_pass.sample(batch_size).iterrows():
                print(row1.crate.rjust(20, ' '), len(pinned), "\t", end='')
                if row1.crate in pinned:
                    print("already in")
                    continue
                pinned[row1.crate] = "=" + row1.ver
                out = logged_run(pinned, log_writer, 30.0)
                # Retry with progressively longer timeouts before giving up.
                for longer_timeout in (60.0, 120.0):
                    if not timed_out(out[0]):
                        break
                    print("time out!!")
                    out = logged_run(pinned, log_writer, longer_timeout)
                if out[0] != 0:
                    # Back out the dep just added so later additions are
                    # tested against a set that is known to resolve.
                    del pinned[row1.crate]
# Command prefix that run() extends with the generate-lockfile arguments.
cargo_path = ["cargo", "+nightly"]
# Directory that save_index() walks via read_index().
index_path = r"../crates.io-index"
# The stages run in order; each reads the CSV written by the previous one:
# save_index writes index.csv, save_straight_pass reads it and appends to
# straight_pass.csv, save_second_pass reads that and writes second_pass.csv.
save_index()
save_straight_pass()
# Contains a `while True` loop, so this call keeps running until interrupted.
save_second_pass()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment