Created
March 31, 2023 08:10
-
-
Save diitaz93/8e224be203384380624ef030adf8aaed to your computer and use it in GitHub Desktop.
Statistical mechanics approach to model probability of merge conflict in collaborative project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
import argparse | |
from math import comb, ceil | |
from matplotlib import pyplot as plt | |
import seaborn as sns | |
import numpy as np | |
# Command-line interface: three positional integers describing the repository
# and its merge activity.
parser = argparse.ArgumentParser(
    description="Estimate the probability of a merge conflict over time from "
                "the expected share of repository files touched by merges."
)
parser.add_argument('total_files', type=int, help="Total number of files in repository.")
parser.add_argument('files_per_merge', type=int, help="Average number of files per merge.")
parser.add_argument('merges_per_day', type=int, help="Average number of merges per day.")
args = parser.parse_args()
total_files: int = args.total_files
avg_files_per_merge: int = args.files_per_merge
avg_merges_per_day: int = args.merges_per_day
# Reject inputs the model cannot handle before any combinatorics run:
# math.comb raises ValueError for negative arguments and returns 0 when
# files_per_merge > total_files, which would turn every probability into a
# division by zero; total_files is also used as a divisor further down.
if total_files < 1:
    parser.error("total_files must be at least 1.")
if not 0 <= avg_files_per_merge <= total_files:
    parser.error("files_per_merge must be between 0 and total_files.")
if avg_merges_per_day < 0:
    parser.error("merges_per_day must not be negative.")
# Number of distinct ways to pick the files of one merge out of the repository.
total_microstates: int = comb(total_files, avg_files_per_merge)
# Phase 1: Expected number of modified files after 1 merge | |
# Weights every possible overlap between the files modified in a merge and the already modified files by its probability
def get_touched_files_after_one_merge(F: int, f: int, f_0: int, verbose: bool = False) -> int:
    """Return the expected number of touched files after one more merge, rounded up.

    The merge is modelled as choosing ``f`` distinct files out of ``F``.  For
    every possible overlap ``i`` with the ``f_0`` files that are already
    touched, the number of ways to realise it is
    ``comb(F - f_0, f - i) * comb(f_0, i)``; dividing by ``comb(F, f)`` gives
    the probability of that overlap.

    Args:
        F: Total number of files.
        f: Number of files modified by the merge.
        f_0: Number of files already touched before the merge.
        verbose: If true, print the per-overlap probabilities and the running
            expected value.

    Returns:
        ``f_0`` plus the expected number of newly touched files, rounded up to
        the next integer.

    Raises:
        ValueError: If ``f`` files cannot be chosen out of ``F`` (``comb(F, f)``
            is zero), or if ``math.comb`` rejects a negative argument.
    """
    f_l: int = F - f_0
    # Normalise with this call's own F and f instead of a module-level value,
    # so the probabilities are correct for whatever arguments are passed in.
    total_microstates: int = comb(F, f)
    if total_microstates == 0:
        raise ValueError(
            "Cannot choose {} files out of {}.".format(f, F)
        )
    # Sum of (newly touched files * microstates), kept as an exact integer so
    # the final rounding is not affected by floating-point error.
    weighted_new_files = 0
    total_probability = 0
    if verbose:
        print("Total files: {}, initially modified files: {}, files modified per iteration: {}, initially untouched files: {}."
              .format(F, f_0, f, f_l))
    # Iterates over the number i of merged files that were already modified.
    for i in range(f + 1):
        n_files = f_0 + f - i
        microstates = comb(f_l, f - i) * comb(f_0, i)
        weighted_new_files += (n_files - f_0) * microstates
        if verbose:
            probability = microstates / total_microstates
            total_probability = total_probability + probability
            touched = f_0 + weighted_new_files / total_microstates
            print("In iteration {}, {} files are modified with probability {}."
                  .format(i, n_files, probability))
            print("The partial expected number of touched files is {}.".format(touched))
            print("Total probability {}.".format(total_probability))
    # Exact ceiling of weighted_new_files / total_microstates via floor division.
    expected_new_files_ceil = -((-weighted_new_files) // total_microstates)
    return int(f_0) + expected_new_files_ceil
# Phase 3: Expected value after 1 day of merges | |
def get_touched_files_after_one_day(F: int, f: int, f_0: int, merges: int):
    """Apply the single-merge estimate ``merges`` times in a row.

    Starting from ``f_0`` touched files, each step feeds the previous result
    back into ``get_touched_files_after_one_merge`` as the new starting count.

    Args:
        F: Total number of files.
        f: Number of files modified per merge.
        f_0: Number of touched files at the start of the day.
        merges: Number of merges to apply.

    Returns:
        The touched-file count after the last merge (``f_0`` unchanged when
        ``merges`` yields no iterations).
    """
    current = f_0
    for _ in range(merges):
        current = get_touched_files_after_one_merge(F=F, f=f, f_0=current, verbose=False)
    return current
# Phase 4: Probability of conflict vs. time | |
# Simulation horizon and the per-day result buffers.
n_days: int = 50
days = np.arange(n_days)
files_per_day = np.zeros_like(days, dtype=int)
prob_per_day = np.zeros_like(days, dtype=float)

# Each day starts from the touched-file count the previous day ended with;
# the very first day starts from the files of a single merge.
f_0 = avg_files_per_merge
for day in range(n_days):
    files_per_day[day] = get_touched_files_after_one_day(
        total_files,
        avg_files_per_merge,
        f_0,
        avg_merges_per_day,
    )
    # Read the stored value back so the next day continues from it.
    f_0 = files_per_day[day]
    # Share of touched files, expressed as a percentage of the repository.
    prob_per_day[day] = 100 * f_0 / total_files

# Plot the percentage against the day index and write it to disk.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[10, 8])
ax.plot(days, prob_per_day)
ax.set_yticks(list(range(10, 101, 10)))
ax.set_title("Probability of conflict")
ax.set_xlabel("Days")
fig.savefig("prob.png")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment