Skip to content

Instantly share code, notes, and snippets.

@diitaz93
Created March 31, 2023 08:10
Show Gist options
  • Save diitaz93/8e224be203384380624ef030adf8aaed to your computer and use it in GitHub Desktop.
Save diitaz93/8e224be203384380624ef030adf8aaed to your computer and use it in GitHub Desktop.
Statistical mechanics approach to modeling the probability of merge conflicts in a collaborative project
#!/usr/bin/env python
# coding: utf-8
import argparse
from math import comb, ceil
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
# Command-line interface: the three integer inputs of the model.
parser = argparse.ArgumentParser(
    description="Statistical mechanics approach to model the probability of "
                "merge conflicts in a collaborative project."
)
parser.add_argument('total_files', type=int, help="Total number of files in repository.")
parser.add_argument('files_per_merge', type=int, help="Average number of files per merge.")
parser.add_argument('merges_per_day', type=int, help="Average number of merges per day.")
args = parser.parse_args()

# Reject inputs the model cannot handle before any computation starts:
# comb() is zero when files_per_merge > total_files (later division by zero)
# and raises on negative values; total_files is also used as a divisor.
if args.total_files < 1:
    parser.error("total_files must be a positive integer.")
if not 0 <= args.files_per_merge <= args.total_files:
    parser.error("files_per_merge must be between 0 and total_files.")
if args.merges_per_day < 0:
    parser.error("merges_per_day must not be negative.")

total_files: int = args.total_files
avg_files_per_merge: int = args.files_per_merge
avg_merges_per_day: int = args.merges_per_day
# Number of ways to choose the files of one merge among all files.
total_microstates: int = comb(total_files, avg_files_per_merge)
# Phase 1: Expected number of modified files after one merge
# For every possible overlap, calculates the probability that files modified in a merge were already modified
def get_touched_files_after_one_merge(F: int, f: int, f_0: int, verbose: bool = False) -> int:
    """Return the expected number of touched files after one more merge, rounded up.

    A merge modifies ``f`` of the ``F`` files while ``f_0`` files are already
    touched.  For every overlap size ``i`` (files of the merge that were
    already touched) there are ``comb(F - f_0, f - i) * comb(f_0, i)``
    microstates out of ``comb(F, f)`` in total.  The result is ``f_0`` plus
    the probability-weighted number of newly touched files, rounded up.

    Args:
        F: Total number of files.
        f: Number of files modified by the merge.
        f_0: Number of files already touched before the merge.
        verbose: When true, print the per-overlap probabilities and the
            running expectation.

    Returns:
        The expected number of touched files after the merge, rounded up to
        the next integer.

    Raises:
        ValueError: If ``f`` or ``f_0`` is not in the range ``0..F``.
    """
    if not 0 <= f <= F:
        raise ValueError("f must be between 0 and F, got f={} and F={}.".format(f, F))
    if not 0 <= f_0 <= F:
        raise ValueError("f_0 must be between 0 and F, got f_0={} and F={}.".format(f_0, F))

    f_l: int = F - f_0
    # Derived from this call's own arguments rather than from a module global,
    # so the probabilities are normalised for any F and f.
    total_microstates: int = comb(F, f)
    # Exact integer sum of (newly touched files) * (microstates); kept as an
    # integer so the final rounding is not affected by float error.
    weighted_new_files = 0
    total_probability = 0
    if verbose:
        print("Total files: {}, initially modified files: {}, files modified per iteration: {}, initially untouched files: {}."
              .format(F, f_0, f, f_l))
    # Iterates over the number of merge files that were already modified
    for i in range(f + 1):
        new_files = f - i
        microstates = comb(f_l, new_files) * comb(f_0, i)
        weighted_new_files += new_files * microstates
        if verbose:
            probability = microstates / total_microstates
            total_probability = total_probability + probability
            print("In iteration {}, {} files are modified with probability {}."
                  .format(i, f_0 + new_files, probability))
            print("The partial expected number of touched files is {}."
                  .format(f_0 + weighted_new_files / total_microstates))
    if verbose:
        print("Total probability {}.".format(total_probability))
    # Ceiling division on integers: -(-a // b) == ceil(a / b) without floats.
    return int(f_0) + -(-weighted_new_files // total_microstates)
# Phase 3: Expected value after 1 day of merges
def get_touched_files_after_one_day(F: int, f: int, f_0: int, merges: int):
    """Chain ``merges`` single-merge updates, starting from ``f_0`` touched files.

    Each step feeds the result of ``get_touched_files_after_one_merge`` back
    in as the already-touched count of the next merge.

    Args:
        F: Total number of files.
        f: Number of files modified per merge.
        f_0: Number of touched files at the start of the day.
        merges: Number of merges to apply.

    Returns:
        The touched-file count after the last merge; ``f_0`` unchanged when
        ``merges`` yields no iterations.
    """
    current = f_0
    for _ in range(merges):
        # Output of one merge is the starting state of the next one.
        current = get_touched_files_after_one_merge(F=F, f=f, f_0=current, verbose=False)
    return current
# Phase 4: Probability of conflict vs. time
# Simulation horizon and per-day result buffers.
n_days: int = 50
days = np.arange(n_days)
files_per_day = np.zeros_like(days, dtype=int)
prob_per_day = np.zeros_like(days, dtype=float)

# Every day starts from the touched-file count the previous day ended with;
# the first day starts from one merge's worth of files.
f_0 = avg_files_per_merge
for day in range(n_days):
    files_per_day[day] = get_touched_files_after_one_day(
        F=total_files,
        f=avg_files_per_merge,
        f_0=f_0,
        merges=avg_merges_per_day,
    )
    f_0 = files_per_day[day]

# Touched files as a percentage of all files, for all days at once.
prob_per_day[:] = 100 * files_per_day / total_files

# Plot the percentage against the day index and save the figure to disk.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[10, 8])
ax.plot(days, prob_per_day)
ax.set_yticks(list(range(10, 101, 10)))
ax.set_title("Probability of conflict")
ax.set_xlabel("Days")
fig.savefig("prob.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment