Skip to content

Instantly share code, notes, and snippets.

@gugray
Created November 28, 2017 20:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gugray/af3ac3b53ec0368860943c6675551caf to your computer and use it in GitHub Desktop.
Save gugray/af3ac3b53ec0368860943c6675551caf to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
namespace CE.History
{
class Pass02
{
// Change items appended by readChanges, in the order they are read from "10-history.txt".
static List<ChangeItem> changes = new List<ChangeItem>();
// Filled by readRanks: first column of each accepted "subtlex-ch.txt" line -> zero-based position among the accepted lines.
static Dictionary<string, int> simpToRank = new Dictionary<string, int>();
// Loads "10-history.txt" into the static changes list.
// Items are pulled one by one through ChangeItem.Read until it returns null.
static void readChanges()
{
    using (StreamReader reader = new StreamReader("10-history.txt"))
    {
        for (ChangeItem item = ChangeItem.Read(reader); item != null; item = ChangeItem.Read(reader))
        {
            changes.Add(item);
        }
    }
}
// Loads "subtlex-ch.txt" into simpToRank.
// Only lines made of exactly two tab-separated fields are accepted; the first field is
// mapped to a running zero-based counter that advances once per accepted line.
static void readRanks()
{
    using (StreamReader reader = new StreamReader("subtlex-ch.txt"))
    {
        int nextRank = 0;
        for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
        {
            string[] fields = row.Split('\t');
            if (fields.Length == 2)
            {
                simpToRank[fields[0]] = nextRank;
                nextRank++;
            }
        }
    }
}
// One row of the monthly report written by calcMonthly.
private class MonthInfo
{
    // Calendar month this row describes.
    public int Year;
    public int Month;

    // Tallies for the month; int fields start at zero without an explicit initializer.
    public int SizeAtEnd;
    public int Additions;
    public int Deletions;
}
// Returns how many calendar months dt's month lies after the month of the first
// entry in changes (0 for the same month, negative if dt's month is earlier).
static int getMonthIX(DateTime dt)
{
    DateTime origin = changes[0].When;
    int wholeYears = dt.Year - origin.Year;
    int monthOffset = dt.Month - origin.Month;
    return wholeYears * 12 + monthOffset;
}
// Writes "20-month-counts.txt": one tab-separated row per calendar month from the month of
// the first change to the month of the last change. Columns: year-month, number of distinct
// heads at the end of the month, growth versus the previous row, additions, and deletions
// (written negated). Every non-delete change counts as an addition and every delete as a
// deletion, whether or not it altered the set of heads.
// With an empty history only the header line is written.
static void calcMonthly()
{
    // No changes means no months; guarding here avoids indexing into an empty list.
    int mcount = changes.Count == 0 ? 0 : getMonthIX(changes[changes.Count - 1].When) + 1;
    MonthInfo[] minfos = new MonthInfo[mcount];
    // Marks months that contain at least one change item. The carry-forward below relies on
    // this flag rather than on SizeAtEnd == 0, which cannot tell "no activity this month"
    // apart from "the month really ended with zero heads".
    bool[] active = new bool[mcount];
    if (mcount > 0)
    {
        DateTime dtFirst = changes[0].When;
        int year = dtFirst.Year;
        int month = dtFirst.Month;
        for (int i = 0; i != minfos.Length; ++i)
        {
            minfos[i] = new MonthInfo { Year = year, Month = month };
            ++month;
            if (month == 13) { ++year; month = 1; }
        }
    }

    HashSet<string> heads = new HashSet<string>();
    foreach (var ci in changes)
    {
        int mix = getMonthIX(ci.When);
        MonthInfo mi = minfos[mix];
        active[mix] = true;
        foreach (var cc in ci.Changes)
        {
            // Keep track of dictionary size and changes
            if (cc.Delete)
            {
                heads.Remove(cc.Head);
                ++mi.Deletions;
            }
            else
            {
                heads.Add(cc.Head);
                ++mi.Additions;
            }
        }
        // Refreshed after every change item, so the last item of a month leaves the month-end size.
        mi.SizeAtEnd = heads.Count;
    }

    // Months without any change item keep the size of the preceding month.
    for (int i = 1; i < minfos.Length; ++i)
    {
        if (!active[i]) minfos[i].SizeAtEnd = minfos[i - 1].SizeAtEnd;
    }

    using (StreamWriter sw = new StreamWriter("20-month-counts.txt"))
    {
        sw.NewLine = "\n";
        sw.WriteLine("year_month\tsize_at_end\tgrowth\tadditions\tdeletions");
        for (int i = 0; i < minfos.Length; ++i)
        {
            MonthInfo mi = minfos[i];
            sw.Write(mi.Year + "-" + mi.Month.ToString("00"));
            sw.Write("\t" + mi.SizeAtEnd);
            // The first row has no predecessor, so its growth equals its size.
            int growth = mi.SizeAtEnd;
            if (i > 0) growth -= minfos[i - 1].SizeAtEnd;
            sw.Write("\t" + growth);
            sw.Write("\t" + mi.Additions + "\t" + (-mi.Deletions).ToString());
            sw.WriteLine();
        }
    }
}
// Maps a date to a zero-based quarter index in which index 0 is the first quarter of 2007.
// Dates before 2007 produce negative indexes.
static int getQuarterIX(DateTime dt)
{
    // Months 1-3 -> 0, 4-6 -> 1, 7-9 -> 2, 10-12 -> 3.
    int quarterOfYear = (dt.Month - 1) / 3;
    return 4 * (dt.Year - 2007) + quarterOfYear;
}
// One row of the quarterly activity report written by calcQuarterly.
private class QuartInfo
{
    // Calendar year and zero-based quarter within that year.
    public int Year;
    public int Quarter;

    // Distinct names recorded for this quarter.
    public HashSet<string> Editors = new HashSet<string>();
    public HashSet<string> Contributors = new HashSet<string>();

    // Tallies; int fields start at zero without an explicit initializer.
    public int EditorChanges;
    public int EditorCSets;
    public int ContributorChanges;
    public int ContributorCSets;
}
// Activity span of one editor, as tracked by calcQuarterly.
private class EditorLife
{
    // Quarter indexes of the first and the latest change item counted for the editor.
    public int FirstQIx;
    public int LastQIx;

    // Number of change items counted for the editor; starts at zero.
    public int CSets;
}
// Per-contributor activity record built by calcQuarterly.
private class ContributorLife
{
// One flag per quarter of the report; calcQuarterly sets the flag of every quarter
// in which it counts a change item for this contributor.
public bool[] Quarters;
}
// Produces three tab-separated reports covering every quarter from 2007Q1 up to the quarter
// of the last change item:
//   "21-quarter-counts.txt"         per-quarter editor and contributor activity
//   "22-editors-quarters.txt"       per editor: item count, and 1 for every quarter between
//                                   the first and the latest quarter the editor was counted in
//   "22-contributors-quarters.txt"  per contributor: 1 for every quarter with a counted item
// A change item counts as contributor work when it names a submitter that has not been seen
// in the Editor role up to and including that item; otherwise it counts as editor work.
// With an empty history all three files contain only their header line.
static void calcQuarterly()
{
    // No changes means no quarters; guarding here avoids indexing into an empty list.
    int qcount = changes.Count == 0 ? 0 : getQuarterIX(changes[changes.Count - 1].When) + 1;
    QuartInfo[] qinfos = new QuartInfo[qcount];
    // Column labels such as "2007Q1", shared by all three reports.
    string[] labels = new string[qcount];
    for (int i = 0; i != qinfos.Length; ++i)
    {
        qinfos[i] = new QuartInfo { Year = 2007 + i / 4, Quarter = i % 4 };
        labels[i] = qinfos[i].Year + "Q" + (qinfos[i].Quarter + 1);
    }

    HashSet<string> editors = new HashSet<string>();
    Dictionary<string, EditorLife> elives = new Dictionary<string, EditorLife>();
    Dictionary<string, ContributorLife> clives = new Dictionary<string, ContributorLife>();
    foreach (var ci in changes)
    {
        int currQix = getQuarterIX(ci.When);
        QuartInfo qi = qinfos[currQix];
        editors.Add(ci.Editor);
        bool isEditor = ci.Submitter == "" || editors.Contains(ci.Submitter);
        if (isEditor)
        {
            ++qi.EditorCSets;
            EditorLife el;
            if (!elives.TryGetValue(ci.Editor, out el))
            {
                el = new EditorLife { FirstQIx = currQix, LastQIx = currQix };
                elives[ci.Editor] = el;
            }
            if (currQix > el.LastQIx) el.LastQIx = currQix;
            ++el.CSets;
        }
        else
        {
            qi.Contributors.Add(ci.Submitter);
            ++qi.ContributorCSets;
            ContributorLife cl;
            if (!clives.TryGetValue(ci.Submitter, out cl))
            {
                cl = new ContributorLife { Quarters = new bool[qinfos.Length] };
                clives[ci.Submitter] = cl;
            }
            cl.Quarters[currQix] = true;
        }

        // The editor is recorded as active only for items that carry at least one change.
        int changeCount = ci.Changes.Count;
        if (changeCount > 0) qi.Editors.Add(ci.Editor);
        if (isEditor) qi.EditorChanges += changeCount;
        else qi.ContributorChanges += changeCount;
    }

    using (StreamWriter sw = new StreamWriter("21-quarter-counts.txt"))
    {
        sw.NewLine = "\n";
        sw.WriteLine("year_quarter\tactive_editors\teditor_changes\teditor_csets\tactive_contributors\tcontributor_changes\tcontributor_csets");
        for (int i = 0; i < qinfos.Length; ++i)
        {
            QuartInfo qi = qinfos[i];
            sw.Write(labels[i]);
            sw.Write("\t");
            sw.Write(qi.Editors.Count.ToString());
            sw.Write("\t");
            sw.Write(qi.EditorChanges.ToString());
            sw.Write("\t");
            sw.Write(qi.EditorCSets.ToString());
            sw.Write("\t");
            sw.Write(qi.Contributors.Count.ToString());
            sw.Write("\t");
            sw.Write(qi.ContributorChanges.ToString());
            sw.Write("\t");
            sw.Write(qi.ContributorCSets.ToString());
            sw.WriteLine();
        }
    }

    using (StreamWriter sw = new StreamWriter("22-editors-quarters.txt"))
    {
        sw.NewLine = "\n";
        sw.Write("editor\tcsets");
        for (int i = 0; i < labels.Length; ++i)
            sw.Write("\t" + labels[i]);
        sw.WriteLine();
        foreach (var x in elives)
        {
            sw.Write(x.Key);
            sw.Write("\t" + x.Value.CSets);
            // Every quarter inside the first..last span is flagged, including quiet ones in between.
            for (int i = 0; i < qinfos.Length; ++i)
                sw.Write("\t" + (i >= x.Value.FirstQIx && i <= x.Value.LastQIx ? "1" : "0"));
            sw.WriteLine();
        }
    }

    using (StreamWriter sw = new StreamWriter("22-contributors-quarters.txt"))
    {
        sw.NewLine = "\n";
        sw.Write("contributor");
        for (int i = 0; i < labels.Length; ++i)
            sw.Write("\t" + labels[i]);
        sw.WriteLine();
        foreach (var x in clives)
        {
            sw.Write(x.Key);
            for (int i = 0; i < x.Value.Quarters.Length; ++i)
                sw.Write("\t" + (x.Value.Quarters[i] ? "1" : "0"));
            sw.WriteLine();
        }
    }
}
// One row of the rank histogram report written by calcQuarterlyHistograms.
private class QuartHisto
{
    // Calendar year and zero-based quarter within that year.
    public int Year;
    public int Quarter;

    // Snapshot of words recorded for this quarter.
    public List<string> Simps = new List<string>();

    // 100 histogram bins filled by getRankHisto (one bin per 1000 ranks).
    public int[] RankBuckets = new int[100];

    // Words getRankHisto could not place in a bin. Must stay a field: it is passed by ref.
    public int BeyondRanks;
}
// Adds the given words to a rank histogram.
//   simps       words to classify
//   buckets     histogram bins; bin i counts words whose rank in simpToRank is in [1000*i, 1000*i + 999]
//   beyondRanks incremented for every word that cannot be placed in a bin: either it has no
//               entry in simpToRank, or its rank lies past the last bin
// Counts are added to whatever the bins and the counter already hold.
static void getRankHisto(List<string> simps, int[] buckets, ref int beyondRanks)
{
    foreach (string simp in simps)
    {
        int rank;
        if (!simpToRank.TryGetValue(simp, out rank)) { ++beyondRanks; continue; }
        int bucket = rank / 1000;
        // A rank list longer than the histogram would otherwise index past the array end.
        if (bucket >= buckets.Length) { ++beyondRanks; continue; }
        ++buckets[bucket];
    }
}
// Captures the second space-delimited token of a head that has the shape "<token> <token> [<text>]".
static readonly Regex reHead = new Regex(@"^[^ ]+ ([^ ]+) \[[^\]]+\]");

// Writes "23-quarter-histograms.txt": one tab-separated row per quarter from 2007Q1 up to the
// quarter of the last change item. Columns: quarter label, number of distinct words (second
// token of each head) present at the end of the quarter, number of those words that
// getRankHisto could not place in a bin, then the 100 bin counts.
// Quarters without any change item repeat the vocabulary of the preceding quarter.
// With an empty history the file is created empty (this report has no header line).
static void calcQuarterlyHistograms()
{
    // No changes means no quarters; guarding here avoids indexing into an empty list.
    int qcount = changes.Count == 0 ? 0 : getQuarterIX(changes[changes.Count - 1].When) + 1;
    QuartHisto[] qinfos = new QuartHisto[qcount];
    for (int i = 0; i != qinfos.Length; ++i)
        qinfos[i] = new QuartHisto { Year = 2007 + i / 4, Quarter = i % 4 };

    HashSet<string> simps = new HashSet<string>();
    int mix = 0;
    foreach (var ci in changes)
    {
        int currQix = getQuarterIX(ci.When);
        if (currQix != mix)
        {
            // The quarter being left is complete: snapshot the vocabulary for it...
            qinfos[mix].Simps.AddRange(simps);
            // ...and for every quarter skipped on the way, since nothing changed during those.
            for (int q = mix + 1; q < currQix; ++q)
                qinfos[q].Simps.AddRange(simps);
            mix = currQix;
        }
        foreach (var cc in ci.Changes)
        {
            Match m = reHead.Match(cc.Head);
            // A head that does not fit the pattern yields no word; without this check the
            // empty string would be tracked as if it were a word.
            if (!m.Success) continue;
            string simp = m.Groups[1].Value;
            if (cc.Delete) simps.Remove(simp);
            else simps.Add(simp);
        }
    }
    // Snapshot for the final quarter, which has no following change item to trigger it.
    if (qinfos.Length > 0) qinfos[mix].Simps.AddRange(simps);

    foreach (var qi in qinfos) getRankHisto(qi.Simps, qi.RankBuckets, ref qi.BeyondRanks);

    using (StreamWriter sw = new StreamWriter("23-quarter-histograms.txt"))
    {
        sw.NewLine = "\n";
        foreach (QuartHisto qi in qinfos)
        {
            sw.Write(qi.Year + "Q" + (qi.Quarter + 1));
            sw.Write("\t");
            sw.Write(qi.Simps.Count.ToString());
            sw.Write("\t");
            sw.Write(qi.BeyondRanks.ToString());
            for (int i = 0; i != qi.RankBuckets.Length; ++i)
            {
                sw.Write("\t");
                sw.Write(qi.RankBuckets[i].ToString());
            }
            sw.WriteLine();
        }
    }
}
// Ranks submitters by the total number of changes in the change items that name them, and
// writes two tab-separated files with lines of the form name, rank (starting at 1), count:
//   "24-contrib-ranks.txt"             every submitter with at least one change
//   "24-contrib-ranks-noneditors.txt"  the same, minus anyone who appears as Editor of any item
// Change items with an empty Submitter are not attributed to anyone.
static void calcContribs()
{
    HashSet<string> editors = new HashSet<string>();
    Dictionary<string, int> personToContribs = new Dictionary<string, int>();
    foreach (var ci in changes)
    {
        editors.Add(ci.Editor);
        if (ci.Submitter == "") continue;
        int soFar;
        // soFar is left at 0 when the submitter has not been seen before.
        personToContribs.TryGetValue(ci.Submitter, out soFar);
        personToContribs[ci.Submitter] = soFar + ci.Changes.Count;
    }

    writeContribRanks("24-contrib-ranks.txt", rankContributors(personToContribs, null), personToContribs);
    // Same, but excluding editors
    writeContribRanks("24-contrib-ranks-noneditors.txt", rankContributors(personToContribs, editors), personToContribs);
}

// Returns the names that have a positive count and are not in excluded (which may be null),
// ordered by descending count. Equal counts are ordered by ordinal name comparison so the
// result does not depend on dictionary enumeration order or on List.Sort being unstable.
private static List<string> rankContributors(Dictionary<string, int> counts, HashSet<string> excluded)
{
    List<string> ordered = new List<string>();
    foreach (var x in counts)
    {
        if (x.Value > 0 && (excluded == null || !excluded.Contains(x.Key)))
            ordered.Add(x.Key);
    }
    ordered.Sort((x, y) =>
    {
        int byCount = counts[y].CompareTo(counts[x]);
        return byCount != 0 ? byCount : string.CompareOrdinal(x, y);
    });
    return ordered;
}

// Writes one "name<TAB>rank<TAB>count" line per entry of ordered, ranks starting at 1.
private static void writeContribRanks(string fileName, List<string> ordered, Dictionary<string, int> counts)
{
    using (StreamWriter sw = new StreamWriter(fileName))
    {
        sw.NewLine = "\n";
        for (int i = 0; i != ordered.Count; ++i)
        {
            sw.Write(ordered[i]);
            sw.Write("\t");
            sw.Write((i + 1).ToString());
            sw.Write("\t");
            sw.Write(counts[ordered[i]]);
            sw.WriteLine();
        }
    }
}
// Entry point of this pass: loads the inputs, then writes every report.
// The two read* calls must come first, because the calc* steps read the static
// collections (changes, simpToRank) that those calls fill.
public static void Run()
{
readChanges();
readRanks();
calcMonthly();
calcQuarterly();
calcQuarterlyHistograms();
calcContribs();
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment