Created
November 28, 2017 20:10
-
-
Save gugray/af3ac3b53ec0368860943c6675551caf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Text.RegularExpressions; | |
namespace CE.History | |
{ | |
class Pass02 | |
{ | |
// All change sets loaded by readChanges(), in the order they appear in the history file.
// The list is filled in place and never reassigned, hence readonly.
static readonly List<ChangeItem> changes = new List<ChangeItem>();
// Maps a word (first column of the frequency list) to its zero-based position in that list.
// Filled by readRanks(); never reassigned, hence readonly.
static readonly Dictionary<string, int> simpToRank = new Dictionary<string, int>();
/// <summary>
/// Loads every change item from "10-history.txt" and appends it to <c>changes</c>.
/// Reading stops when <c>ChangeItem.Read</c> returns null.
/// </summary>
static void readChanges()
{
    using (StreamReader reader = new StreamReader("10-history.txt"))
    {
        for (ChangeItem item = ChangeItem.Read(reader); item != null; item = ChangeItem.Read(reader))
        {
            changes.Add(item);
        }
    }
}
/// <summary>
/// Reads "subtlex-ch.txt" and records, for each well-formed line, the zero-based
/// rank of its first tab-separated field in <c>simpToRank</c>.
/// </summary>
static void readRanks()
{
    using (StreamReader reader = new StreamReader("subtlex-ch.txt"))
    {
        int nextRank = 0;
        for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
        {
            string[] fields = row.Split('\t');
            // Only lines with exactly two tab-separated fields count; others do not consume a rank.
            if (fields.Length == 2)
            {
                simpToRank[fields[0]] = nextRank;
                nextRank++;
            }
        }
    }
}
/// <summary>
/// Statistics for one calendar month, as filled in by calcMonthly().
/// </summary>
private class MonthInfo
{
    // Calendar position of this slot.
    public int Year;
    public int Month;
    // Number of distinct heads tracked at the end of the month.
    public int SizeAtEnd;
    // Count of non-delete and delete changes seen within the month.
    public int Additions;
    public int Deletions;
}
/// <summary>
/// Returns how many calendar months <paramref name="dt"/> lies after the month of the
/// first item in <c>changes</c> (0 for the same month).
/// </summary>
static int getMonthIX(DateTime dt)
{
    DateTime origin = changes[0].When;
    int wholeYears = dt.Year - origin.Year;
    int monthOffset = dt.Month - origin.Month;
    return 12 * wholeYears + monthOffset;
}
/// <summary>
/// Computes month-by-month statistics from <c>changes</c> and writes them to
/// "20-month-counts.txt" as tab-separated rows with "\n" line ends:
/// year_month, size_at_end, growth, additions, deletions (deletions written negated).
/// </summary>
/// <remarks>
/// The first and last items of <c>changes</c> define the covered range, so the list is
/// expected to be in chronological order. With no changes at all, only the header is written.
/// </remarks>
static void calcMonthly()
{
    MonthInfo[] minfos = changes.Count == 0 ? new MonthInfo[0] : collectMonthInfos();
    // The output is a data file: format numbers independently of the machine's culture.
    System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;
    using (StreamWriter sw = new StreamWriter("20-month-counts.txt"))
    {
        sw.NewLine = "\n";
        sw.WriteLine("year_month\tsize_at_end\tgrowth\tadditions\tdeletions");
        for (int i = 0; i < minfos.Length; ++i)
        {
            MonthInfo mi = minfos[i];
            // Growth is relative to the previous month; the first month grows from zero.
            int growth = mi.SizeAtEnd;
            if (i > 0) growth -= minfos[i - 1].SizeAtEnd;
            sw.Write(mi.Year.ToString(inv) + "-" + mi.Month.ToString("00", inv));
            sw.Write("\t" + mi.SizeAtEnd.ToString(inv));
            sw.Write("\t" + growth.ToString(inv));
            sw.Write("\t" + mi.Additions.ToString(inv));
            sw.Write("\t" + (-mi.Deletions).ToString(inv));
            sw.WriteLine();
        }
    }
}

// Replays all changes and returns one MonthInfo per calendar month between the first and
// the last change (inclusive). Requires changes to be non-empty.
static MonthInfo[] collectMonthInfos()
{
    DateTime dtFirst = changes[0].When;
    DateTime dtLast = changes[changes.Count - 1].When;
    MonthInfo[] minfos = new MonthInfo[getMonthIX(dtLast) + 1];
    int year = dtFirst.Year;
    int month = dtFirst.Month;
    for (int i = 0; i != minfos.Length; ++i)
    {
        minfos[i] = new MonthInfo { Year = year, Month = month };
        ++month;
        if (month == 13) { ++year; month = 1; }
    }
    HashSet<string> heads = new HashSet<string>();
    int mix = 0;
    int madds = 0;
    int mdels = 0;
    foreach (var ci in changes)
    {
        // New month? File info for the month just finished.
        int currMix = getMonthIX(ci.When);
        if (currMix != mix)
        {
            minfos[mix].SizeAtEnd = heads.Count;
            minfos[mix].Additions = madds;
            minfos[mix].Deletions = mdels;
            // Months without any change keep the size reached so far. Carrying it forward
            // here (rather than treating SizeAtEnd == 0 as "no data" afterwards) keeps a
            // month that genuinely ends at size 0 intact.
            for (int gap = mix + 1; gap < currMix; ++gap)
            {
                minfos[gap].SizeAtEnd = heads.Count;
            }
            mix = currMix;
            madds = mdels = 0;
        }
        foreach (var cc in ci.Changes)
        {
            // Keep track of dictionary size and changes
            if (cc.Delete)
            {
                heads.Remove(cc.Head);
                ++mdels;
            }
            else
            {
                heads.Add(cc.Head);
                ++madds;
            }
        }
    }
    // File info for the last month, which has no following transition.
    minfos[mix].SizeAtEnd = heads.Count;
    minfos[mix].Additions = madds;
    minfos[mix].Deletions = mdels;
    return minfos;
}
/// <summary>
/// Maps a date to a zero-based calendar-quarter index, where index 0 is the first
/// quarter of 2007 and each later quarter adds one.
/// </summary>
static int getQuarterIX(DateTime dt)
{
    // January-March -> 0, April-June -> 1, July-September -> 2, October-December -> 3
    int quarterOfYear = (dt.Month - 1) / 3;
    return 4 * (dt.Year - 2007) + quarterOfYear;
}
/// <summary>
/// Activity figures for one calendar quarter, as filled in by calcQuarterly().
/// </summary>
private class QuartInfo
{
    // Calendar year and zero-based quarter within that year (0..3).
    public int Year;
    public int Quarter;
    // Distinct editor and contributor names seen in this quarter.
    public HashSet<string> Editors = new HashSet<string>();
    public HashSet<string> Contributors = new HashSet<string>();
    // Individual changes and whole change sets attributed to editors.
    public int EditorChanges;
    public int EditorCSets;
    // Individual changes and whole change sets attributed to contributors.
    public int ContributorChanges;
    public int ContributorCSets;
}
/// <summary>
/// Span of quarters over which one editor was seen, plus their change-set count.
/// </summary>
private class EditorLife
{
    // Quarter indexes (see getQuarterIX) of the first and the latest change set.
    public int FirstQIx;
    public int LastQIx;
    // Number of change sets counted for this editor.
    public int CSets;
}
/// <summary>
/// Per-quarter presence record for one contributor.
/// </summary>
private class ContributorLife
{
    // One flag per quarter index; true where the contributor submitted a change set.
    // Allocated by the code that creates the instance.
    public bool[] Quarters;
}
/// <summary>
/// Computes per-quarter editor and contributor activity from <c>changes</c> and writes
/// three tab-separated files: "21-quarter-counts.txt" (per-quarter totals),
/// "22-editors-quarters.txt" (per editor: change-set count and a 1/0 flag for every quarter
/// between their first and last change set) and "22-contributors-quarters.txt"
/// (per contributor: a 1/0 flag for every quarter in which they submitted).
/// </summary>
static void calcQuarterly()
{
    // One slot per quarter from 2007Q1 through the quarter of the last change.
    QuartInfo[] quarters = new QuartInfo[getQuarterIX(changes[changes.Count - 1].When) + 1];
    for (int i = 0; i < quarters.Length; ++i)
    {
        quarters[i] = new QuartInfo { Year = 2007 + i / 4, Quarter = i % 4 };
    }

    HashSet<string> knownEditors = new HashSet<string>();
    Dictionary<string, EditorLife> editorLives = new Dictionary<string, EditorLife>();
    Dictionary<string, ContributorLife> contributorLives = new Dictionary<string, ContributorLife>();
    foreach (var ci in changes)
    {
        int qix = getQuarterIX(ci.When);
        QuartInfo slot = quarters[qix];
        knownEditors.Add(ci.Editor);
        // A change set counts as a contribution when it names a submitter who has not
        // (so far) appeared as an editor; everything else is credited to the editor.
        bool fromContributor = ci.Submitter != "" && !knownEditors.Contains(ci.Submitter);
        if (fromContributor)
        {
            slot.Contributors.Add(ci.Submitter);
            ++slot.ContributorCSets;
            ContributorLife presence;
            if (!contributorLives.TryGetValue(ci.Submitter, out presence))
            {
                presence = new ContributorLife { Quarters = new bool[quarters.Length] };
                contributorLives[ci.Submitter] = presence;
            }
            presence.Quarters[qix] = true;
        }
        else
        {
            ++slot.EditorCSets;
            EditorLife span;
            if (!editorLives.TryGetValue(ci.Editor, out span))
            {
                span = new EditorLife { FirstQIx = qix, LastQIx = qix };
                editorLives[ci.Editor] = span;
            }
            if (qix > span.LastQIx) span.LastQIx = qix;
            ++span.CSets;
        }
        // Note: the editor is registered as active per change, so a change set with no
        // changes does not mark its editor as active in the quarter.
        foreach (var cc in ci.Changes)
        {
            slot.Editors.Add(ci.Editor);
            if (fromContributor) ++slot.ContributorChanges;
            else ++slot.EditorChanges;
        }
    }

    // Column/row labels such as "2007Q1", shared by all three files.
    string[] labels = new string[quarters.Length];
    for (int i = 0; i < quarters.Length; ++i)
    {
        labels[i] = quarters[i].Year + "Q" + (quarters[i].Quarter + 1);
    }

    using (StreamWriter sw = new StreamWriter("21-quarter-counts.txt"))
    {
        sw.NewLine = "\n";
        sw.WriteLine("year_quarter\tactive_editors\teditor_changes\teditor_csets\tactive_contributors\tcontributor_changes\tcontributor_csets");
        for (int i = 0; i < quarters.Length; ++i)
        {
            QuartInfo qi = quarters[i];
            string[] cells =
            {
                labels[i],
                qi.Editors.Count.ToString(),
                qi.EditorChanges.ToString(),
                qi.EditorCSets.ToString(),
                qi.Contributors.Count.ToString(),
                qi.ContributorChanges.ToString(),
                qi.ContributorCSets.ToString()
            };
            sw.WriteLine(string.Join("\t", cells));
        }
    }

    using (StreamWriter sw = new StreamWriter("22-editors-quarters.txt"))
    {
        sw.NewLine = "\n";
        sw.Write("editor\tcsets");
        foreach (string label in labels)
        {
            sw.Write("\t" + label);
        }
        sw.WriteLine();
        foreach (var entry in editorLives)
        {
            EditorLife span = entry.Value;
            sw.Write(entry.Key);
            sw.Write("\t" + span.CSets);
            for (int i = 0; i < quarters.Length; ++i)
            {
                bool active = span.FirstQIx <= i && i <= span.LastQIx;
                sw.Write(active ? "\t1" : "\t0");
            }
            sw.WriteLine();
        }
    }

    using (StreamWriter sw = new StreamWriter("22-contributors-quarters.txt"))
    {
        sw.NewLine = "\n";
        sw.Write("contributor");
        foreach (string label in labels)
        {
            sw.Write("\t" + label);
        }
        sw.WriteLine();
        foreach (var entry in contributorLives)
        {
            sw.Write(entry.Key);
            foreach (bool present in entry.Value.Quarters)
            {
                sw.Write(present ? "\t1" : "\t0");
            }
            sw.WriteLine();
        }
    }
}
/// <summary>
/// Rank histogram data for one calendar quarter, as filled in by calcQuarterlyHistograms().
/// </summary>
private class QuartHisto
{
    // Calendar year and zero-based quarter within that year (0..3).
    public int Year;
    public int Quarter;
    // Snapshot of the words tracked for this quarter.
    public List<string> Simps = new List<string>();
    // 100 histogram slots; getRankHisto() increments slot (rank / 1000).
    public int[] RankBuckets = new int[100];
    // Count of words with no known rank. Passed by ref to getRankHisto(), so it must stay a field.
    public int BeyondRanks;
}
/// <summary>
/// Adds the given words to a rank histogram: each word with a rank in <c>simpToRank</c>
/// increments <paramref name="buckets"/>[rank / 1000]; every other word increments
/// <paramref name="beyondRanks"/>.
/// </summary>
/// <param name="simps">Words to classify.</param>
/// <param name="buckets">Histogram to increment; slot i covers ranks i*1000 .. i*1000+999.</param>
/// <param name="beyondRanks">Counter for words that are unranked or ranked past the last bucket.</param>
static void getRankHisto(List<string> simps, int[] buckets, ref int beyondRanks)
{
    const int bucketWidth = 1000;
    foreach (string simp in simps)
    {
        int rank;
        if (!simpToRank.TryGetValue(simp, out rank)) { ++beyondRanks; continue; }
        int bucket = rank / bucketWidth;
        // A frequency list longer than the histogram must not overrun the array:
        // ranks past the last bucket are counted as "beyond" instead.
        if (bucket >= buckets.Length) { ++beyondRanks; continue; }
        ++buckets[bucket];
    }
}
// Matches a head of the form "<token> <token> [<bracketed text>]" at the start of the string
// and captures the second space-delimited token in group 1.
// Never reassigned, hence readonly.
static readonly Regex reHead = new Regex(@"^[^ ]+ ([^ ]+) \[[^\]]+\]");
/// <summary>
/// Replays <c>changes</c>, snapshots the set of words (group 1 of <c>reHead</c> applied to
/// each change head) at the end of every quarter, and writes one tab-separated row per
/// quarter to "23-quarter-histograms.txt": label, word count, count beyond ranks, then
/// one column per rank bucket.
/// </summary>
/// <remarks>
/// Expects <c>changes</c> in chronological order. With no changes an empty file is written.
/// The set is keyed by word only, so deleting one head removes the word even if another
/// head with the same word was added earlier.
/// </remarks>
static void calcQuarterlyHistograms()
{
    if (changes.Count == 0)
    {
        // Nothing to chart: produce an empty file rather than failing on the last-item lookup.
        using (StreamWriter empty = new StreamWriter("23-quarter-histograms.txt")) { }
        return;
    }
    int qcount = getQuarterIX(changes[changes.Count - 1].When) + 1;
    QuartHisto[] qinfos = new QuartHisto[qcount];
    int year = 2007;
    int quarter = 0;
    for (int i = 0; i != qinfos.Length; ++i)
    {
        qinfos[i] = new QuartHisto { Year = year, Quarter = quarter };
        ++quarter;
        if (quarter == 4) { quarter = 0; ++year; }
    }
    HashSet<string> simps = new HashSet<string>();
    int mix = 0;
    foreach (var ci in changes)
    {
        int currQix = getQuarterIX(ci.When);
        if (currQix != mix)
        {
            // Close the quarter just finished...
            qinfos[mix].Simps.AddRange(simps);
            // ...and any quarters skipped without a change: the dictionary did not shrink
            // to nothing there, it simply stayed as it was.
            for (int gap = mix + 1; gap < currQix; ++gap)
            {
                qinfos[gap].Simps.AddRange(simps);
            }
            mix = currQix;
        }
        foreach (var cc in ci.Changes)
        {
            Match m = reHead.Match(cc.Head);
            // A head that does not fit the expected shape has no word to track; without this
            // check the empty string would be counted as a word.
            if (!m.Success) continue;
            string simp = m.Groups[1].Value;
            if (cc.Delete) simps.Remove(simp);
            else simps.Add(simp);
        }
    }
    // Snapshot for the last quarter, which has no following transition.
    qinfos[mix].Simps.AddRange(simps);
    foreach (var qi in qinfos) getRankHisto(qi.Simps, qi.RankBuckets, ref qi.BeyondRanks);
    using (StreamWriter sw = new StreamWriter("23-quarter-histograms.txt"))
    {
        sw.NewLine = "\n";
        foreach (QuartHisto qi in qinfos)
        {
            sw.Write(qi.Year + "Q" + (qi.Quarter + 1));
            sw.Write("\t");
            sw.Write(qi.Simps.Count.ToString());
            sw.Write("\t");
            sw.Write(qi.BeyondRanks.ToString());
            for (int i = 0; i != qi.RankBuckets.Length; ++i)
            {
                sw.Write("\t");
                sw.Write(qi.RankBuckets[i].ToString());
            }
            sw.WriteLine();
        }
    }
}
/// <summary>
/// Totals the number of changes per submitter over all of <c>changes</c> and writes two
/// rankings (name, 1-based position, change count; tab-separated):
/// "24-contrib-ranks.txt" for every submitter with at least one change, and
/// "24-contrib-ranks-noneditors.txt" for those who never appear as an editor.
/// Change sets with an empty submitter are ignored.
/// </summary>
static void calcContribs()
{
    HashSet<string> editors = new HashSet<string>();
    Dictionary<string, int> personToContribs = new Dictionary<string, int>();
    foreach (var ci in changes)
    {
        editors.Add(ci.Editor);
        if (ci.Submitter == "") continue;
        // TryGetValue leaves soFar at 0 for a submitter not seen before.
        int soFar;
        personToContribs.TryGetValue(ci.Submitter, out soFar);
        personToContribs[ci.Submitter] = soFar + ci.Changes.Count;
    }
    List<string> everyone = new List<string>();
    List<string> nonEditors = new List<string>();
    foreach (var x in personToContribs)
    {
        if (x.Value <= 0) continue;
        everyone.Add(x.Key);
        if (!editors.Contains(x.Key)) nonEditors.Add(x.Key);
    }
    writeContribRanks("24-contrib-ranks.txt", everyone, personToContribs);
    // Same, but excluding editors
    writeContribRanks("24-contrib-ranks-noneditors.txt", nonEditors, personToContribs);
}

// Sorts people by descending change count and writes one "name, position, count" row each.
static void writeContribRanks(string fileName, List<string> people, Dictionary<string, int> counts)
{
    // List.Sort is not stable, so equal counts need an explicit tie-break (ordinal name
    // order) to make the output reproducible.
    people.Sort((x, y) =>
    {
        int byCount = counts[y].CompareTo(counts[x]);
        return byCount != 0 ? byCount : string.CompareOrdinal(x, y);
    });
    using (StreamWriter sw = new StreamWriter(fileName))
    {
        sw.NewLine = "\n";
        for (int i = 0; i != people.Count; ++i)
        {
            sw.Write(people[i]);
            sw.Write("\t");
            sw.Write((i + 1).ToString());
            sw.Write("\t");
            sw.Write(counts[people[i]]);
            sw.WriteLine();
        }
    }
}
/// <summary>
/// Entry point of this pass: loads the inputs, then produces every output file in turn.
/// </summary>
public static void Run()
{
    // Inputs first: every calculation below reads the data loaded here.
    readChanges();
    readRanks();

    // Outputs, in file-number order (20, 21/22, 23, 24).
    calcMonthly();
    calcQuarterly();
    calcQuarterlyHistograms();
    calcContribs();
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment