Last active
April 26, 2017 08:08
-
-
Save dimnikolos/e3253dd054f437855b0d84896d26cf23 to your computer and use it in GitHub Desktop.
Frequency distribution of processes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Frequency distribution of procedure ("process") definitions in Scratch
# projects.
#
# Reads the block-level CSV of the Scratch dataset, counts how many times each
# procedure name is defined inside each project, and prints how many
# (project, procedure) pairs share each appearance count, one
# "appearances;count" line per distinct count, in ascending order.
import csv
import sys
from collections import Counter, defaultdict

# allBlocks.csv is from https://github.com/TUDelftScratchLab/ScratchDataset
DEFAULT_CSV_PATH = 'allBlocks.csv'

# The file is read with these explicit column names, so every line of the
# file (including its first one) is treated as data.
FIELD_NAMES = [
    'projectID', 'scriptCoordinates', 'scriptIndex',
    'stageSpriteindication', 'spriteName', 'indentation',
    'blockIndex', 'blockType', 'parameters',
]


def raise_field_size_limit():
    """Set the csv field size limit as high as the platform allows.

    ``csv.field_size_limit`` stores the value in a C ``long``; where that
    type is narrower than ``sys.maxsize`` the call raises ``OverflowError``.
    The limit is therefore halved until it is accepted.

    Returns:
        The limit that was finally installed.
    """
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
        except OverflowError:
            limit //= 2
        else:
            return limit


def count_procedure_definitions(rows):
    """Count procedure definitions per project and per procedure name.

    A row is counted when its 'stageSpriteindication' field equals
    'procDef' and its 'blockIndex' field equals '0', i.e. (per the original
    author's notes) when it is the first block of a procedure definition.
    For such rows the 'spriteName' field actually holds the procedure name.
    If the same name is counted more than once in a project, the definitions
    presumably live in different sprites; the CSV does not say, so this is
    an assumption.

    Args:
        rows: iterable of mappings keyed by the names in ``FIELD_NAMES``
            (for example a ``csv.DictReader``).

    Returns:
        A mapping ``{projectID: Counter({procedure_name: appearances})}``
        containing only projects with at least one counted row.
    """
    counts = defaultdict(Counter)
    for row in rows:
        if (row['stageSpriteindication'] == 'procDef'
                and row['blockIndex'] == '0'):
            counts[row['projectID']][row['spriteName']] += 1
    return counts


def frequency_distribution(proc_counts):
    """Return how many (project, procedure) pairs have each appearance count.

    Args:
        proc_counts: mapping as returned by ``count_procedure_definitions``.

    Returns:
        A ``Counter`` mapping an appearance count to the number of
        (project, procedure) pairs having exactly that count.
    """
    return Counter(
        appearances
        for per_project in proc_counts.values()
        for appearances in per_project.values()
    )


def main(path=DEFAULT_CSV_PATH):
    """Read the CSV at *path* and print the distribution as ``freq;count``."""
    raise_field_size_limit()
    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields that contain line breaks.
    with open(path, 'r', newline='') as rawcsv:
        reader = csv.DictReader(rawcsv, fieldnames=FIELD_NAMES)
        proc_counts = count_procedure_definitions(reader)
    distribution = frequency_distribution(proc_counts)
    for appearances in sorted(distribution):
        print(";".join([str(appearances), str(distribution[appearances])]))


if __name__ == '__main__':
    # An optional first command-line argument overrides the default CSV path.
    main(*sys.argv[1:2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment