Skip to content

Instantly share code, notes, and snippets.

@dimnikolos
Last active April 26, 2017 08:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dimnikolos/e3253dd054f437855b0d84896d26cf23 to your computer and use it in GitHub Desktop.
Save dimnikolos/e3253dd054f437855b0d84896d26cf23 to your computer and use it in GitHub Desktop.
Frequency distribution of processes
import csv
import sys
from collections import Counter
csv.field_size_limit(sys.maxsize)
#allBlocks.csv is from https://github.com/TUDelftScratchLab/ScratchDataset
with open('allBlocks.csv','r') as rawcsv:
reader = csv.DictReader(rawcsv,fieldnames =[
'projectID', 'scriptCoordinates', 'scriptIndex',
'stageSpriteindication', 'spriteName', 'indentation',
'blockIndex', 'blockType', 'parameters'])
largeMsgs = []
procDict = {}
#for each block
for (i,row) in enumerate(reader):
#if block is the first block of a process
if (row['stageSpriteindication']=='procDef' and row['blockIndex']=='0'):
#this project interests us (has process)
if row['projectID'] in procDict:
#actually row['spriteName'] is the name of the process
#if the same process appears many times it must be
#in different sprites (can't tell from csv but it's safe to assume)
if row['spriteName'] in procDict[row['projectID']]:
procDict[row['projectID']][row['spriteName']] += 1
else:
procDict[row['projectID']][row['spriteName']] = 1
else:
procDict[row['projectID']] = {row['spriteName']:1}
freqDict = {}
for projectID in procDict:
for process in procDict[projectID]:
processAppearances = procDict[projectID][process]
if processAppearances in freqDict:
freqDict[processAppearances] += 1
else:
freqDict[processAppearances] = 1
for freq in sorted(freqDict.keys()):
print(";".join([str(freq),str(freqDict[freq])]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment