Skip to content

Instantly share code, notes, and snippets.

@doyousketch2
Last active January 13, 2017 19:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save doyousketch2/8c1d07923554c78ed888b089e45536e2 to your computer and use it in GitHub Desktop.
Save doyousketch2/8c1d07923554c78ed888b089e45536e2 to your computer and use it in GitHub Desktop.
Python app to extract and parse JSON data from Scratch .sb or .sb2 files
#!/usr/bin/python3
# -*- coding: utf-8 -*-
##==============================================
## ScratchJsonExtract.py
##
## Eli Innis
## Twitter: @Doyousketch2
## Email: Doyousketch2 @ yahoo.com
##
## 20 Dec, 2016 -- 13 Jan, 2017
## GNU GPLv3 gnu.org/licenses/gpl-3.0.html
##
##==============================================
## libs ##------------------------------------
import hashlib ## compute md5 checksums of the extracted files
import json ## convert project.json file into python dicts and lists
import os ## file procedures
import subprocess as sp ## run and get output from md5sum
import sys ## used to check commandline args & exit
import zipfile ## for openining zipped .sb & .sb2 files

import easygui as eg ## EasyGUI lib
##==============================================
## vars ##------------------------------------
title = 'ScratchJsonExtract.py' ## caption passed to every dialog window
message = 'Open Scratch file to parse' ## prompt shown by the file-open dialog
Msg = 'Parsed JSON file from: ' ## start of the heading shown above the results
## warning appended to the heading when the project asks for cloud data
cloudMsg = (
    '\n\n(CloudStorage ignored.'
    '\nSome features may not function as intended.)'
)
## file-dialog filters: each entry lists its patterns first, its description last
filetypes = [
    ['*.sb', 'Scratch v1 files'],
    ['*.sb', '*.sb2', 'All Scratch Files'],
    ['*.*', 'All files'],
]
defaultOpenDir = '~/Documents/Scratch Projects/*.sb2' ## default passed to the file-open dialog
subdir = 'ScratchObjects-' ## prefix of the folder the archive is extracted into
##==============================================
## global placeholders - Do not change below.
## Module-level state shared between the top-level script and scan().
md5s, text = [], [] ## md5-based names computed for extracted files / gathered output text
cloud = False ## set to True by scan() when a cloud-data flag is found set
## counters maintained by scan():
##   args        - running tally of arguments the current command still expects
##   listEnd     - non-zero while a 'contents' list still needs its closing newline
##   listContent - how many of the x, y, width & height keys may still be skipped
args = listEnd = listContent = 0
## Lookup tables of Scratch block names, grouped by how many arguments follow.
## scan() reads arg1, arg2 and arg3 to decide how many of the following values
## belong on the same output line as a command.  Every name must be listed
## only once: a repeated entry can be counted once per occurrence, which
## inflates the argument tally for that command.

## no args needed for these commands:
command = [
    'whenGreenFlag', 'bounceOffEdge', 'show', 'hide', 'nextCostume',
    'filterReset', 'comeToFront', 'stopAllSounds', 'clearPenTrails',
    'stampCostume', 'putPenDown', 'putPenUp', 'whenClicked', 'whenCloned',
    'deleteClone',
]
## show variables?
var = [
    'xpos', 'ypos', 'heading', 'costumeIndex', 'sceneName', 'scale', 'tempo',
    'volume', 'answer', 'mousePressed', 'mouseX', 'mouseY', 'soundLevel',
    'timer', 'timerReset', 'timestamp', 'getUserName',
]
## these commands use 1 argument:
arg1 = [
    'forward:', 'turnRight:', 'turnLeft:', 'heading:', 'pointTowards:',
    'gotoSpriteOrMouse', 'changeXposBy:', 'xpos:', 'changeYposBy:', 'ypos:',
    'setRotationStyle', 'think:', 'say:', 'changeSizeBy:', 'setSizeTo:',
    'goBackByLayers:', 'lookLike:', 'startScene', 'wait:elapsed:from:',
    'playSound:', 'doPlaySoundAndWait', 'rest:elapsed:from:', 'instrument:',
    'changeVolumeBy:', 'setVolumeTo:', 'changeTempoBy:', 'setTempoTo:',
    'penColor:', 'changePenHueBy:', 'setPenHueTo:', 'changePenShadeBy:',
    'setPenShadeTo:', 'changePenSizeBy:', 'penSize:', 'readVariable',
    'showVariable:', 'hideVariable:', 'contentsOfList:', 'lineCountOfList:',
    'showList:', 'hideList:', 'whenKeyPressed', 'whenSceneStarts',
    'whenIReceive', 'broadcast:', 'doBroadcastAndWait', 'doForever',
    'doWaitUntil', 'stopScripts', 'createCloneOf', 'touching:',
    'touchingColor:', 'distanceTo:', 'doAsk', 'keyPressed:', 'setVideoState',
    'setVideoTransparency', 'timeAndDate', 'not', 'stringLength:', 'rounded',
    'call',
]
## these commands use 2 arguments:
arg2 = [
    'gotoX:y:', 'say:duration:elapsed:from:', 'think:duration:elapsed:from:',
    'changeGraphicEffect:by:', 'setGraphicEffect:to:', 'playDrum',
    'noteOn:duration:elapsed:from:', 'setVar:to:', 'changeVar:by:',
    'append:toList:', 'deleteLine:ofList:', 'getLine:ofList:',
    'list:contains:', 'whenSensorGreaterThan', 'doRepeat', 'doIf', 'doUntil',
    'color:sees:', 'senseVideoMotion', 'getAttribute:of:', '+', '-', '*', '/',
    'randomFrom:to:', '<', '=', '>', '&', '|', 'concatenate:with:',
    'letter:of:', '%', 'computeFunction:of:',
]
## these commands use 3 arguments:
arg3 = [
    'glideSecs:toX:y:elapsed:from:', 'insert:at:ofList:', 'setLine:ofList:to:',
    'doIfElse',
]
'''
new = 'procDef'
Blah %n %s %b Label1
number1, string1, boolean1, Str
1, Str, True
run without refresh Bool
'''
##==============================================
## open file ##-------------------------------
## Pick the project file: use the first commandline argument when it names an
## existing file, otherwise ask the user with a file dialog box.
## (sys.argv[0] is the command itself, so the filename is element 1.)
scratchfile = sys.argv[1] if len(sys.argv) > 1 else None
if scratchfile is None or not os.path.isfile(scratchfile):
    ## fileopenbox(msg=None, title=None, default='*', filetypes=None, multiple=False)
    scratchfile = eg.fileopenbox(message, title, defaultOpenDir, filetypes)

if not isinstance(scratchfile, str): ## dialog was closed without choosing a file
    sys.exit() ## nothing to parse, so stop here

## Split the location with os.path so relative filenames, other path
## separators, and names containing extra dots are all handled.
path, basename = os.path.split(os.path.abspath(scratchfile)) ## folder / filename
name, ext = os.path.splitext(basename) ## ext keeps its leading dot

extractDir = os.path.join(path, subdir + name) ## where the archive contents go
try:
    with zipfile.ZipFile(scratchfile) as myzip: ## "with" closes the archive for us
        myzip.extractall(extractDir) ## extract contents of zip
    ## JSON text is read as UTF-8 rather than the platform's default encoding
    with open(os.path.join(extractDir, 'project.json'), 'r', encoding = 'utf-8') as project:
        jay = json.load(project) ## load JSON file into the variable jay
except zipfile.BadZipFile:
    ## NOTE(review): Scratch v1 .sb projects appear not to be zip archives -- confirm.
    eg.msgbox('Not a zip archive, so it cannot be extracted:\n' + scratchfile, title)
    sys.exit(1)
except FileNotFoundError:
    eg.msgbox('No project.json found inside:\n' + scratchfile, title)
    sys.exit(1)
##==============================================
## compute md5's ##----------------------------
## Build 'filenames', a dict that maps '<md5 of contents>.<extension>' to the
## name each extracted file actually has on disk, so scan() can translate the
## md5 references found in project.json.
currentDir = os.path.join(path, subdir + name)
files = os.listdir(currentDir)
filenames = {}
for f in files:
    currentFile = os.path.join(currentDir, f)
    if not os.path.isfile(currentFile): ## only regular files can be hashed
        continue
    ## a separate name is used here so the project's own 'ext' is not overwritten
    fileExt = f.split('.')[-1]
    digest = hashlib.md5() ## computed in-process, no external md5sum needed
    with open(currentFile, 'rb') as extracted:
        for chunk in iter(lambda: extracted.read(65536), b''): ## read in pieces
            digest.update(chunk)
    md5name = digest.hexdigest() + '.' + fileExt
    md5s.append(md5name)
    filenames[md5name] = f
print(filenames)
##==============================================
## scan funct ##------------------------------
## In order to use a JSON file, we need to traverse down its branches.
## Modified code that was intended for recursive printing:
## code.activestate.com/recipes/578094-recursively-print-nested-dictionaries
## 'item' is the item we are currently scanning
## 'depth' is how far we are within it, ie this[0] [1] [2] [3]
## 'dictkey' is used for dictionaries, so we can get the correct values associated with that particular key
## 'scriptLoc' is so we can skip X,Y location of scripts. not needed in Python
def scan(item, depth = 0, dictkey = '', scriptLoc = 0):
    """Recursively walk parsed project JSON, appending readable text to ``text``.

    item      -- the list, dict, or leaf value currently being scanned.
    depth     -- nesting level; used as the number of spaces of indentation.
    dictkey   -- key under which ``item`` was found.  Accepted so callers can
                 pass it along, but it is not read inside this function.
    scriptLoc -- countdown handed down through nested lists (it starts at 4
                 for a 'scripts' list).  Leaf values reached while it is still
                 above zero are skipped, which drops the X,Y script positions.

    Returns None.  Reads the module-level arg1, arg2, arg3 and filenames, and
    updates the module-level args, cloud, listContent, listEnd and text.
    """
    global args, cloud, listContent, listEnd, text
    indent = ' ' * depth ## simple spaces will suffice for tabs

    if isinstance(item, list): ## list entry: scan every element
        for listitem in item:
            if scriptLoc > 0:
                scan(listitem, depth + 2, '', scriptLoc - 1) ## keep counting down
            else:
                scan(listitem, depth + 2)
        if listEnd != 0: ## a 'contents' list was just finished:
            text.append('\n') ## close it with a newline,
            listEnd = 0 ## and do so only once.

    elif isinstance(item, dict): ## dictionary entry: handle each key in sorted order
        for key, value in sorted(item.items()):
            if key in ('hasCloudData', 'isPersistent'): ## possible cloud variable
                if value is True:
                    cloud = True ## this project expects the cloud
            elif isinstance(value, list): ## children, costumes, variables, scripts
                text.append('\n' + indent + ':: List - %s ::' % (key) + '\n')
                if key == 'scripts':
                    scan(value, depth + 2, key, 4) ## 4 makes the scan skip script locations
                elif key == 'contents':
                    listContent = 4 ## x, y, width & height may be skipped for this list
                    listEnd = 1 ## this list still needs its closing newline
                    scan(value, depth + 2, key)
                else: ## just a regular list
                    scan(value, depth + 2, key)
            elif isinstance(value, dict): ## info or spriteInfo
                text.append('\n' + indent + '## Dict :: %s ##' % (key) + '\n')
                scan(value, depth + 1, key)
            elif listContent > 0:
                if key in ('x', 'y', 'width', 'height'):
                    listContent -= 1 ## skipped, one fewer left to skip
                else: ## any other key is printed
                    text.append(indent + '%s = %s' % (key, value) + '\n')
            elif key in ('baseLayerMD5', 'penLayerMD5', 'md5'):
                ## translate the md5 into a filename; .get() avoids a KeyError
                ## when no extracted file has that md5
                newval = filenames.get(value)
                if newval is None:
                    text.append(indent + '%s = %s' % (key, value) + '\n')
                else:
                    text.append(indent + '%s = %s %s' % (key, value, newval) + '\n')
            else: ## it's a regular key-pair
                text.append(indent + '%s = %s' % (key, value) + '\n')

    elif scriptLoc < 1: ## leaf value: likely a string, number, or boolean
        if args > 0: ## this value is an argument of the current command,
            args -= 1 ## so it stays on the same line.
        else: ## otherwise it starts a new command line
            text.append('\n' + indent)
        ## Membership tests add each table's count once, even if a name were
        ## listed more than once in that table.
        if item in arg3:
            args += 3
        if item in arg2:
            args += 2
        if item in arg1:
            args += 1
        if item == 'procDef' or item == 'call':
            ## parameters of NewBlocks are not parsed yet; just mark them
            text.append('== %s == ' % (item))
        else:
            ## trailing space keeps the next value from colliding with this one
            text.append('%s ' % (item))
    ## else: the value is an X,Y script coordinate and is deliberately dropped.
    ## It could be written out here instead, e.g.
    ##     text.append(indent + '%s == loc' % (item) + '\n')
    ## and values that are offscreen could be tested for and fixed, if necessary.
##==============================================
## main ##------------------------------------
scan(jay) ## walk the whole project; the output accumulates in 'text'
## The heading always names the parsed file; projects that asked for
## cloud data get the CloudStorage warning appended to it.
heading = Msg + name + ext
if cloud is True:
    heading = heading + cloudMsg
eg.codebox(heading, title, text)
##==============================================
## ToDo ##------------------------------------
## parse params from NewBlocks.
## variables could be listed as name:value pairs
## instead of name: value: on separate lines.
##==============================================
## eof ##-------------------------------------
@doyousketch2
Copy link
Author

doyousketch2 commented Dec 20, 2016

link to this page - git.io/v1bwj

I may add further capabilities in the future,
such as file replacement, and whatnot.

Oh yeah, I also modded the EasyGui textbox, so it would show more text.
You can find that mod here - https://git.io/v1brh

Here's a Pic

older version:

Here's a Pic

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment