Last active
January 13, 2017 19:06
-
-
Save doyousketch2/8c1d07923554c78ed888b089e45536e2 to your computer and use it in GitHub Desktop.
Python app to extract and parse JSON data from Scratch .sb or .sb2 files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# -*- coding: utf-8 -*- | |
##============================================== | |
## ScratchJsonExtract.py | |
## | |
## Eli Innis | |
## Twitter: @Doyousketch2 | |
## Email: Doyousketch2 @ yahoo.com | |
## | |
## 20 Dec, 2016 -- 13 Jan, 2017 | |
## GNU GPLv3 gnu.org/licenses/gpl-3.0.html | |
## | |
##============================================== | |
## libs ##------------------------------------ | |
import hashlib           ## compute md5 checksums of the extracted files
import json              ## convert project.json file into python dicts and lists
import os                ## file procedures
import subprocess as sp  ## run and get output from md5sum
import sys               ## used to check commandline args & exit
import zipfile           ## for opening zipped .sb & .sb2 files

import easygui as eg     ## EasyGUI lib
##============================================== | |
## vars ##------------------------------------ | |
title = 'ScratchJsonExtract.py'
message = 'Open Scratch file to parse'
Msg = 'Parsed JSON file from:  '
cloudMsg = '\n\n(CloudStorage ignored.\nSome features may not function as intended.)'
filetypes = [['*.sb', 'Scratch v1 files'], ['*.sb', '*.sb2', 'All Scratch Files'], ['*.*', 'All files']]
defaultOpenDir = '~/Documents/Scratch Projects/*.sb2'
subdir = 'ScratchObjects-'
##==============================================
##  global placeholders - Do not change below.
md5s = []        ## to hold a list of all the calculated md5's in the current working directory
text = []        ## to hold the text we gather
cloud = False    ## used to check if CloudStorage is requested
args = 0         ## running tally of how many arguments the current command uses
listEnd = 0      ## are we nearing the end of a list?  if so, we'll need an extra newline
listContent = 0  ## are we printing out the contents of a list?  if so, we can skip x, y, width, & height

##  no args needed for these commands:
command = ['whenGreenFlag', 'bounceOffEdge', 'show', 'hide', 'nextCostume', 'filterReset', 'comeToFront', 'stopAllSounds',
           'clearPenTrails', 'stampCostume', 'putPenDown', 'putPenUp', 'whenClicked', 'whenCloned', 'deleteClone']

##  show variables?
var = ['xpos', 'ypos', 'heading', 'costumeIndex', 'sceneName', 'scale', 'tempo', 'volume', 'answer', 'mousePressed',
       'mouseX', 'mouseY', 'soundLevel', 'timer', 'timerReset', 'timestamp', 'getUserName']

##  these commands use 1 argument.
##  Each name must appear only once: the scanner adds one expected argument
##  per matching entry, so a repeated name would be counted several times.
arg1 = ['forward:', 'turnRight:', 'turnLeft:', 'heading:', 'pointTowards:', 'gotoSpriteOrMouse', 'changeXposBy:', 'xpos:',
        'changeYposBy:', 'ypos:', 'setRotationStyle', 'think:', 'say:', 'changeSizeBy:', 'setSizeTo:', 'goBackByLayers:', 'lookLike:',
        'startScene', 'wait:elapsed:from:', 'playSound:', 'doPlaySoundAndWait', 'rest:elapsed:from:', 'instrument:', 'changeVolumeBy:',
        'setVolumeTo:', 'changeTempoBy:', 'setTempoTo:', 'penColor:', 'changePenHueBy:',
        'setPenHueTo:', 'changePenShadeBy:', 'setPenShadeTo:', 'changePenSizeBy:', 'penSize:', 'readVariable', 'showVariable:',
        'hideVariable:', 'contentsOfList:', 'lineCountOfList:', 'showList:', 'hideList:', 'whenKeyPressed', 'whenSceneStarts',
        'whenIReceive', 'broadcast:', 'doBroadcastAndWait', 'doForever', 'doWaitUntil', 'stopScripts', 'createCloneOf',
        'touching:', 'touchingColor:', 'distanceTo:', 'doAsk', 'keyPressed:', 'setVideoState', 'setVideoTransparency', 'timeAndDate',
        'not', 'stringLength:', 'rounded', 'call']

##  these commands use 2 arguments:
arg2 = ['gotoX:y:', 'say:duration:elapsed:from:', 'think:duration:elapsed:from:', 'changeGraphicEffect:by:', 'setGraphicEffect:to:',
        'playDrum', 'noteOn:duration:elapsed:from:', 'setVar:to:', 'changeVar:by:', 'append:toList:', 'deleteLine:ofList:', 'getLine:ofList:',
        'list:contains:', 'whenSensorGreaterThan', 'doRepeat', 'doIf', 'doUntil', 'color:sees:', 'senseVideoMotion', 'getAttribute:of:',
        '+', '-', '*', '/', 'randomFrom:to:', '<', '=', '>', '&', '|', 'concatenate:with:', 'letter:of:', '%', 'computeFunction:of:']

##  these commands use 3 arguments:
arg3 = ['glideSecs:toX:y:elapsed:from:', 'insert:at:ofList:', 'setLine:ofList:to:', 'doIfElse']
''' | |
new = 'procDef' | |
Blah %n %s %b Label1 | |
number1, string1, boolean1, Str | |
1, Str, True | |
run witout refresh Bool | |
''' | |
##============================================== | |
## open file ##------------------------------- | |
##  Use the first command-line argument when it names an existing file.
##  (sys.argv[0] is the command itself, so the filename is element 1.)
##  Otherwise fall back to a file dialog box.
scratchfile = sys.argv[1] if len(sys.argv) > 1 else None
if scratchfile is None or not os.path.isfile(scratchfile):
    ##  fileopenbox(msg=None, title=None, default='*', filetypes=None, multiple=False)
    ##  '~' is expanded here so the dialog receives a real directory path.
    scratchfile = eg.fileopenbox(message, title, os.path.expanduser(defaultOpenDir), filetypes)

if not isinstance(scratchfile, str):  ## a dismissed dialog box doesn't hand back a string
    sys.exit()                        ## close the program at this point, because we have no file

##  Split an absolute version of the path, so a bare relative filename still
##  gives a real folder instead of an empty one (an empty folder part would
##  make the '/'-joined extraction path below start at the filesystem root).
path, filename = os.path.split(os.path.abspath(scratchfile))
##  splitext cuts at the last dot only: dotted names stay whole, the extension
##  keeps its dot, and a file with no extension gets an empty ext.
name, ext = os.path.splitext(filename)

extractDir = path + '/' + subdir + name  ## folder that receives the archive contents
try:
    with zipfile.ZipFile(scratchfile) as myzip:  ## "with" closes the archive even on errors
        myzip.extractall(extractDir)             ## extract contents of zip
except zipfile.BadZipFile:
    sys.exit('Cannot extract "%s": it is not a zip archive.' % scratchfile)

##  JSON text is read as UTF-8 rather than the platform's default encoding.
with open(extractDir + '/project.json', 'r', encoding='utf-8') as project:
    jay = json.load(project)  ## load JSON file into the variable jay
##============================================== | |
## compute md5's ##---------------------------- | |
##  Build 'filenames': a lookup from '<md5 of file contents>.<file extension>'
##  to the name of the extracted file that has those contents.
currentDir = (path + '/' + subdir + name)
##  Only regular files can be hashed, so anything else in the folder is skipped.
files = [f for f in os.listdir(currentDir) if os.path.isfile(currentDir + '/' + f)]

for f in files:
    ##  hashlib gives the same hex digest the md5sum tool prints,
    ##  without launching an external program for every file.
    with open(currentDir + '/' + f, 'rb') as extracted:
        digest = hashlib.md5(extracted.read()) .hexdigest()
    ##  Kept in its own variable: 'ext' holds the extension of the opened
    ##  Scratch file, which is still needed for the final dialog heading.
    fileExt = f.split('.')[-1]
    md5s.append(digest + '.' + fileExt)

filenames = dict(zip(md5s, files))
print(filenames)
##============================================== | |
## scan funct ##------------------------------ | |
## In order to use a JSON file, we need to traverse down its branches.
## Modified code that was intended for recursive printing: | |
## code.activestate.com/recipes/578094-recursively-print-nested-dictionaries | |
## 'item' is the item we are currently scanning | |
## 'depth' is how far we are within it, ie this[0] [1] [2] [3] | |
## 'key' is used for dictionaries, so we can get the correct values associated with that particular key | |
## 'scriptLoc' is so we can skip X,Y location of scripts. not needed in Python | |
def _indent(width):
    """Return `width` spaces; simple spaces stand in for tabs in the output."""
    return ' ' * width


def scan(item, depth = 0, dictkey = '', scriptLoc = 0):
    """Recursively walk a decoded JSON value and append a readable dump to `text`.

    item      -- value being scanned: a list, a dict, or anything else (a leaf)
    depth     -- current nesting depth, used as the indent width
    dictkey   -- key under which `item` was found; accepted but not read here
    scriptLoc -- countdown of list levels still to descend before leaves are
                 written; leaves reached while it is above zero (the X,Y
                 position of a script) are skipped

    Returns None.  Appends to the global `text`, updates the global counters
    `args`, `listContent` and `listEnd`, and sets the global `cloud` to True
    when a 'hasCloudData' or 'isPersistent' key holds True.

    NOTE(review): the nesting was rebuilt from a copy of this file whose
    indentation had been lost -- confirm against the author's original.
    """
    global args, cloud, listContent, listEnd, text

    if isinstance(item, list):
        ##  One level of the script countdown is used up per nested list.
        childLoc = max(scriptLoc - 1, 0)
        for listitem in item:
            scan(listitem, depth + 2, '', childLoc)
        if listEnd != 0:       ## a 'contents' list was just written out,
            text.append('\n')  ## so finish it with a newline
            listEnd = 0        ## and stop newlining later lists

    elif isinstance(item, dict):
        for key, value in sorted(item.items()):
            if key in ('hasCloudData', 'isPersistent'):  ## possible cloud variable
                if value is True:
                    cloud = True  ## this project expects the cloud
            elif isinstance(value, list):  ## children, costumes, variables, scripts
                text.append('\n' + _indent(depth) + '::  List - %s  ::' % (key) + '\n')
                if key == 'scripts':
                    scan(value, depth + 2, key, 4)  ## countdown of 4 hides script locations
                else:
                    if key == 'contents':
                        listContent = 4  ## skip the next x, y, width & height entries
                        listEnd = 1      ## this list needs a newline once it is done
                    scan(value, depth + 2, key)
            elif isinstance(value, dict):  ## info or spriteInfo
                text.append('\n' + _indent(depth) + '##  Dict :: %s  ##' % (key) + '\n')
                scan(value, depth + 1, key)
            elif listContent > 0:
                if key in ('x', 'y', 'width', 'height'):
                    listContent -= 1  ## geometry of a list watcher isn't written
                else:
                    text.append(_indent(depth) + '%s = %s' % (key, value) + '\n')
            elif key in ('baseLayerMD5', 'penLayerMD5', 'md5'):
                ##  Translate the md5 into the extracted file's name.  A missing
                ##  entry gets a placeholder instead of raising KeyError.
                newval = filenames.get(value, '(file not found)')
                text.append(_indent(depth) + '%s = %s    %s' % (key, value, newval) + '\n')
            else:  ## a regular key-pair
                text.append(_indent(depth) + '%s = %s' % (key, value) + '\n')

    elif scriptLoc < 1:
        ##  A leaf: likely a string, number, boolean, or null.
        if args > 0:
            args -= 1  ## this leaf is an argument of the current command
        else:
            text.append('\n' + _indent(depth))  ## a new root command starts its own line
        ##  Note how many arguments this command expects.  Membership is tested
        ##  once per list, so a name repeated in a list is not counted twice.
        if item in arg3:
            args += 3
        if item in arg2:
            args += 2
        if item in arg1:
            args += 1
        if item == 'procDef' or item == 'call':
            ##  Parameters of NewBlocks aren't parsed yet; just mark the block.
            text.append('== %s ==  ' % (item))
        else:
            ##  Trailing padding keeps the next piece from colliding with this one.
            text.append('%s  ' % (item))
    ##  Leaves reached with scriptLoc >= 1 are a script's X,Y location and are
    ##  deliberately dropped.  An else-branch here could write them out, or
    ##  test for off-screen values and fix them, if necessary.
##============================================== | |
## main ##------------------------------------ | |
scan(jay)  ## walk the whole project, filling 'text' and the 'cloud' flag

##  The heading names the parsed file; Cloud projects also get a warning that
##  cloud vars won't be used.  Either way the result is shown in one codebox.
heading = Msg + name + ext
if cloud is True:
    heading = heading + cloudMsg
eg.codebox(heading, title, text)
##============================================== | |
## ToDo ##------------------------------------ | |
## parse params from NewBlocks. | |
## variables could be listed as name:value pairs | |
## instead of name: value: on separate lines.
##============================================== | |
## eof ##------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
link to this page - git.io/v1bwj
I may add further capabilities in the future,
such as file replacement, and whatnot.
Oh yeah, I also modded the EasyGui textbox, so it would show more text.
You can find that mod here - https://git.io/v1brh
older version: