Skip to content

Instantly share code, notes, and snippets.

@nik-hil
Created August 7, 2018 03:39
Show Gist options
  • Save nik-hil/e86ca97030c31c9b413ff014d74b576a to your computer and use it in GitHub Desktop.
Save nik-hil/e86ca97030c31c9b413ff014d74b576a to your computer and use it in GitHub Desktop.
# copied from kaggle microsoft malware classification problem discussion forum
import sys
import os
from math import log, pow
import numpy as np
import scipy as sp
from PIL import Image
import matplotlib.pyplot as plt
def saveimg(array,name):
    """Reshape a table of byte values into a 2-D grayscale image and save it as JPEG.

    The image width is the smallest power of two strictly greater than the
    integer square root of the total number of bytes (rows * 16); the height
    is the number of complete image rows of that width that fit. Input rows
    that do not fit into the resulting rectangle are dropped from the end.

    Args:
        array: 2-D numpy array with exactly 16 columns. Values are cast to
            ``uint8`` before the image is built.
        name: Base name of the output file; the image is written to
            ``'dataSample/' + name + '.jpg'``.

    Raises:
        AssertionError: if ``array`` is not 2-D with 16 columns.
        ValueError: if ``array`` has no rows.
    """
    print(name)
    # Raise explicitly instead of using `assert`, which is stripped under
    # `python -O`; AssertionError is kept so the exception type is unchanged.
    if array.ndim != 2 or array.shape[1] != 16:
        raise AssertionError(
            "expected a 2-D array with 16 columns, got shape %r" % (array.shape,)
        )
    n_bytes = array.shape[0] * 16
    if n_bytes == 0:
        raise ValueError("cannot build an image from an array with no rows")
    side = int(n_bytes ** 0.5)
    # Smallest power of two strictly greater than `side`, computed with exact
    # integer arithmetic (same value as 2 ** (floor(log2(side)) + 1)) so the
    # result does not depend on floating-point rounding of log().
    width = 1 << side.bit_length()
    height = n_bytes // width
    # Number of 16-byte input rows that exactly fill a height x width image.
    kept_rows = height * width // 16
    print(height, width, kept_rows, array.shape)
    pixels = np.reshape(array[:kept_rows, :], (height, width))
    im = Image.fromarray(np.uint8(pixels))
    # NOTE(review): JPEG is a lossy format, so the saved pixels may not match
    # the input bytes exactly; format kept as-is to preserve existing output.
    im.save('dataSample/' + name + '.jpg', "JPEG")
# Convert every '.bytes' file in dataSample/ into an image via saveimg().
# Each usable line has 17 whitespace-separated tokens: the first token is
# discarded (presumably the line's address -- confirm against the data
# format) and the remaining 16 are parsed as hexadecimal byte values, with
# the placeholder '??' mapped to 0.
files=os.listdir('dataSample')
c=0  # number of '.bytes' files encountered
for cc,x in enumerate(files):
    if not x.endswith('.bytes'):
        continue
    print (cc)
    c+=1
    array=[]
    # `with` guarantees the file is closed even if parsing raises.
    with open('dataSample/'+x) as f:
        for line in f:
            xx=line.split()
            # Skip lines that do not hold exactly 1 + 16 tokens.
            if len(xx)!=17:
                continue
            array.append([int(i,16) if i!='??' else 0 for i in xx[1:]])
    if not array:
        # Without any complete line, np.array([]) is 1-D and saveimg() would
        # fail on it; skip this file instead of aborting the whole batch.
        print('skipping '+x+': no complete 16-byte lines found')
        continue
    saveimg(np.array(array),x)
    # Drop the parsed rows before moving on to the next file.
    del array
print (c)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment