@spookyahell
Created August 14, 2024 15:43
binary data blob to emoji representation + build HTML with iframes (How to be wasteful with storage 101)
<!DOCTYPE html>
<html>
<head>
<title>Well... Here we are.</title>
</head>
<body>
<h1>Move along, nothing to see here. - Scram!</h1>
<!--You're gonna wanna use a certain file for decoding the emoji-html-data. -->
<!--This file does not exist yet. (I didn't make it yet) -->
<!--So if you want access to the decoded content, you're gonna have to get creative. Sucks to be you.-->
<!--At a later point there might be such a file, right now there is not.-->
<!--This is not the type of storage format you'll wanna use if you don't have it to spare.-->
<!--A 266 MB input creates a massive 3.64 GB of HTML content with a shit ton of emojis.-->
<!--Optionally place a name here - OPTIONALLY! - Might make sense... idk... -->
<!--Password-protected RAR archives are an option, so the content can be additionally secured-->
<!--The IA (Internet Archive) crawled 16 URLs while the settings were like this... well...-->
<!--Only gave it "a few parts" (a little over 100) last time and it crawled ALL of them-->
<!--If it only grabs ~11 MB every time, we'd need a lot of crawl requests to capture all the emojis-->
<!--We can assume their bot doesn't accept having over 2.2k assets in the HTML document-->
<!--Made the files somewhat smaller, now it saved a total of 24 files/parts -->
<!--I must have gotten lucky the first time because I tested with an 11 MB asset -->
<!--IA will store up to 143 MB or something, if files are small enough -->
<!--I mean if they crawl 24 MB per pass, that's about 155 crawl requests for all 3.64 GB -->
<!--Could also check whether just listing fewer parts gets you somewhere -->
</body>
</html>
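
The decoder hinted at in the comments above doesn't exist yet, but the format is easy to invert: each byte is written out as its decimal digits mapped through the emoji table in the script below, bytes on a line are joined with '-', and each line sits between a </strong> and a </p> tag. Here is a minimal decoder sketch under those assumptions (digit_lookup and decode_part are made-up names, not something the gist ships):

import re

# Inverse of the encoder's emoji_lookup table.
digit_lookup = {
    'πŸ˜‚': '1', '😘': '2', '❀️': '3',
    '😍': '4', '😊': '5', '😁': '6',
    'πŸ‘': '7', '☺️': '8', 'πŸ˜”': '9',
    'πŸ˜„': '0',
}

def decode_part(path):
    '''Recover the raw bytes hidden in one part-N.html file.'''
    with open(path, encoding='utf-8-sig') as f:
        html_text = f.read()
    out = bytearray()
    # Every dialogue line carries one row of emoji-encoded bytes joined with '-'.
    for line in re.findall(r'</strong>(.*?)</p>', html_text):
        for token in line.split('-'):
            # Translate each emoji back to its digit, e.g. '😁😊' -> '65' -> 0x41 ('A').
            for emo, digit in digit_lookup.items():
                token = token.replace(emo, digit)
            out.append(int(token))
    return bytes(out)

# Stitch the parts back together in order until one is missing.
with open('blob.decoded', 'wb') as out_f:
    part = 1
    while True:
        try:
            out_f.write(decode_part('part-{0}.html'.format(part)))
        except FileNotFoundError:
            break
        part += 1

The blow-up quoted above also checks out: a byte value averages about 2.6 decimal digits, each digit becomes a 4-6 byte emoji in UTF-8, and with separators plus per-line markup that lands around 14x, i.e. roughly 3.64 GB from a 266 MB input.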
'''Hiding any kind of binary data "in plain sight" should be fairly easy with this. Dear NSA... please: Have some fun with this.'''
path_to_encode = 'blob'
try:
    # Technically we don't need this part right now, however...
    #~ let's just leave it for nostalgic purposes,
    #~ also we may re-implement it soon... cause especially for encoding large amounts of data,
    #~ having the ability to skip already encoded bytes
    #~ (before encoding the next ones) will be greatly appreciated, I'm sure.
    with open('encoding_position') as f:
        seek_to = int(f.read())
except FileNotFoundError:
    seek_to = 0
# Maps each decimal digit of a byte value to an emoji.
emoji_lookup = {
    '1': 'πŸ˜‚', '2': '😘', '3': '❀️',
    '4': '😍', '5': '😊', '6': '😁',
    '7': 'πŸ‘', '8': '☺️', '9': 'πŸ˜”',
    '0': 'πŸ˜„',
}
def create_index(parts):
    base_html = '''<!DOCTYPE html>
<html>
<head>
<title>Mom and dad arguing: Day 1</title>
</head>
<body>
<h1>Mom and Dad arguing</h1>
<h2>for a long time</h2>
<!--A boring comment: decoding (is a thing we do). (But only if we want to know what's really in this html)-->
<!--2MB parts so that they will still be accepted.-->
{0}
</body>
</html>'''
    frame_tag_list = []
    for i in range(1, parts + 1):
        frame_tag_list.append('\t<h3>part {0}</h3>\n\t<iframe src="part-{0}.html" height="600" width="100%">Unsupported.</iframe>\n\n'.format(i))
    html_out = base_html.format('\n'.join(frame_tag_list))
    with open('index.html', 'w') as f:
        f.write(html_out)
parts = 0
while True:
    #~ print('iteration starts here')
    f = open(path_to_encode, 'rb')
    f.seek(seek_to)
    encoded_lines = []
    bpl = 20                 # bytes encoded per output line
    file_ended = False
    lines_in_file = 8000     # encoded lines per part file
    chunk_size = bpl * lines_in_file
    for i in range(0, lines_in_file):
        data_chunk = f.read(bpl)
        if data_chunk == b'':
            file_ended = True
            break
        bytes_line = [str(b) for b in data_chunk]
        #~ print(bytes_line)
        by_reprs = []
        for by in bytes_line:
            # Map each decimal digit of the byte value to its emoji.
            by_reprs.append(''.join([emoji_lookup[digit] for digit in by]))
        encoded_lines.append('-'.join(by_reprs))
    f.close()
    if encoded_lines:
        # Only write a part when something was encoded, so an empty final
        # chunk can't overwrite a perfectly good part file.
        parts += 1
        part_fname = 'part-{0}.html'.format(parts)
        with open(part_fname, 'w', encoding='utf-8-sig') as f:
            f.write('<!DOCTYPE html>\n<html>\n\n <body>\n')
            for idx, line in enumerate(encoded_lines):
                if idx % 2 == 1:
                    person = 'Mom'
                else:
                    person = 'Dad'
                f.write(' <p><strong>{0}:</strong>{1}</p>\n'.format(person, line))
            f.write(' </body>\n</html>')
        print('File written: {0!r}'.format(part_fname))
    new_position = seek_to + chunk_size
    #~ print('Position:', new_position / chunk_size)
    if file_ended:
        print('Looks like the file we want encoded has come to an end.')
        create_index(parts)
        break
    seek_to = new_position
    # Save encoding position for next time
    #~ with open('encoding_position', 'w') as posf:
    #~     posf.write(str(new_position))
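
To use the encoder, drop the file to hide next to the script as 'blob' and run it; you get part-1.html through part-N.html plus an index.html that frames them all. Re-enabling the resume feature is just a matter of pairing the commented-out save above with the loader at the top; a minimal sketch, placed inside the while loop right after seek_to is advanced:

# Persist the offset so a later run picks up where this one stopped
# (read back by the try/except at the top of the script).
with open('encoding_position', 'w') as posf:
    posf.write(str(new_position))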