#Mikael-Lovqvist/minecrack-log-peek.py

## minecrack-log-peek.py
#Python script for having a peek in a large file
#Tested in python 3.11
#Script character encoding: utf-8 (this is mostly important for the current place_holder for unprintables which you can replace if needed)

#File to open
filename = '/srv/storage/Artifacts/Wiki Data Dumps/enwiktionary-20200220-pages-articles-multistream.xml'

#Start position
offset = 2 << 30	# 2 gigabytes in

#Number of bytes to read from the start position
chunk_size = 1 << 20	# 1 megabyte

#Create list of unprintable bytes
unprintable_bytes = bytes(b for b in range(32) if b not in (9, 10, 13))	#bytes 0 .. 31 except tab, line feed and carriage return

#Place holder character (replace this if your system is having issues rendering this one)
place_holder = '·'


def filter_unprintable(chunk, unprintable=unprintable_bytes, replacement=place_holder):
	'''Return chunk as text with every unprintable byte swapped for replacement.

	chunk is a bytes-like object supporting .decode(). Each byte becomes the
	character with the same code point (latin-1), so no decoding error can occur
	even when the read starts or ends in the middle of a multi-byte UTF-8
	sequence; such sequences simply show up as one character per byte.

	unprintable is an iterable of byte values (ints 0 .. 255) to replace and
	replacement is the string emitted in place of each of them.
	'''
	#One translation table + a single C-level pass instead of a per-byte python loop
	table = {byte: replacement for byte in unprintable}
	return chunk.decode('latin-1').translate(table)


#Only touch the file when run as a script, so the helper above can be imported on its own
if __name__ == '__main__':
	#Open file
	with open(filename, 'rb') as infile:
		#Seek to position
		infile.seek(offset)

		#Read chunk_size bytes of data at this position (fewer if the file ends first)
		chunk = infile.read(chunk_size)

	#Filter out unprintable characters and present result
	print(filter_unprintable(chunk))