# sudoaza/decipher.py

## decipher.py
import math
from collections import Counter

from nltk.util import ngrams

# Ciphertext under analysis, kept verbatim (including the trailing space).
ciphertext = """['|']][]]}[[/]] [{}]{[]}['|'], ]]]]])([](_) {[9}])([['|'] ]]/[[[{}]{[]}['|'] ][['|'] ['|']{[]}]]([]{[[/]] ['|']])([ [{=][{[9}](_)]][}[]{ ])([](_)['|'] ['|'][{}][]{ [}/[}])([[[/]]['|'] [{}]{[]}]][}[[))[]{[}{][]{[[)) [{][]]}[{}][]{]][}[[/]]. [{[{}][]{[]{]][}[[/]], ]]]]])([](_)]][}[[/]] ]{]0]{]]][}00['|'] """

# Sorted so that every run lists the symbols, and breaks frequency ties, in the
# same order (iteration order of a set of strings varies between runs).
charset = sorted(set(ciphertext))

# Raw count of every distinct character, printed most frequent first.
occurrences = {s: ciphertext.count(s) for s in charset}
print(sorted(occurrences.items(), key=lambda x: -x[1]))

# Space, comma, period and "0" are left out of the symbol statistics.
used_symbols = [c for c in charset if c not in " ,.0"]
print(used_symbols)

# Total number of characters that belong to the analysed symbol set.
symbols_count = sum(occurrences[s] for s in used_symbols)
print(symbols_count)

# Relative frequency of each analysed symbol within that total.
occurrence_rate = {s: occurrences[s] / symbols_count for s in used_symbols}
print(sorted(occurrence_rate.items(), key=lambda x: -x[1]))

# Collapse the group ['|'] into the single letter "T". "T" is not in
# used_symbols, so it also acts as a separator in the n-gram scan below.
# NOTE(review): occurrence_rate was measured before this substitution, so the
# rates of [ ' | ] still include the characters consumed here -- confirm that
# this is intended.
ciphertext = ciphertext.replace("['|']", "T")

# For every n-gram (n = 2..5) seen more than once, store the ratio between its
# observed rate and the rate expected if its symbols were independent.
ngram_likelihood = {}
for n in range(2, 6):
    # Every length-n window of the text that consists only of analysed symbols.
    candidates = (ciphertext[i:i + n] for i in range(len(ciphertext) - n + 1))
    windows = [w for w in candidates if all(s in used_symbols for s in w)]
    window_counts = Counter(windows)
    for str_ngram, count in window_counts.items():
        if count > 1:
            expected_occurrence_rate = math.prod(occurrence_rate[s] for s in str_ngram)
            # Normalise by the number of observed windows so the value is a
            # per-position rate, directly comparable to the expected one.
            actual_occurrence_rate = count / len(windows)
            ngram_likelihood[str_ngram] = actual_occurrence_rate / expected_occurrence_rate

print(sorted(ngram_likelihood.items(), key=lambda x: -x[1]))

print(ciphertext)
	import math

	from nltk.util import ngrams

	# NOTE(review): this tab-indented listing repeats the script at the top of
	# the file; consider keeping a single copy.
	# Ciphertext under analysis, kept verbatim (including the trailing space).
	ciphertext = """['|']][]]}[[/]] [{}]{[]}['|'], ]]]]])([](_) {[9}])([['|'] ]]/[[[{}]{[]}['|'] ][['|'] ['|']{[]}]]([]{[[/]] ['|']])([ [{=][{[9}](_)]][}[]{ ])([](_)['|'] ['|'][{}][]{ [}/[}])([[[/]]['|'] [{}]{[]}]][}[[))[]{[}{][]{[[)) [{][]]}[{}][]{]][}[[/]]. [{[{}][]{[]{]][}[[/]], ]]]]])([](_)]][}[[/]] ]{]0]{]]][}00['|'] """

	# Sorted so that every run lists the symbols, and breaks frequency ties, in
	# the same order (iteration order of a set of strings varies between runs).
	charset = sorted(set(ciphertext))

	# Raw count of every distinct character, printed most frequent first.
	occurrences = {s: ciphertext.count(s) for s in charset}
	print(sorted(occurrences.items(), key=lambda x: -x[1]))

	# Space, comma, period and "0" are left out of the symbol statistics.
	used_symbols = [c for c in charset if c not in " ,.0"]
	print(used_symbols)

	# Total number of characters that belong to the analysed symbol set.
	symbols_count = sum(occurrences[s] for s in used_symbols)
	print(symbols_count)

	# Relative frequency of each analysed symbol within that total.
	occurrence_rate = {s: occurrences[s] / symbols_count for s in used_symbols}
	print(sorted(occurrence_rate.items(), key=lambda x: -x[1]))

	# Collapse the group ['|'] into the single letter "T". "T" is not in
	# used_symbols, so it also acts as a separator in the n-gram scan below.
	# NOTE(review): occurrence_rate was measured before this substitution, so
	# the rates of [ ' | ] still include the characters consumed here --
	# confirm that this is intended.
	ciphertext = ciphertext.replace("['|']", "T")

	# For every n-gram (n = 2..5) seen more than once, store the ratio between
	# its observed rate and the rate expected if its symbols were independent.
	ngram_likelihood = {}
	for n in range(2, 6):
	    # Every length-n window of the text made only of analysed symbols.
	    candidates = (ciphertext[i:i + n] for i in range(len(ciphertext) - n + 1))
	    windows = [w for w in candidates if all(s in used_symbols for s in w)]
	    window_counts = Counter(windows)
	    for str_ngram, count in window_counts.items():
	        if count > 1:
	            expected_occurrence_rate = math.prod(occurrence_rate[s] for s in str_ngram)
	            # Normalise by the number of observed windows so the value is
	            # a per-position rate, comparable to the expected one.
	            actual_occurrence_rate = count / len(windows)
	            ngram_likelihood[str_ngram] = actual_occurrence_rate / expected_occurrence_rate

	print(sorted(ngram_likelihood.items(), key=lambda x: -x[1]))

	print(ciphertext)