Skip to content

Instantly share code, notes, and snippets.

@cairdcoinheringaahing
Created March 14, 2021 00:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cairdcoinheringaahing/bec5a6a0298238bcc5a431e03967600d to your computer and use it in GitHub Desktop.
Save cairdcoinheringaahing/bec5a6a0298238bcc5a431e03967600d to your computer and use it in GitHub Desktop.
import csv
import collections
# Frequency tables keyed by tuples of consecutive characters, e.g. ('a', 'b').
# They are filled in by main().
digraphs = collections.Counter()
trigraphs = collections.Counter()
quadgraphs = collections.Counter()

# Hand-written replacements for posts whose first code block did not fit the
# standard format.  Keys are strings because csv.reader yields every field as
# str; comparing the ID column against an int never matches.
# NOTE(review): the 182413 text contains 11 newline characters, so the
# repeated-character filter in _extract_husk still rejects it.  Confirm
# whether overrides were meant to bypass that filter.
_HUSK_OVERRIDES = {
    '173590': '▲mLġ≥',
    '182413': '\n'.join((
        'Lø',
        '¬ø',
        '←İp',
        '←İπ',
        '□←tN',
        '←İ5',
        'D←İ3',
        '←İ7',
        '½LΘİ€',
        '□←İπ',
        '←İ⁰',
        'D→D←İ⁰',
    )),
}

# Posts that are left out of the statistics entirely.
_SKIPPED_POSTS = frozenset({'137949'})

_REPEAT_LIMIT = 10   # reject code in which any one character occurs this often
_MAX_LENGTH = 100    # reject code longer than this many characters
_TOP_N = 30          # number of entries written per report section


def _extract_husk(post_id, body):
    """Return the code to analyse for one CSV row, or None to skip the row.

    post_id is the first CSV column as a string; body is the second column,
    which is searched for a ``<pre><code>`` block.  The text of the first such
    block is stripped and has the four HTML entities ``&quot;``, ``&gt;``,
    ``&lt;`` and ``&amp;`` decoded.  Per-post overrides and skips are applied
    next, and finally the repeat and length filters.
    """
    if '<pre><code>' not in body:
        return None
    # Extract the first bit of code
    husk = body.partition('<pre><code>')[2].partition('</code></pre>')[0].strip()
    husk = husk.replace('&quot;', '"')
    husk = husk.replace('&gt;', '>').replace('&lt;', '<')
    # '&amp;' is decoded last so that '&amp;lt;' ends up as the text '&lt;'.
    husk = husk.replace('&amp;', '&')
    # Some special cases which didn't fit the standard format
    if post_id in _SKIPPED_POSTS:
        return None
    husk = _HUSK_OVERRIDES.get(post_id, husk)
    # One counting pass instead of calling str.count once per character.
    if max(collections.Counter(husk).values(), default=0) >= _REPEAT_LIMIT:
        return None
    if len(husk) > _MAX_LENGTH:
        return None
    return husk


def _ngrams(line, size):
    """Return an iterator over every run of `size` consecutive characters."""
    return zip(*(line[offset:] for offset in range(size)))


def _format_section(size, counter):
    """Return the report section listing the most common n-grams of `size`."""
    parts = ['%d-graphs:\n' % size]
    parts.extend(
        '%4d %s\n' % (count, ''.join(gram))
        for gram, count in counter.most_common(_TOP_N)
    )
    return ''.join(parts)


def main(source_path='QueryResults.csv', report_path='most-common.txt'):
    """Count character n-grams in the code of each CSV row and write a report.

    Reads source_path as UTF-8 CSV, adds the 2-, 3- and 4-character sequences
    found on each line of every accepted code snippet to the module-level
    counters, then writes the 30 most common entries of each counter to
    report_path.  N-grams never span a line break because each line of a
    snippet is counted separately.
    """
    with open(source_path, newline='', encoding='utf-8') as f:
        for row in csv.reader(f):
            # Blank or truncated rows carry nothing to analyse.
            if len(row) < 2:
                continue
            if row[0] == 'Post Link':
                continue
            husk = _extract_husk(row[0], row[1])
            if husk is None:
                continue
            for line in husk.split('\n'):
                digraphs.update(_ngrams(line, 2))
                trigraphs.update(_ngrams(line, 3))
                quadgraphs.update(_ngrams(line, 4))
    sections = (
        _format_section(2, digraphs),
        _format_section(3, trigraphs),
        _format_section(4, quadgraphs),
    )
    with open(report_path, 'w', encoding='utf-8') as f:
        # Sections are separated by a single blank line.
        f.write('\n'.join(sections))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment