Skip to content

Instantly share code, notes, and snippets.

@imptype
Last active September 25, 2023 18:44
Show Gist options
  • Save imptype/4b372c40c8252e726819e51eef35e2ef to your computer and use it in GitHub Desktop.
Save imptype/4b372c40c8252e726819e51eef35e2ef to your computer and use it in GitHub Desktop.
Printable ASCII characters sorted by frequency
etaonisrhldcumfpgyw,.bv01k52TS9AM-CIN"438B6RPEDHx7WLOFYGJzjUq:)($K;V*?Q/X&Z!%#@`'+<=>[\]^_{|}~
etaonisrhldcumfpgyw,.bv01k52TS9AM-CIN"438B6RPEDHx7WLOFYGJzjUq:)($K;V*?Q/X&Z!%#
# Case-sensitive letter and bigram frequency counts
# from large-scale English corpora
# MICHAEL N. JONES and D. J. K. MEWHORT
# Queen’s University, Kingston, Ontario, Canada
# https://link.springer.com/content/pdf/10.3758/BF03195586.pdf
# Columns: Letter, Uppercase ƒ (count), Lowercase ƒ (count), Uppercase rank, Lowercase rank
# Letter table: one row per letter, fields separated by single spaces in the
# order letter, uppercase count, lowercase count, uppercase rank, lowercase
# rank. Rows are joined with '\n' and there is no trailing newline.
a = "\n".join([
    "A 280,937 5,263,779 3 3",
    "B 169,474 866,156 8 20",
    "C 229,363 1,960,412 5 12",
    "D 129,632 2,369,820 12 11",
    "E 138,443 7,741,842 11 1",
    "F 100,751 1,296,925 17 15",
    "G 93,212 1,206,747 19 17",
    "H 123,632 2,955,858 13 9",
    "I 223,312 4,527,332 6 6",
    "J 78,706 65,856 20 25",
    "K 46,580 460,788 22 22",
    "L 106,984 2,553,152 15 10",
    "M 259,474 1,467,376 4 14",
    "N 205,409 4,535,545 7 5",
    "O 105,700 4,729,266 16 4",
    "P 144,239 1,255,579 10 16",
    "Q 11,659 54,221 24 26",
    "R 146,448 4,137,949 9 8",
    "S 304,971 4,186,210 2 7",
    "T 325,462 5,507,692 1 2",
    "U 57,488 1,613,323 21 13",
    "V 31,053 653,370 23 21",
    "W 107,195 1,015,656 14 19",
    "X 7,578 123,577 25 23",
    "Y 94,297 1,062,040 18 18",
    "Z 5,610 66,423 26 24",
])
# Punctuation and digit table: one row per character, in ASCII order from
# '!' (0x21) through '?' (0x3F), fields separated by single spaces.
# Field 0 is the character itself and field 1 is the count used for ranking.
# NOTE(review): the meaning of the two trailing counts is not evident from
# this file and nothing here reads them -- confirm against the paper.
#
# Every row must start with its literal ASCII character. A row whose first
# field is blank is stored under the empty-string key and silently vanishes
# from the ranking, and typographic quotes are not ASCII, so the quote rows
# use the plain '"' and "'" characters.
b = """! 2,178 58 1,866
" 284,671 142,168 26,827
# 10 0 0
$ 51,572 427 61
% 1,993 13 9
& 6,523 438 350
' 204,497 187,914 185,857
( 53,398 43,473 55
) 53,735 11 37,506
* 20,716 882 530
+ 309 8 112
, 984,969 111 810,376
- 252,302 160,049 138,556
. 946,136 41,636 847,611
/ 8,161 3,948 4,207
0 546,233 2,006 38
1 460,946 959 5,792
2 333,499 1,065 2,435
3 187,606 1,335 1,945
4 192,528 880 1,820
5 374,413 999 1,514
6 153,865 1,576 1,491
7 120,094 840 1,074
8 182,627 828 1,021
9 282,364 1,697 481
: 54,036 13 48,354
; 36,727 58 28,301
< 82 74 18
= 22 1 1
> 83 52 70
? 12,357 10 11,938"""
# Accumulator: maps each character to the count it will be ranked by.
data = {}
# Rows of the letter table, one per line of `a`.
lines = a.split('\n')
def tonumber(text: str) -> int:
    """Convert a comma-grouped count such as ``'5,263,779'`` to an ``int``.

    Args:
        text: Decimal digits, optionally grouped with ``,`` separators.

    Returns:
        The integer value of ``text`` with every comma removed.

    Raises:
        ValueError: If what remains after removing commas is not an integer.
    """
    return int(text.replace(',', ''))
# Letter rows: field 0 is the uppercase letter, field 1 its uppercase count
# and field 2 the count for the lowercase form of the same letter.
# Split on a single space rather than arbitrary whitespace so that field
# positions stay fixed even when a field is empty.
for line in lines:
    words = line.split(' ')
    data[words[0]] = tonumber(words[1])
    data[words[0].lower()] = tonumber(words[2])

# Punctuation and digit rows: field 0 is the character, field 1 its count.
lines = b.split('\n')
for line in lines:
    words = line.split(' ')
    if not words[0]:
        # A row with a blank first field has no character to rank; storing it
        # would only create an empty-string key that adds nothing to the
        # joined output, so skip it explicitly.
        continue
    data[words[0]] = tonumber(words[1])

# Order the characters from most to least frequent and emit them as one
# string. `data` is rebound from the count mapping to that string.
data = ''.join(sorted(data, key=data.get, reverse=True))
print(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment