Skip to content

Instantly share code, notes, and snippets.

@lydell
Created August 23, 2015 08:54
Show Gist options
  • Star 28 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lydell/c439049abac2c9226e53 to your computer and use it in GitHub Desktop.
Save lydell/c439049abac2c9226e53 to your computer and use it in GitHub Desktop.
English bigram and letter pair frequencies from the Google Corpus Data in JSON format

English Letter Frequency Counts: Mayzner Revisited or ETAOIN SRHLDCU by Peter Norvig is an analysis of English letter frequencies using the Google Corpus Data. Among other things it contains the frequency of all bigrams.

This gist contains a program that extracts those bigram frequencies into an easily usable JSON format.

It also contains the result of running that program (bigrams.json), as well as a version of it where the order of the letters of a bigram is not taken into account (pairs.json). The two JSON files were generated from a copy of the above article retrieved 2015-08-23.

To regenerate the JSON files:

$ curl http://norvig.com/mayzner.html >article.html
$ npm install
$ node extract <article.html >bigrams.json
$ node bigrams-to-pairs <bigrams.json >pairs.json

All of the files are in the public domain.

// By Simon Lydell 2015.
// This file is in the public domain.
var stdin = require("get-stdin")
var tools = require("text-frequencies-analysis")
var helpers = require("text-frequencies-analysis/lib/helpers")
// Read the whole of stdin, parse it as JSON, convert the parsed bigram tuples
// into pair tuples, and write the serialized result to stdout.
stdin(function(input) {
  var bigrams = JSON.parse(input)
  var serialized = tools.jsonStringifyRow(convert(bigrams))
  process.stdout.write(serialized)
})
// Collapses ordered bigrams into order-independent letter pairs.
//
// `bigrams` is an array of [bigram, frequency] tuples. The characters of each
// bigram are sorted to form the pair key (so "th" and "ht" share the key
// "ht"), and the frequencies of all bigrams with the same key are added up.
//
// The accumulated map is passed through `helpers.objectToArray` and
// `tools.sortTuples` before being returned (presumably producing sorted
// [pair, frequency] tuples; confirm against text-frequencies-analysis).
function convert(bigrams) {
  // A prototype-less object, so the `in` check only ever sees our own keys.
  var totals = bigrams.reduce(function(sums, entry) {
    var key = entry[0].split("").sort().join("")
    sums[key] = key in sums ? sums[key] + entry[1] : entry[1]
    return sums
  }, Object.create(null))
  return tools.sortTuples(helpers.objectToArray(totals))
}
[
["th",100272945963],
["he",86697336727],
["in",68595215308],
["er",57754162106],
["an",55974567611],
["re",52285662239],
["on",49570981965],
["at",41920838452],
["en",41004903554],
["nd",38129777631],
["ti",37856196209],
["es",37766388079],
["or",35994097756],
["te",33973261529],
["of",33130341561],
["ed",32937140633],
["is",31817918249],
["it",31672532308],
["al",30662410438],
["ar",30308513014],
["st",29704461829],
["to",29360205581],
["nt",29359771944],
["ng",26871805511],
["se",26282488562],
["ha",26103411208],
["as",24561944198],
["ou",24531132241],
["io",23542263265],
["le",23382173640],
["ve",23270129573],
["co",22384167777],
["me",22360109325],
["de",21565300071],
["hi",21520845924],
["ri",20516398905],
["ro",20491179118],
["ic",19701195496],
["ne",19504235770],
["ea",19403941063],
["ra",19332539912],
["ce",18367773425],
["li",17604626629],
["ch",16854985236],
["ll",16257360474],
["be",16249257887],
["ma",15938689768],
["si",15509759748],
["om",15402602484],
["ur",15303657594],
["ca",15174413181],
["el",14952716079],
["ta",14941000711],
["la",14874551789],
["ns",14350320288],
["di",13899990598],
["fo",13753006196],
["ho",13672603513],
["pe",13477683504],
["ec",13457763533],
["pr",13378480175],
["no",13099447521],
["ct",12997849406],
["us",12808517567],
["ac",12625666388],
["ot",12465822481],
["il",12167821320],
["tr",12006693396],
["ly",11983948242],
["nc",11722631112],
["et",11634161334],
["ut",11423899818],
["ss",11421755201],
["so",11214705934],
["rs",11180732354],
["un",11121118166],
["lo",10908830081],
["wa",10865206430],
["ge",10861045622],
["ie",10845731320],
["wh",10680697684],
["ee",10647199443],
["wi",10557401491],
["em",10536054813],
["ad",10375130449],
["ol",10305660447],
["rt",10198055461],
["po",10189505383],
["we",10176141608],
["na",9790855551],
["ul",9751225781],
["ni",9564648232],
["ts",9516029773],
["mo",9498813191],
["ow",9318366591],
["pa",9123652775],
["im",8959759181],
["mi",8957825538],
["ai",8922759715],
["sh",8888705287],
["ir",8886799024],
["su",8774129154],
["id",8332214014],
["os",8176085241],
["iv",8116349309],
["ia",8072199471],
["am",8032259916],
["fi",8024355222],
["ci",7936922442],
["vi",7600241898],
["pl",7415349106],
["ig",7189051323],
["tu",7187510085],
["ev",7184041787],
["ld",7122648226],
["ry",6985436186],
["mp",6743935008],
["fe",6670566518],
["bl",6581097936],
["ab",6479202253],
["gh",6414827751],
["ty",6408447994],
["op",6313536754],
["wo",6252724050],
["sa",6147356936],
["ay",6128842727],
["ex",6035335807],
["ke",6027536039],
["fr",6011200185],
["oo",5928601045],
["av",5778409728],
["ag",5772552144],
["if",5731148470],
["ap",5719570727],
["gr",5548472398],
["od",5511014957],
["bo",5509918152],
["sp",5392724233],
["rd",5338083783],
["do",5307591560],
["uc",5291161134],
["bu",5214802738],
["ei",5169898489],
["ov",5021440160],
["by",4975814759],
["rm",4938158020],
["ep",4837800987],
["tt",4812693687],
["oc",4692062395],
["fa",4624241031],
["ef",4588497002],
["cu",4585165906],
["rn",4521640992],
["sc",4363410770],
["gi",4275639800],
["da",4259590348],
["yo",4226720021],
["cr",4214150542],
["cl",4201617719],
["du",4186093215],
["ga",4175274057],
["qu",4160167957],
["ue",4158448570],
["ff",4125634219],
["ba",4122472992],
["ey",4053144855],
["ls",3990203351],
["va",3946966167],
["um",3901923211],
["pp",3850125519],
["ua",3844138094],
["up",3835093459],
["lu",3811884104],
["go",3725558729],
["ht",3670802795],
["ru",3618438291],
["ug",3606562400],
["ds",3560125353],
["lt",3486149365],
["pi",3470838749],
["rc",3422694015],
["rr",3404547067],
["eg",3370515965],
["au",3356322923],
["ck",3316660134],
["ew",3293529190],
["mu",3231856188],
["br",3145611704],
["bi",3005679357],
["pt",2982699529],
["ak",2952167845],
["pu",2947681332],
["ui",2852182384],
["rg",2813274913],
["ib",2780268452],
["tl",2775935006],
["ny",2760941827],
["ki",2759841743],
["rk",2736041446],
["ys",2730343336],
["ob",2725791138],
["mm",2708822249],
["fu",2706168901],
["ph",2661480326],
["og",2651165734],
["ms",2617582287],
["ye",2612941418],
["ud",2577213760],
["mb",2544901434],
["ip",2515455253],
["ub",2497666762],
["oi",2474275212],
["rl",2432373251],
["gu",2418410978],
["dr",2409399231],
["hr",2379584978],
["cc",2344219345],
["tw",2322619238],
["ft",2302659749],
["wn",2227183930],
["nu",2217508482],
["af",2092395523],
["hu",2077887429],
["nn",2051719074],
["eo",2044268477],
["vo",2004982879],
["rv",1953555667],
["nf",1894270041],
["xp",1885334638],
["gn",1850801359],
["sm",1838392669],
["fl",1830098844],
["iz",1814164135],
["ok",1813376076],
["nl",1798491132],
["my",1753447198],
["gl",1709752272],
["aw",1689436638],
["ju",1655210582],
["oa",1620913259],
["eq",1614312175],
["sy",1602829285],
["sl",1575646777],
["ps",1538723474],
["jo",1516687319],
["lf",1507867867],
["nv",1466426243],
["je",1463052212],
["nk",1455100124],
["kn",1450401608],
["gs",1443474876],
["dy",1421751251],
["hy",1412343465],
["ze",1402290616],
["ks",1339590722],
["xt",1315669490],
["bs",1292319275],
["ik",1209994695],
["dd",1205446875],
["cy",1176324279],
["rp",1173542093],
["sk",1112771273],
["xi",1111463633],
["oe",1089254517],
["oy",1020190223],
["ws",989253674],
["lv",984229060],
["dl",911886482],
["rf",909634941],
["eu",878402090],
["dg",874188188],
["wr",867361010],
["xa",834649781],
["yi",812619095],
["nm",782441941],
["eb",763383542],
["rb",753194669],
["tm",746621025],
["xc",746076293],
["eh",742240059],
["tc",736955048],
["gy",731420025],
["ja",729206855],
["hn",726288117],
["yp",702499946],
["za",702199296],
["gg",697999944],
["ym",667551857],
["sw",663415953],
["bj",654853039],
["lm",649112313],
["cs",643530723],
["ii",642384029],
["ix",621227893],
["xe",614533122],
["oh",602121281],
["lk",555883002],
["dv",537221821],
["lp",536595562],
["ax",531206960],
["ox",523764012],
["uf",522547858],
["dm",512522701],
["iu",490874936],
["sf",483979931],
["bt",482272940],
["ka",478095427],
["yt",470429861],
["ek",464449289],
["pm",449910017],
["ya",444542870],
["gt",434302509],
["wl",429185823],
["rh",426095630],
["yl",416082307],
["hs",414044112],
["ah",384694919],
["yc",380670476],
["yn",372595315],
["rw",359714599],
["hm",359316447],
["lw",356374125],
["hl",355049620],
["ae",349540062],
["zi",341671190],
["az",334669428],
["lc",333338045],
["py",331698156],
["aj",331384552],
["iq",318727904],
["nj",312598990],
["bb",308276690],
["nh",306883963],
["uo",300484143],
["kl",298033281],
["lr",283411884],
["tn",282266629],
["gm",277966576],
["sn",258702825],
["nr",258048421],
["fy",256535008],
["mn",247850339],
["dw",230152384],
["sb",223212317],
["yr",219696469],
["dn",213431654],
["sq",209894196],
["zo",202480511],
["oj",196696657],
["yd",192245315],
["lb",188643782],
["wt",184446342],
["lg",171657388],
["ko",171324962],
["np",170186564],
["sr",168896339],
["nq",167770304],
["ky",167761726],
["ln",165509578],
["nw",163456000],
["tf",159626603],
["fs",155349948],
["cq",154363546],
["dh",153344431],
["sd",148275222],
["vy",138085211],
["dj",134832736],
["hw",134615178],
["xu",134528161],
["ao",130442323],
["ml",129888836],
["uk",129819900],
["uy",128782521],
["ej",128194584],
["ez",127540198],
["hb",123778334],
["nz",123192934],
["nb",122258836],
["mc",121591374],
["yb",121220723],
["tp",121089391],
["xh",117618666],
["ux",110947766],
["tz",108527540],
["bv",108385069],
["mf",107664447],
["wd",99767462],
["oz",97904996],
["yw",95070267],
["kh",89811517],
["gd",89087728],
["bm",88228719],
["mr",87580303],
["ku",85313841],
["uv",82252351],
["dt",81648332],
["hd",80544316],
["aa",79794787],
["xx",79068246],
["df",78347492],
["db",78190243],
["ji",77899882],
["kr",76743394],
["xo",76097183],
["cm",75144874],
["zz",75012595],
["nx",73899576],
["yg",73102462],
["xy",72645837],
["kg",72267691],
["tb",71746167],
["dc",71030077],
["bd",69761165],
["sg",69588685],
["wy",68368953],
["zy",66473188],
["aq",63283982],
["hf",63249924],
["cd",62905910],
["vu",62384927],
["kw",61416538],
["zu",60692846],
["bn",59062122],
["ih",58966344],
["tg",55522877],
["xv",55076715],
["uz",53873803],
["bc",53278096],
["xf",52374239],
["yz",51097953],
["km",50449220],
["dp",48855638],
["lh",45643026],
["wf",45330551],
["kf",44759608],
["pf",41022263],
["cf",39704311],
["mt",38538709],
["yu",37436235],
["cp",37067423],
["pb",36901495],
["td",36539510],
["zl",35456851],
["sv",35005005],
["hc",34631551],
["mg",34537023],
["pw",34037460],
["gf",33962536],
["pd",33798376],
["pn",33536129],
["pc",33156666],
["rx",32990613],
["tv",32805751],
["ij",31324465],
["wm",30732232],
["uh",30097154],
["wk",30095733],
["wb",29929113],
["bh",29797934],
["oq",29227658],
["kt",29132180],
["rq",28152573],
["kb",25406204],
["cg",24975673],
["vr",24701238],
["cn",24249641],
["pk",23099462],
["uu",22006895],
["yf",21246637],
["wp",20982546],
["cz",20601701],
["kp",20492678],
["dq",19927900],
["wu",19601657],
["fm",19340776],
["wc",19008254],
["md",18929019],
["kd",18894758],
["zh",18782710],
["gw",18260884],
["rz",17993128],
["cb",17751935],
["iw",17611969],
["xl",16728256],
["hp",16696129],
["mw",16465357],
["vs",16263248],
["fc",16254390],
["rj",15598009],
["bp",15427250],
["mh",15033898],
["hh",14730425],
["yh",14682887],
["uj",14548024],
["fg",14424524],
["fd",13966832],
["gb",13944852],
["pg",13354952],
["tk",13081991],
["kk",12782664],
["hq",11925353],
["fn",11823066],
["lz",11767790],
["vl",11621019],
["gp",11612944],
["hz",10729982],
["dk",9494027],
["yk",9292584],
["qi",8954617],
["lx",8612462],
["vd",8430332],
["zs",8395904],
["bw",8319869],
["xq",8225536],
["mv",8172535],
["uw",7824504],
["hg",7789748],
["fb",7730842],
["sj",7621847],
["ww",7377619],
["gk",7338894],
["uq",7235727],
["bg",7203255],
["sz",7041052],
["jr",6846578],
["ql",6708919],
["zt",6627349],
["hk",6595610],
["vc",6570845],
["xm",6569222],
["gc",6455066],
["fw",6451511],
["pz",6382200],
["kc",6326022],
["hv",6292998],
["xw",6292525],
["zw",6279286],
["fp",6262895],
["iy",6247588],
["pv",6222096],
["vt",6181932],
["jp",6129447],
["cv",5869407],
["zb",5858211],
["vp",5510046],
["zr",5320518],
["fh",5166165],
["yv",5115763],
["zg",4726653],
["zm",4713608],
["zv",4618705],
["qs",4448602],
["kv",4414960],
["vn",4317772],
["zn",4300522],
["qa",4298294],
["yx",4211192],
["jn",4150888],
["bf",4108696],
["mk",3956883],
["cw",3909223],
["jm",3659540],
["lq",3582930],
["jh",3541869],
["kj",3471162],
["jc",3447571],
["gz",3431194],
["js",3329038],
["tx",3328898],
["fk",3293208],
["jl",3192327],
["vm",3178223],
["lj",3169833],
["tj",3169658],
["jj",2979950],
["cj",2962048],
["vg",2960268],
["mj",2923325],
["jt",2917850],
["pj",2810773],
["wg",2751783],
["vh",2691078],
["bk",2639491],
["vv",2622571],
["jd",2615147],
["tq",2517400],
["vb",2496014],
["jf",2421784],
["dz",2200704],
["xb",2164724],
["jb",2126115],
["zc",2100797],
["fj",2065436],
["yy",1993017],
["qn",1859810],
["xs",1804740],
["qr",1744325],
["jk",1740133],
["jv",1719726],
["qq",1712219],
["xn",1613611],
["vf",1550317],
["px",1473468],
["zd",1415016],
["qt",1385505],
["zp",1361846],
["qo",1324363],
["dx",1296277],
["hj",1282370],
["gv",1192366],
["jw",1165914],
["qc",1127399],
["jy",1120221],
["gj",1093028],
["qb",1083443],
["pq",1075049],
["jg",1034900],
["bz",1007374],
["mx",994334],
["qm",989868],
["mz",970282],
["qf",947879],
["wj",914179],
["zq",913036],
["xr",907409],
["zk",906537],
["cx",876736],
["fx",812116],
["fv",807297],
["bx",778622],
["vw",742188],
["vj",724370],
["mq",708424],
["qv",635514],
["zf",608389],
["qe",572443],
["yj",561334],
["gx",557030],
["kx",555422],
["xg",542548],
["qd",519921],
["xj",511074],
["sx",503772],
["vz",501189],
["vx",449854],
["wv",389123],
["yq",384882],
["bq",346506],
["gq",345624],
["vk",337545],
["zj",309029],
["xk",281255],
["qp",272180],
["hx",263997],
["fz",263860],
["qh",263509],
["qj",236928],
["jz",220675],
["vq",212856],
["kq",187242],
["xd",168263],
["qw",163292],
["jx",161750],
["qx",159505],
["kz",150760],
["wx",150637],
["fq",149430],
["xz",136521],
["zx",108224],
["jq",0],
["qg",0],
["qk",0],
["qy",0],
["qz",0],
["wq",0],
["wz",0]
]
// By Simon Lydell 2015.
// This file is in the public domain.
var cheerio = require("cheerio")
var stdin = require("get-stdin")
var tools = require("text-frequencies-analysis")
// Read the whole of stdin (the article HTML), extract the bigram tuples from
// it, and write the serialized result to stdout.
stdin(function(html) {
  var bigrams = extract(html)
  process.stdout.write(tools.jsonStringifyRow(bigrams))
})
// Builds a list of [bigram, frequency] tuples from the HTML in `text`.
//
// Every <td> inside the first <table> of the document contributes one tuple:
// the cell's trimmed, lowercased text is the bigram, and the number parsed
// from the cell's `title` attribute is the frequency. The collected tuples
// are passed through `tools.sortTuples` before being returned.
function extract(text) {
  var $ = cheerio.load(text)
  var tuples = []
  var cells = $('table').first().find('td')
  cells.each(function(position, node) {
    var cell = $(node)
    var label = cell.text().trim().toLowerCase()
    var count = parse(cell.attr('title'))
    tuples.push([label, count])
  })
  return tools.sortTuples(tuples)
}
// Reads the number held in the third whitespace-separated field of `title`.
// Commas (thousands separators) are removed from that field before it is
// converted with `Number`, so a non-numeric field yields NaN.
function parse(title) {
  var fields = title.split(/\s+/)
  var digits = fields[2].replace(/,/g, '')
  return Number(digits)
}
{
"private": true,
"dependencies": {
"cheerio": "^0.19.0",
"get-stdin": "^4.0.1",
"text-frequencies-analysis": "2.0.0"
}
}
[
["er",110039824345],
["ht",103943748758],
["eh",87439576786],
["in",78159863540],
["it",69528728517],
["an",65765423162],
["es",64048876641],
["no",62670429486],
["en",60509139324],
["at",56861839163],
["or",56485276874],
["de",54502440704],
["ar",49641052926],
["is",47327677997],
["fo",46883347757],
["et",45607422863],
["al",45536962227],
["ot",41826028062],
["st",39220491602],
["dn",38343209285],
["el",38334889719],
["em",32896164138],
["ce",31825536958],
["as",30709301134],
["ev",30454171360],
["il",29772447949],
["nt",29642038573],
["ir",29403197929],
["gn",28722606870],
["ac",27800079569],
["ci",27638117938],
["co",27076230172],
["ah",26488106127],
["io",26016538477],
["mo",24901415675],
["ou",24831616384],
["am",23970949684],
["di",22232204612],
["rt",22204748857],
["su",21582646721],
["hi",21579812268],
["lo",21214490528],
["ae",19753481125],
["os",19390791175],
["ru",18922095885],
["tu",18611409903],
["ep",18315484491],
["im",17917584719],
["be",17012641429],
["ai",16994959186],
["ch",16889616787],
["op",16503042137],
["ll",16257360474],
["ei",16015629809],
["iv",15716591207],
["ow",15571090641],
["ap",14843223502],
["ad",14634720797],
["ns",14609023113],
["pr",14552022268],
["ho",14274724794],
["eg",14231561587],
["fi",13755503692],
["ct",13734804454],
["lu",13563109885],
["ew",13469670798],
["nu",13338626648],
["aw",12554643068],
["ly",12400030549],
["cn",11746880753],
["gi",11464691123],
["ss",11421755201],
["rs",11349628693],
["ef",11259063520],
["do",10818606517],
["hw",10815312862],
["ee",10647199443],
["ab",10601675245],
["iw",10575013460],
["ag",9947826201],
["cu",9876327040],
["av",9725375895],
["hs",9302749399],
["gr",8361747311],
["bo",8235709290],
["dl",8034534708],
["lp",7951944668],
["dr",7747483014],
["bu",7712469500],
["cr",7636844557],
["ry",7205132655],
["au",7200461017],
["mp",7193845025],
["mu",7133779399],
["ov",7026423039],
["ps",6931447707],
["fr",6920835126],
["ty",6878877855],
["pu",6782774791],
["bl",6769741718],
["du",6763306975],
["af",6716636554],
["ey",6666086273],
["ex",6649868929],
["ay",6573385597],
["ek",6491985328],
["gh",6422617499],
["go",6376724463],
["lt",6262084371],
["gu",6024973378],
["ip",5986294002],
["oo",5928601045],
["bi",5785947809],
["ls",5565850128],
["oy",5246910244],
["by",5097035482],
["eu",5036850660],
["mr",5025738323],
["cs",5006941493],
["tt",4812693687],
["nr",4779689413],
["cl",4534955764],
["ms",4455974956],
["sy",4333172621],
["qu",4167403684],
["ff",4125634219],
["ik",3969836438],
["br",3898806373],
["pp",3850125519],
["ds",3708400575],
["ak",3430263272],
["rr",3404547067],
["iu",3343057320],
["fl",3337966711],
["ck",3322986156],
["fu",3228716759],
["ny",3133537142],
["eo",3133522994],
["pt",3103788920],
["kn",2905501732],
["kr",2812784840],
["hr",2805680608],
["lr",2715785135],
["mm",2708822249],
["hp",2678176455],
["bm",2633130153],
["tw",2507065580],
["ft",2462286352],
["ks",2452361995],
["my",2420999055],
["nw",2390639930],
["cc",2344219345],
["iz",2155835325],
["hu",2107984583],
["nn",2051719074],
["ko",1984701038],
["rv",1978256905],
["ln",1964000710],
["fn",1906093107],
["px",1886808106],
["gl",1881409660],
["ao",1751355582],
["ix",1732691526],
["jo",1713383976],
["ju",1669758606],
["sw",1652669627],
["eq",1614884618],
["dy",1613996566],
["ej",1591246796],
["cy",1556994755],
["ez",1529830814],
["bs",1515531592],
["gs",1513063561],
["nv",1470744015],
["hy",1427026352],
["ax",1365856741],
["tx",1318998388],
["rw",1227075609],
["dd",1205446875],
["aj",1060591407],
["az",1036868724],
["py",1034198102],
["hn",1033172080],
["mn",1030292280],
["lv",995850079],
["dg",963275916],
["kl",853916283],
["iy",818866683],
["gy",804522487],
["lw",785559948],
["mt",785159734],
["lm",779001149],
["cx",746953029],
["gg",697999944],
["bj",656979154],
["ii",642384029],
["fs",639329879],
["ox",599861195],
["bt",554019107],
["dv",545652153],
["dm",531451720],
["gt",489825386],
["hl",400692646],
["hm",374350345],
["dw",329919846],
["iq",327682521],
["jn",316749878],
["gm",312503599],
["bb",308276690],
["oz",300385507],
["fy",277781645],
["ux",245475927],
["dh",233888747],
["ku",215133741],
["qs",214342798],
["np",203722693],
["cm",196736248],
["bn",181320958],
["ky",177054310],
["nq",169630114],
["uy",166218756],
["wy",163439220],
["cq",155490945],
["bh",153576268],
["bd",147951408],
["uv",144637278],
["vy",143200974],
["dj",137447883],
["cd",133935987],
["nz",127493456],
["fm",127005223],
["dt",118187842],
["hx",117882663],
["yz",117571141],
["tz",115154889],
["uz",114566649],
["bv",110881083],
["ij",109224347],
["hk",96407127],
["df",92314324],
["kw",91512271],
["dp",82654014],
["aa",79794787],
["gk",79606585],
["xx",79068246],
["xy",76857029],
["nx",75513187],
["zz",75012595],
["bc",71030031],
["cp",70224089],
["fh",68416089],
["aq",67582276],
["cf",55958701],
["vx",55526569],
["pw",55020006],
["km",54406103],
["fx",53186355],
["bp",52328745],
["fw",51782062],
["sv",51268253],
["fg",48387060],
["fk",48052816],
["fp",47285158],
["lz",47224641],
["mw",47197589],
["kp",43592140],
["kt",42214171],
["tv",38987683],
["bw",38248982],
["rx",33898022],
["cg",31430739],
["oq",30552021],
["qr",29896898],
["hz",29512692],
["dk",28388785],
["bk",28045695],
["uw",27426161],
["lx",25340718],
["gp",24967896],
["rz",23313646],
["cw",22917477],
["cz",22702498],
["jr",22444587],
["uu",22006895],
["bg",21148107],
["gw",21012667],
["dq",20447821],
["sz",15436956],
["hh",14730425],
["kk",12782664],
["cv",12440252],
["hq",12188862],
["bf",11839538],
["pv",11732142],
["mv",11350758],
["js",10950885],
["lq",10291849],
["hv",8984076],
["jp",8940220],
["qx",8385041],
["gz",8157847],
["pz",7744046],
["mx",7563556],
["ww",7377619],
["bz",6865585],
["jm",6582865],
["wx",6443162],
["cj",6409619],
["jl",6362160],
["wz",6279286],
["jt",6087508],
["mz",5683890],
["jk",5211295],
["vz",5119894],
["hj",4824239],
["kv",4752505],
["fj",4487220],
["gv",4152634],
["qt",3902905],
["dz",3615720],
["jj",2979950],
["bx",2943346],
["vv",2622571],
["jv",2444096],
["fv",2357614],
["sx",2308512],
["gj",2127928],
["jw",2080093],
["yy",1993017],
["qq",1712219],
["mq",1698292],
["jy",1681555],
["dx",1464540],
["bq",1429949],
["pq",1347229],
["vw",1131311],
["gx",1099578],
["fq",1097309],
["kz",1057297],
["qz",913036],
["fz",872249],
["qv",848370],
["kx",836677],
["jx",672824],
["jz",529704],
["qy",384882],
["gq",345624],
["xz",244745],
["jq",236928],
["kq",187242],
["qw",163292]
]
@jmorris-writer
Copy link

Does anyone know where to find the frequencies of the bigrams involving non-alphanumeric characters?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment