Skip to content

Instantly share code, notes, and snippets.

@sebastian-nagel
Created October 21, 2019 13:05
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sebastian-nagel/7bca5d0ace6de2805e89cb2701420d4d to your computer and use it in GitHub Desktop.
Save sebastian-nagel/7bca5d0ace6de2805e89cb2701420d4d to your computer and use it in GitHub Desktop.
character set and content language correlations
-- correlation metrics between character set and content language
-- * calculate probabilities for character sets given the content language
-- and vice versa
-- * calculate log-likelihood ratio (cf. http://aclweb.org/anthology/J93-1003)
-- Total number of page captures in the selected crawl
-- (usually between 2.5 and 3 billion per monthly crawl).
-- The view yields a single row; join_column is a constant so that
-- other views carrying the same constant can be joined against it.
CREATE OR REPLACE VIEW tmp_total_pages AS
SELECT
    COUNT(*)  AS total_pages,
    'xxxjoin' AS join_column
FROM "ccindex"."ccindex" AS idx
WHERE idx.subset = 'warc'
  AND idx.crawl = 'CC-MAIN-2019-39';
-- Number of page captures per character set in the selected crawl.
-- Character sets seen on fewer than 1000 pages are dropped.
-- join_column is a constant that allows joining this view with
-- tmp_total_pages, which carries the same constant.
CREATE OR REPLACE VIEW charset_counts AS
SELECT
    COUNT(*)  AS n_pages_charset,
    idx.content_charset,
    'xxxjoin' AS join_column
FROM "ccindex"."ccindex" AS idx
WHERE idx.subset = 'warc'
  AND idx.crawl = 'CC-MAIN-2019-39'
GROUP BY idx.content_charset
HAVING COUNT(*) >= 1000;
-- Attach the crawl-wide page total to every per-charset count.
-- tmp_total_pages is a global aggregate and therefore always holds exactly
-- one row, so a CROSS JOIN simply repeats total_pages on each charset row.
-- (An outer join on a dummy constant key would additionally emit an
-- all-NULL charset row whenever charset_counts is empty.)
CREATE OR REPLACE VIEW charset_counts_total AS
SELECT
    charsets.content_charset,
    totals.total_pages,
    charsets.n_pages_charset
FROM tmp_total_pages AS totals
CROSS JOIN charset_counts AS charsets;
-- Number of page captures per content language in the selected crawl.
-- Only pages annotated with a single language are counted: values of
-- content_languages containing a comma (more than one language) are
-- skipped, and so are rows where content_languages is NULL, because the
-- predicate does not evaluate to true for them.
-- Languages seen on fewer than 1000 pages are dropped.
CREATE OR REPLACE VIEW language_counts AS
SELECT
    COUNT(*) AS n_pages_languages,
    idx.content_languages
FROM "ccindex"."ccindex" AS idx
WHERE idx.subset = 'warc'
  AND idx.crawl = 'CC-MAIN-2019-39'
  AND idx.content_languages NOT LIKE '%,%'
GROUP BY idx.content_languages
HAVING COUNT(*) >= 1000;
-- Number of page captures per (content language, character set) pair.
-- As in language_counts, pages with more than one language (a comma in
-- content_languages) are skipped.
-- Pairs seen on fewer than 50 pages are dropped.
CREATE OR REPLACE VIEW language_charset_counts AS
SELECT
    COUNT(*) AS n_pages,
    idx.content_languages,
    idx.content_charset
FROM "ccindex"."ccindex" AS idx
WHERE idx.subset = 'warc'
  AND idx.crawl = 'CC-MAIN-2019-39'
  AND idx.content_languages NOT LIKE '%,%'
GROUP BY
    idx.content_languages,
    idx.content_charset
HAVING COUNT(*) >= 50;
-- Join the pair counts with the per-charset and per-language totals and
-- derive an association score for every (language, charset) pair.
--
-- The pair is viewed as a 2x2 contingency table (charset yes/no x language
-- yes/no) with observed counts O and expected counts E under independence
-- (E = row total * column total / total_pages). The score is
--   2 * (ln(O11/E11) + ln(O12/E12) + ln(O21/E21) + ln(O22/E22))
-- where a term is replaced by 0 when its observed count is 0.
--
-- The marginals are cast to DOUBLE *before* they are multiplied: with
-- total_pages around 3 billion the product of two marginals approaches
-- 9.2e18 and would overflow BIGINT arithmetic.
--
-- NOTE(review): the four terms are not weighted by their observed counts,
-- so the score can be negative and does not look like the G^2 statistic of
-- the paper cited in the file header - confirm this is intended.
CREATE OR REPLACE VIEW language_charset_loglikelihood AS
SELECT
    charset,
    language,
    total_pages,
    n_pages_charset,
    n_pages_languages,
    n_pages,
    2 * (
        ln(obs_both / (charset_d * language_d / total_d))
        + IF(obs_charset_only = 0, 0,
             ln(obs_charset_only / (charset_d * (total_d - language_d) / total_d)))
        + IF(obs_language_only = 0, 0,
             ln(obs_language_only / (language_d * (total_d - charset_d) / total_d)))
        + IF(obs_neither = 0, 0,
             ln(obs_neither / ((total_d - charset_d) * (total_d - language_d) / total_d)))
    ) AS loglikelihood
FROM (
    SELECT
        pairs.content_charset   AS charset,
        pairs.content_languages AS language,
        charsets.total_pages,
        charsets.n_pages_charset,
        languages.n_pages_languages,
        pairs.n_pages,
        -- observed cell counts of the 2x2 table (exact integer arithmetic)
        pairs.n_pages                               AS obs_both,
        charsets.n_pages_charset - pairs.n_pages    AS obs_charset_only,
        languages.n_pages_languages - pairs.n_pages AS obs_language_only,
        charsets.total_pages - languages.n_pages_languages
            - charsets.n_pages_charset + pairs.n_pages AS obs_neither,
        -- marginals as DOUBLE, used to compute the expected counts
        CAST(charsets.total_pages AS DOUBLE)        AS total_d,
        CAST(charsets.n_pages_charset AS DOUBLE)    AS charset_d,
        CAST(languages.n_pages_languages AS DOUBLE) AS language_d
    FROM language_charset_counts AS pairs
    INNER JOIN charset_counts_total AS charsets
        ON pairs.content_charset = charsets.content_charset
    INNER JOIN language_counts AS languages
        ON pairs.content_languages = languages.content_languages
) AS cells;
-- Report every language/charset pair with its association score and the
-- two conditional probabilities:
--   probability_charset_given_language = n_pages / n_pages_languages
--   probability_language_given_charset = n_pages / n_pages_charset
-- No filter on loglikelihood is applied, so pairs with a negative score are
-- returned as well; add "WHERE loglikelihood > 0" to keep positive scores only.
-- Rows are listed per language, most probable charset first; charset is the
-- final sort key so that ties are ordered deterministically.
SELECT
    language,
    charset,
    n_pages_languages,
    n_pages_charset,
    n_pages,
    loglikelihood,
    (n_pages / CAST(n_pages_languages AS DOUBLE)) AS probability_charset_given_language,
    (n_pages / CAST(n_pages_charset AS DOUBLE))   AS probability_language_given_charset
FROM language_charset_loglikelihood
ORDER BY
    language,
    probability_charset_given_language DESC,
    charset;
language charset n_pages_languages n_pages_charset n_pages loglikelihood probability_charset_given_language probability_language_given_charset
aar UTF-8 2900 2279847342 2014 1.5500624650553319 0.6944827586206896 8.833924811093777E-7
aar ISO-8859-1 2900 92311849 797 3.4935014736095025 0.27482758620689657 8.633777880453895E-6
aar windows-1252 2900 13338428 54 2.5209382440122923 0.018620689655172412 4.04845308607581E-6
abk UTF-8 3927 2279847342 3774 -1.9191251693328493 0.961038961038961 1.6553739938961228E-6
abk x-MacCyrillic 3927 111324 113 12.930154066221736 0.028775146422205244 0.0010150551543243146
afr UTF-8 157642 2279847342 147215 -0.9179382253463142 0.9338564595729565 6.45723059118754E-5
afr ISO-8859-1 157642 92311849 8916 0.8579624676192301 0.05655853135585694 9.65856506676624E-5
afr windows-1252 157642 13338428 1464 1.1484355846604486 0.00928686517552429 1.0975806144472197E-4
aka UTF-8 1055 2279847342 1024 -2.463525614669245 0.9706161137440759 4.4915288016683355E-7
amh UTF-8 13494 2279847342 13470 -8.016704377042041 0.9982214317474433 5.9082903279758285E-6
ara UTF-8 8052521 2279847342 6934018 0.40219430681074453 0.8610990272487337 0.0030414396053014326
ara windows-1256 8052521 3109658 1109453 8.293675059686093 0.13777710110908123 0.35677653298208356
ara windows-1252 8052521 13338428 3543 -4.927176259973587 4.399864340620782E-4 2.656235052586407E-4
ara ISO-8859-1 8052521 92311849 3357 -8.840796791340885 4.1688807765915794E-4 3.636586241491057E-5
ara ISO-8859-15 8052521 4433324 919 -5.429252959115507 1.1412575018432115E-4 2.07293669490432E-4
ara US-ASCII 8052521 1092812 772 -2.9806308062437594 9.587059754330352E-5 7.064344095782258E-4
ara UTF-16LE 8052521 17460 170 2.2473474261166886 2.111140101342176E-5 0.009736540664375716
ara ISO-8859-9 8052521 1249981 98 -7.375867422484723 1.2170101760678426E-5 7.840119169811381E-5
ara x-MacCyrillic 8052521 111324 87 -2.7793993402654826 1.0804069930398195E-5 7.815026409399591E-4
asm UTF-8 3256 2279847342 3256 0.23263000277457188 1.0 1.4281657986554785E-6
aym UTF-8 1145 2279847342 1107 -2.2279089123101987 0.9668122270742358 4.855588265084812E-7
aze UTF-8 1268198 2279847342 1261011 -5.705959698811539 0.994332903852553 5.531120337617763E-4
aze windows-1251 1268198 28402934 3676 -2.6664613193778197 0.0028986010070982606 1.2942324902068215E-4
aze EUC-JP 1268198 6657154 1055 -2.274438922243389 8.318890267923463E-4 1.58476129589311E-4
aze ISO-8859-9 1268198 1249981 966 0.8891082049540832 7.61710710788063E-4 7.728117467385504E-4
aze windows-1254 1268198 903946 609 0.6151362923735649 4.802089263663876E-4 6.737128102784901E-4
aze ISO-8859-1 1268198 92311849 567 -8.706018872378271 4.470910693756022E-4 6.142223410561303E-6
aze Big5 1268198 1662412 111 -4.005358426530633 8.752576490421843E-5 6.677045160886711E-5
aze windows-1252 1268198 13338428 110 -8.178958378834858 8.673724449967592E-5 8.246848879043317E-6
aze ISO-8859-15 1268198 4433324 86 -6.475184861665517 6.781275479065571E-5 1.9398537079626933E-5
bak UTF-8 15558 2279847342 14634 -1.1189462309346616 0.9406093328191284 6.418850828477971E-6
bak windows-1251 15558 28402934 923 3.253902493748007 0.05932639156703946 3.249664277641176E-5
bel UTF-8 287416 2279847342 265651 -0.6679482985562955 0.9242735268739388 1.1652139821210888E-4
bel windows-1251 287416 28402934 21608 3.692344942394969 0.07518022657054584 7.607664757450762E-4
bel x-MacCyrillic 287416 111324 146 4.913488123914868 5.079745038550394E-4 0.0013114871905429198
ben UTF-8 968921 2279847342 967927 -9.115057875459184 0.998974116568843 4.245578123449636E-4
ben ISO-8859-1 968921 92311849 977 -7.080782720566104 0.0010083381410868379 1.058369007428288E-5
bih UTF-8 1430 2279847342 1428 -8.496623259762437 0.9986013986013986 6.263577274201546E-7
bis UTF-8 2811 2279847342 2688 -1.6969669922301986 0.9562433297758804 1.179026310437938E-6
bis ISO-8859-1 2811 92311849 89 -0.25023465030778885 0.03166133048737104 9.641232513932204E-7
blu UTF-8 8150 2279847342 8123 -6.775764015844917 0.9966871165644172 3.562957857026552E-6
bod UTF-8 29784 2279847342 29779 -12.734152806288229 0.9998321246306742 1.3061839471179821E-5
bos UTF-8 626456 2279847342 599562 -1.733107701609273 0.9570696106350646 2.6298339759627643E-4
bos ISO-8859-1 626456 92311849 14944 -0.80023568126596 0.023854827793173026 1.6188604346989086E-4
bos windows-1250 626456 4195074 6714 3.7357844950019317 0.010717432668854636 0.0016004485260569896
bos ISO-8859-2 626456 5465900 1821 0.6161863697334087 0.0029068282529020393 3.331564792623356E-4
bos windows-1256 626456 3109658 1119 0.7706675242473169 0.0017862387781424395 3.598466455153589E-4
bos ISO-8859-9 626456 1249981 938 2.2388987555062814 0.0014973118622856195 7.50411406253375E-4
bos EUC-KR 626456 3740983 672 -0.6165856760296528 0.0010727010356673095 1.7963193096573815E-4
bos windows-1252 626456 13338428 534 -3.610673995283159 8.524142158427727E-4 4.0034702740083013E-5
bos EUC-JP 626456 6657154 74 -6.177131207372727 1.1812481642765015E-4 1.1115861222378212E-5
bre UTF-8 43844 2279847342 43122 -3.5951796479975004 0.9835325244047076 1.8914424314994333E-5
bre ISO-8859-1 43844 92311849 593 -1.9141817063790585 0.013525225800565642 6.423877394114379E-6
bre windows-1252 43844 13338428 109 -1.4735506563016234 0.002486087035854393 8.171877525597468E-6
bul UTF-8 3133272 2279847342 2835426 -0.2552559388789563 0.9049409052262299 0.001243691166406176
bul windows-1251 3133272 28402934 297260 4.097479019845296 0.09487206983626062 0.010465820186041343
bul ISO-8859-1 3133272 92311849 234 -12.282901773190595 7.468231293038076E-5 2.5348858519776807E-6
bul windows-1252 3133272 13338428 81 -10.598406260078638 2.5851569860516418E-5 6.072679629113716E-6
bul KOI8-R 3133272 231624 73 -2.710670712337549 2.3298328392811092E-5 3.1516595862259527E-4
cat UTF-8 4550443 2279847342 4092704 -0.15439704777458196 0.8994078158983642 0.0017951658098342973
cat ISO-8859-1 4550443 92311849 371903 1.535862905277125 0.08172896572927076 0.0040287677478976726
cat windows-1252 4550443 13338428 51301 1.5281568355064221 0.011273847403428633 0.0038461054031254656
cat ISO-8859-15 4550443 4433324 28244 2.535661284773185 0.006206868210413799 0.00637084047996492
cat US-ASCII 4550443 1092812 5252 1.9825732395865774 0.0011541733409252681 0.004805950154280883
cat GB18030 4550443 1946824 445 -4.096942624125448 9.779267644930395E-5 2.285774163458022E-4
cat windows-1250 4550443 4195074 303 -6.3989191393543745 6.658692351491931E-5 7.222756976396603E-5
cat EUC-JP 4550443 6657154 151 -8.713298115522313 3.3183582345718865E-5 2.268236546728527E-5
ceb UTF-8 6722 2279847342 6612 -3.6075764477828343 0.9836358226718238 2.9001941832647494E-6
ceb ISO-8859-1 6722 92311849 80 -2.1667403047846205 0.011901219875037191 8.666276417017711E-7
ces UTF-8 22260512 2279847342 20386700 -0.4712184230641797 0.9158234994774603 0.008942133810641783
ces windows-1250 22260512 4195074 1629389 6.4859117665629 0.07319638470130427 0.38840530584204236
ces ISO-8859-2 22260512 5465900 218287 2.970427725584529 0.009806018837302574 0.039936149581953566
ces ISO-8859-1 22260512 92311849 14097 -7.994541859463638 6.332738438361166E-4 1.5271062331337335E-4
ces windows-1252 22260512 13338428 11324 -4.627129442316158 5.087034835497045E-4 8.489756064207866E-4
ces ISO-8859-15 22260512 4433324 247 -10.079056059443573 1.1095881352594227E-5 5.571440300776573E-5
ces windows-1251 22260512 28402934 244 -13.799414245608581 1.0961113562886604E-5 8.590661795714484E-6
ces UTF-16LE 22260512 17460 145 -0.09035991024694696 6.513776502535072E-6 0.008304696449026346
cos UTF-8 22538 2279847342 19979 0.05852495872768634 0.8864584257698109 8.763306047708171E-6
cos ISO-8859-1 22538 92311849 2195 1.8564281780910465 0.09739107285473422 2.3778095919192346E-5
cos windows-1252 22538 13338428 186 0.9144935244667127 0.008252728724820304 1.394467174092779E-5
cos ISO-8859-15 22538 4433324 150 2.683470272029574 0.006655426390984116 3.383465769702372E-5
crs UTF-8 2020 2279847342 1784 0.10821900607130577 0.8831683168316832 7.825085334156553E-7
crs ISO-8859-1 2020 92311849 214 2.005707662282567 0.10594059405940594 2.318228941552238E-6
cym UTF-8 153862 2279847342 151718 -3.923998842385082 0.9860654352601682 6.654743815737467E-5
cym ISO-8859-1 153862 92311849 1949 -2.0434702810119725 0.012667195278886274 2.11132159209594E-5
cym windows-1252 153862 13338428 95 -4.255498404042093 6.174364040503828E-4 7.122278577355592E-6
cym US-ASCII 153862 1092812 63 -0.0824670196643762 4.0945782584393805E-4 5.7649440159881115E-5
dan UTF-8 6472236 2279847342 5814338 -0.13577356054902776 0.8983507399915578 0.0025503190028940102
dan ISO-8859-1 6472236 92311849 490415 1.3964032051180857 0.07577211337781874 0.005312589936314676
dan windows-1252 6472236 13338428 150850 2.942757421515001 0.023307246521912982 0.011309428667306222
dan ISO-8859-15 6472236 4433324 5154 -1.5483502694426736 7.96324485077491E-4 0.0011625588384697352
dan EUC-JP 6472236 6657154 4467 -2.6446153443552727 6.901787882889314E-4 6.710074605454523E-4
dan windows-1251 6472236 28402934 2760 -6.4904589086425055 4.264368604605889E-4 9.717305965644253E-5
dan windows-1250 6472236 4195074 1037 -4.641839605986877 1.602228348904459E-4 2.471946859578639E-4
dan GBK 6472236 21765704 941 -8.114783050485514 1.4539024843964282E-4 4.323315248613139E-5
dan EUC-KR 6472236 3740983 480 -5.953261417565391 7.416293225401547E-5 1.2830852211838438E-4
dan ISO-8859-2 6472236 5465900 438 -6.893301227124493 6.767367568178911E-5 8.013318941071004E-5
dan Shift_JIS 6472236 7879945 367 -7.976625293114629 5.670374195254932E-5 4.65739291327541E-5
dan ISO-8859-4 6472236 49925 240 1.2814235858420195 3.708146612700773E-5 0.0048072108162243365
dan x-MacCyrillic 6472236 111324 209 -0.593095081313935 3.2291776752269235E-5 0.001877402896051166
dan US-ASCII 6472236 1092812 110 -6.440579824805781 1.699567197487854E-5 1.0065775266010988E-4
dan IBM866 6472236 18855 81 1.057583564793611 1.2514994817865108E-5 0.0042959427207637235
dan GB2312 6472236 28523770 81 -13.55491340626609 1.2514994817865108E-5 2.8397368230076177E-6
deu UTF-8 80556322 2279847342 72360971 -0.13043921211928228 0.898265576226283 0.03173939310187375
deu ISO-8859-1 80556322 92311849 6574493 1.4568573174231672 0.08161361935069479 0.07122046704968503
deu windows-1252 80556322 13338428 793726 1.207270346323533 0.009853056598090463 0.05950671248515942
deu ISO-8859-15 80556322 4433324 766453 3.077327653806684 0.009514498439985877 0.1728844993057128
deu ISO-8859-2 80556322 5465900 29015 -3.5017889835332268 3.60182779943702E-4 0.005308366417241442
deu windows-1250 80556322 4195074 13431 -4.509404312260138 1.6672806884107742E-4 0.003201612176567088
deu US-ASCII 80556322 1092812 4647 -3.946022561037652 5.7686347695963576E-5 0.004252332514650278
deu ISO-8859-9 80556322 1249981 3846 -4.590649623403432 4.7742994025968563E-5 0.0030768467680708745
deu windows-1251 80556322 28402934 2316 -11.825093841654436 2.8750071285528652E-5 8.1540871798667E-5
deu windows-1254 80556322 903946 1445 -5.8975106167422 1.7937760365971027E-5 0.0015985468158496194
deu EUC-JP 80556322 6657154 1422 -9.915829961772376 1.765224584111474E-5 2.1360479267867319E-4
deu ISO-8859-5 80556322 60446 1040 -1.1773877951850198 1.291022199350164E-5 0.017205439565893526
deu EUC-KR 80556322 3740983 608 -10.464518059577193 7.547514396200959E-6 1.6252412801662022E-4
deu windows-1253 80556322 141359 580 -4.017828675698319 7.199931496375914E-6 0.004103028459454297
deu GB18030 80556322 1946824 465 -9.69600115045606 5.772358872094483E-6 2.3885055865347868E-4
deu ISO-8859-4 80556322 49925 383 -2.773496741273618 4.7544375226068535E-6 0.007671507260891337
deu windows-1257 80556322 382749 357 -6.974022279602489 4.431681972769313E-6 9.327261469004491E-4
deu UTF-16LE 80556322 17460 311 -1.1093282434901022 3.86065292305674E-6 0.017812142038946162
deu Shift_JIS 80556322 7879945 193 -14.246012296296005 2.3958392737940545E-6 2.4492556737388394E-5
deu windows-1255 80556322 864449 110 -10.955977103847268 1.3655042493126735E-6 1.2724868673571257E-4
deu windows-1256 80556322 3109658 91 -13.89367615155332 1.1296444244313934E-6 2.9263668223322308E-5
deu ISO-8859-13 80556322 13920 71 -3.5846613114187575 8.81370924556362E-7 0.0051005747126436785
div UTF-8 15879 2279847342 15679 -4.123471632618171 0.9874047484098495 6.877214851695101E-6
div windows-1252 15879 13338428 197 1.7214174992926627 0.012406322816298256 1.4769356628832123E-5
ell UTF-8 4787720 2279847342 4574557 -1.661902724286474 0.955477137343036 0.0020065189961302244
ell ISO-8859-7 4787720 344776 145360 9.684341958400125 0.03036100690934307 0.42160707241803375
ell windows-1253 4787720 141359 52361 9.634487528267815 0.01093652093271954 0.37041150545773527
ell ISO-8859-1 4787720 92311849 12716 -5.1444487197394855 0.002655961501508025 1.377504636484965E-4
ell GBK 4787720 21765704 948 -7.49843859375579 1.9800656680006348E-4 4.355475935903566E-5
ell GB2312 4787720 28523770 762 -8.470627948829183 1.5915717711144345E-4 2.6714561223849442E-5
ell windows-1252 4787720 13338428 578 -7.515086470358929 1.2072552279581931E-4 4.333344229170034E-5
ell ISO-8859-15 4787720 4433324 171 -7.7546940859393425 3.571637439115069E-5 3.857150977460704E-5
ell Shift_JIS 4787720 7879945 128 -9.481547674479712 2.673506387173853E-5 1.6243768198889714E-5
eng UTF-8 1002297355 2279847342 947075405 -0.7847934289997496 0.9449046236383613 0.4154117635653519
eng ISO-8859-1 1002297355 92311849 45147128 0.08897808507320419 0.04504364675291396 0.4890718633531
eng windows-1252 1002297355 13338428 6632340 0.09564769646246683 0.006617138084735343 0.4972355063130378
eng US-ASCII 1002297355 1092812 1011214 -2.4757193214753754 0.0010088962072537845 0.9253320790767305
eng windows-1251 1002297355 28402934 874138 -4.14799995954826 8.72134397680816E-4 0.030776327544189627
eng ISO-8859-15 1002297355 4433324 395379 -2.150682657224631 3.944727560415442E-4 0.08918342083727696
eng ISO-8859-2 1002297355 5465900 168271 -4.153242340679913 1.6788530784858752E-4 0.03078559798020454
eng GB2312 1002297355 28523770 160292 -7.497198476124627 1.5992459642877137E-4 0.005619593763377001
eng windows-1250 1002297355 4195074 137262 -4.035701674510303 1.3694738324436664E-4 0.032719804227529715
eng EUC-JP 1002297355 6657154 102046 -5.515851585189633 1.0181210145965117E-4 0.015328772625659553
eng windows-1256 1002297355 3109658 90620 -4.260222001808799 9.041229087150489E-5 0.02914146829008206
eng GBK 1002297355 21765704 59291 -8.94143518951982 5.915509973584636E-5 0.0027240561573381684
eng GB18030 1002297355 1946824 46890 -4.63124003896205 4.678252393472594E-5 0.024085382140347562
eng windows-1255 1002297355 864449 44354 -3.1756020707423724 4.4252336673082414E-5 0.05130898410432541
eng EUC-KR 1002297355 3740983 43272 -6.072142303586825 4.317281671365879E-5 0.011567013268972352
eng Shift_JIS 1002297355 7879945 43100 -7.556604231569646 4.3001210953011045E-5 0.0054695813232198956
eng ISO-8859-9 1002297355 1249981 42608 -3.957416403696418 4.251033866092563E-5 0.03408691812115544
eng Big5 1002297355 1662412 34471 -4.923969660597361 3.439198939121215E-5 0.02073553367035368
eng ISO-8859-7 1002297355 344776 21753 -2.7872370411342877 2.1703140182386294E-5 0.06309313873355454
eng KOI8-R 1002297355 231624 21434 -2.085119854601117 2.138487135885937E-5 0.0925379062618727
eng x-MacCyrillic 1002297355 111324 21430 -0.8536060359463848 2.1380880527216396E-5 0.19250116776256693
eng windows-1254 1002297355 903946 18198 -4.9820395076456325 1.815628855969594E-5 0.020131733532755276
eng ISO-8859-8 1002297355 28421 17866 -0.04020809233647751 1.782504953332936E-5 0.6286196826290419
eng x-windows-874 1002297355 452276 11780 -4.479132034971366 1.175299918854919E-5 0.0260460426819022
eng ISO-8859-5 1002297355 60446 11343 -0.8926393018068977 1.1317000831554623E-5 0.1876550971114714
eng ISO-8859-6 1002297355 12282 11281 -2.3149932735143257 1.1255142941088575E-5 0.918498615860609
eng TIS-620 1002297355 387297 9872 -4.5212061014706375 9.849372494852089E-6 0.025489482231982174
eng IBM866 1002297355 18855 9820 0.09325148343813851 9.797491683493468E-6 0.520816759480244
eng windows-1257 1002297355 382749 7159 -5.126367585039195 7.142590933007102E-6 0.018704163825379035
eng windows-1253 1002297355 141359 6690 -3.328997141379914 6.6746659228687675E-6 0.0473263110237056
eng UTF-16LE 1002297355 17460 4531 -0.42950587753766584 4.520614543575245E-6 0.25950744558991984
eng ISO-2022-JP 1002297355 16892 3055 -0.9497948503990289 3.0479976673189964E-6 0.18085484252900783
eng ISO-8859-4 1002297355 49925 2862 -2.9667483638980316 2.855440040545652E-6 0.05732598898347521
eng Big5-HKSCS 1002297355 11241 2411 -0.6925649376817044 2.4054737728006874E-6 0.21448269726892624
eng IBM855 1002297355 5749 2389 0.03883953126400547 2.3835241987643477E-6 0.41555053052704816
eng ISO-8859-3 1002297355 5592 2290 0.03012388634659963 2.284751115600819E-6 0.40951359084406297
eng windows-31j 1002297355 63073 852 -5.766794891823688 8.500471399527937E-7 0.013508157214656034
eng ISO-8859-13 1002297355 13920 474 -3.9597240108292886 4.7291354969204723E-7 0.03405172413793103
eng KOI8-U 1002297355 3475 342 -1.974972598553204 3.412161054740088E-7 0.09841726618705036
eng UTF-16BE 1002297355 1054 103 -1.9875929421288343 1.0276391480649972E-7 0.09772296015180265
eng x-iso-8859-11 1002297355 1419 78 -3.0457592964061426 7.782121703793183E-8 0.05496828752642706
eng x-windows-949 1002297355 1214 55 -3.4120998778086955 5.487393509084936E-8 0.045304777594728174
epo UTF-8 122614 2279847342 121463 -4.7048664122261625 0.9906128174596702 5.327681277705479E-5
epo ISO-8859-1 122614 92311849 465 -4.437604615880955 0.003792389123591107 5.037273167391545E-6
epo windows-1251 122614 28402934 255 -3.329477788435143 0.0020796972613241556 8.977945729127844E-6
epo windows-1252 122614 13338428 110 -3.508847994098887 8.971243088064984E-4 8.246848879043317E-6
epo ISO-8859-2 122614 5465900 108 -1.7674660672625875 8.808129577372893E-4 1.9758868621818915E-5
epo windows-1250 122614 4195074 71 -2.0775177901769264 5.790529629569217E-4 1.6924612056902928E-5
epo GB2312 122614 28523770 62 -6.162965716481619 5.05651883145481E-4 2.173625716376201E-6
est UTF-8 3292762 2279847342 3059679 -0.7914663661194303 0.9292135295536088 0.001342054331285134
est ISO-8859-1 3292762 92311849 149412 0.4403644539517675 0.04537588808422838 0.0016185571150243128
est ISO-8859-4 3292762 49925 33863 10.250336584123342 0.01028407154844474 0.6782774161241862
est windows-1252 3292762 13338428 22025 0.4967416952271793 0.006688913441056475 0.0016512440596448098
est windows-1257 3292762 382749 15783 6.845063793341413 0.004793240446773863 0.04123590133481733
est ISO-8859-15 3292762 4433324 9241 0.9626391070976686 0.002806458529344058 0.002084440478521308
est EUC-JP 3292762 6657154 1536 -3.429251192249476 4.6647768651363205E-4 2.3072922753476937E-4
est ISO-8859-13 3292762 13920 793 7.4671762845982785 2.4083125351908216E-4 0.0569683908045977
est windows-1251 3292762 28402934 273 -9.767496730837335 8.290912006394632E-5 9.611683074713337E-6
est EUC-KR 3292762 3740983 52 -9.048895138799864 1.5792213345513586E-5 1.390008989615831E-5
eus UTF-8 707378 2279847342 581119 0.8107580957267863 0.8215112712015358 2.548938208688185E-4
eus ISO-8859-1 707378 92311849 105164 2.5837746586820467 0.1486673320346406 0.0011392253663990632
eus windows-1252 707378 13338428 14359 2.688515112728286 0.02029890666659127 0.0010765136641289364
eus ISO-8859-15 707378 4433324 6663 3.3701511722908126 0.009419292089943425 0.0015029354949017938
eus windows-1250 707378 4195074 53 -6.165868515240231 7.492458063439915E-5 1.2633865338251483E-5
fao UTF-8 106833 2279847342 98829 -0.6876392132169624 0.9250793294206846 4.334895507227343E-5
fao ISO-8859-1 106833 92311849 6807 1.0810644351828933 0.06371626744545225 7.373917946329945E-5
fao windows-1252 106833 13338428 1170 1.4748585265577945 0.01095167223610682 8.771648353164255E-5
fas UTF-8 9356152 2279847342 9346768 -9.154476849207562 0.9989970235626783 0.004099734147901557
fas windows-1256 9356152 3109658 5536 -1.4327067932658182 5.916962443534479E-4 0.0017802600800473879
fas windows-1252 9356152 13338428 2124 -6.2489655505084745 2.270164058899428E-4 1.592391547189819E-4
fas ISO-8859-1 9356152 92311849 1355 -10.953817879637564 1.4482449622451622E-4 1.4678505681323749E-5
fas windows-1257 9356152 382749 151 -4.444626296227218 1.6139113601403653E-5 3.94514420677781E-4
fas UTF-16LE 9356152 17460 141 1.5775578570442077 1.5070298131112021E-5 0.008075601374570448
fin UTF-8 6949219 2279847342 6366909 -0.4819240010174286 0.9162049720983034 0.0027926909327247403
fin ISO-8859-1 6949219 92311849 516641 1.361275437484212 0.07434518900613148 0.005596692142955559
fin windows-1252 6949219 13338428 41821 0.28661404360463966 0.006018086348983965 0.0031353769724588236
fin ISO-8859-15 6949219 4433324 22195 1.2174923093118604 0.003193884089708498 0.005006401517236277
fin EUC-KR 6949219 3740983 883 -4.876346564225035 1.270646384867134E-4 2.3603421881361128E-4
fin windows-1251 6949219 28402934 281 -11.200622702757276 4.0436198657719666E-5 9.89334411719578E-6
fin ISO-8859-4 6949219 49925 165 0.3932382371593638 2.3743675368411904E-5 0.0033049574361542315
fin ISO-8859-2 6949219 5465900 112 -9.762361756753704 1.611691903795232E-5 2.049067857077517E-5
fin US-ASCII 6949219 1092812 86 -7.074634778389412 1.2375491404141961E-5 7.869606117063137E-5
fra UTF-8 62078878 2279847342 56033222 -0.20739611452757792 0.9026133172058941 0.02457762016242928
fra ISO-8859-1 62078878 92311849 4620935 1.3174532792503575 0.07443650962892726 0.050057875018839675
fra windows-1252 62078878 13338428 718776 1.5235923862298046 0.011578430911718476 0.05388760954439309
fra ISO-8859-15 62078878 4433324 685686 3.4013168453579854 0.011045399370781153 0.15466634065094273
fra windows-1256 62078878 3109658 6552 -4.838430225985835 1.0554314464253043E-4 0.0021069841120792063
fra US-ASCII 62078878 1092812 2915 -4.369260574618899 4.6956389901247894E-5 0.002667430445492912
fra ISO-8859-2 62078878 5465900 2881 -7.604651514242129 4.640869959022133E-5 5.270861157357434E-4
fra ISO-8859-9 62078878 1249981 1471 -6.002711576245734 2.369566022117861E-5 0.001176817887631892
fra windows-1250 62078878 4195074 1346 -8.597925021804077 2.1682092901228014E-5 3.208525046280471E-4
fra GB18030 62078878 1946824 1158 -7.365643793699146 1.865368765202232E-5 5.948149396144695E-4
fra windows-1251 62078878 28402934 1106 -12.796741116795927 1.781604364692287E-5 3.8939639123197624E-5
fra EUC-JP 62078878 6657154 569 -11.241137263308373 9.165758440415113E-6 8.547195993963787E-5
fra ISO-8859-5 62078878 60446 470 -2.240506639286742 7.57101312301424E-6 0.007775535188432651
fra Shift_JIS 62078878 7879945 390 -12.332851014671473 6.282330038245859E-6 4.949273123099209E-5
fra windows-1253 62078878 141359 379 -4.359697429014611 6.105136114090206E-6 0.0026811168726434114
fra UTF-16LE 62078878 17460 350 -0.37134605746124794 5.637988495861668E-6 0.02004581901489118
fra windows-1254 62078878 903946 148 -9.945648974319255 2.3840637068215055E-6 1.6372659428771187E-4
fra EUC-KR 62078878 3740983 97 -13.628880085047424 1.5625282402816624E-6 2.5929013844756845E-5
fra x-MacCyrillic 62078878 111324 75 -7.1180696015799425 1.2081403919703574E-6 6.737091732241026E-4
fra ISO-8859-3 62078878 5592 73 -1.214924444469685 1.175923314851148E-6 0.013054363376251788
fra KOI8-R 62078878 231624 51 -9.353735734169836 8.215354665398431E-7 2.201844368459227E-4
fry UTF-8 31867 2279847342 29760 -0.9186745772937199 0.933881444754762 1.30535055798486E-5
fry ISO-8859-1 31867 92311849 1674 0.7187875099828005 0.05253083126745536 1.813418340260956E-5
fry windows-1252 31867 13338428 398 1.7345812039787736 0.012489409106599304 2.983859867144764E-5
gla UTF-8 15349 2279847342 14953 -2.716311480754964 0.9742002736334615 6.558772477670569E-6
gla windows-1251 15349 28402934 206 0.376789642950811 0.013421069776532674 7.252771843922885E-6
gla ISO-8859-1 15349 92311849 129 -2.8554272273270445 0.008404456316372402 1.397437072244106E-6
gle UTF-8 81061 2279847342 79504 -3.292885567783385 0.9807922428788196 3.487251033670306E-5
gle ISO-8859-1 81061 92311849 899 -2.306180687648896 0.011090413392383513 9.738728123623653E-6
gle GB2312 81061 28523770 347 -1.898526538309884 0.004280726860018998 1.216529231584745E-5
gle windows-1252 81061 13338428 264 -0.935016456714725 0.003256806602435203 1.979243730970396E-5
glg UTF-8 1000963 2279847342 851390 0.524931104206113 0.8505709002230851 3.734416705519926E-4
glg ISO-8859-1 1000963 92311849 134260 2.411458606189077 0.13413083200877554 0.0014544178396859973
glg windows-1252 1000963 13338428 7128 0.6215472279865885 0.007121142339926651 5.34395807362007E-4
glg ISO-8859-15 1000963 4433324 4930 2.083447640147221 0.004925256977530638 0.0011120324163088463
glg windows-1251 1000963 28402934 3111 -2.5275415468291014 0.0031080069892693335 1.0953093789535969E-4
glg GB18030 1000963 1946824 60 -5.078179757942502 5.994227558860817E-5 3.081942692302951E-5
glv UTF-8 4527 2279847342 4367 -2.1065061308503705 0.9646565054119726 1.9154791286021115E-6
glv ISO-8859-1 4527 92311849 110 -0.7644348555396197 0.02429865252926883 1.1916130073399353E-6
grn UTF-8 17698 2279847342 15851 -0.08947535298403893 0.895637925189287 6.952658499535624E-6
grn ISO-8859-1 17698 92311849 1717 1.8495594068694199 0.09701661204655894 1.8599995760024264E-5
grn windows-1252 17698 13338428 87 -0.11495223147297945 0.004915809696010849 6.522507749788806E-6
guj UTF-8 20438 2279847342 20374 -6.8880552749678605 0.9968685781387612 8.936563262225651E-6
hat UTF-8 11147 2279847342 11004 -4.087238465441897 0.9871714362608773 4.826638958355309E-6
hat windows-1252 11147 13338428 92 0.9146460071763698 0.008253341706288688 6.897364517018047E-6
hau UTF-8 20019 2279847342 19269 -1.9943472125488735 0.9625355911883711 8.451881687436245E-6
hau ISO-8859-1 20019 92311849 692 -0.08062888586176248 0.03456716119686298 7.49632910072032E-6
haw UTF-8 10184 2279847342 10092 -4.780972369121927 0.9909662215239592 4.426612174456723E-6
haw ISO-8859-1 10184 92311849 68 -3.3120934271343887 0.006677140612725845 7.366334954465055E-7
heb UTF-8 5437323 2279847342 4867180 -0.08094602376219223 0.8951427016566792 0.00213487101102579
heb windows-1255 5437323 864449 556632 9.153592003338963 0.1023724358475669 0.6439153726824833
heb windows-1251 5437323 28402934 10381 -3.4967969578272573 0.0019092115734158151 3.6549041025127897E-4
heb ISO-8859-8 5437323 28421 1921 6.784817201503847 3.532988568087642E-4 0.06759086590901094
heb ISO-8859-1 5437323 92311849 501 -11.861055526844597 9.214093038063032E-5 5.427255606157342E-6
heb windows-1252 5437323 13338428 459 -8.230002847335934 8.441654100740383E-5 3.441185123164439E-5
heb UTF-16LE 5437323 17460 227 3.6023541345910144 4.1748485421962244E-5 0.01300114547537228
hin UTF-8 535230 2279847342 534107 -7.686487036836383 0.997901836593614 2.3427314196022164E-4
hin ISO-8859-1 535230 92311849 851 -6.171446986216434 0.001589970666816135 9.218751538602591E-6
hin windows-1252 535230 13338428 252 -4.797108677079035 4.708256263662351E-4 1.889278106835378E-5
hrv UTF-8 4159240 2279847342 3928439 -1.2450939054574093 0.9445088525788365 0.0017231149330173003
hrv windows-1250 4159240 4195074 135664 5.857281625037379 0.03261749742741463 0.03233888126884055
hrv ISO-8859-2 4159240 5465900 68683 4.0411033407959165 0.016513353401102125 0.012565725681040633
hrv ISO-8859-1 4159240 92311849 13237 -4.7842354185831235 0.003182552581721661 1.433943761650793E-4
hrv windows-1252 4159240 13338428 9138 -1.7183204446499487 0.0021970359969609833 6.850882277881622E-4
hrv EUC-KR 4159240 3740983 3558 -1.0675620386169518 8.554447447129764E-4 9.510869202025243E-4
hrv US-ASCII 4159240 1092812 116 -5.45182364383681 2.7889710620209463E-5 1.0614817553247952E-4
hrv Big5 4159240 1662412 88 -6.842796927895469 2.1157711504986487E-5 5.2935132807029785E-5
hrv ISO-8859-13 4159240 13920 73 2.337155987150238 1.755128340754561E-5 0.005244252873563219
hrv windows-1255 4159240 864449 65 -6.141486877225804 1.5627855088910474E-5 7.519240579837561E-5
hrv Shift_JIS 4159240 7879945 63 -10.618345378752071 1.514699800925169E-5 7.994979660391031E-6
hun UTF-8 10194091 2279847342 9072019 0.004233688909647888 0.8899291756371411 0.003979222131619373
hun ISO-8859-2 10194091 5465900 930607 6.9600742781024945 0.09128886528480078 0.17025686529208364
hun windows-1250 10194091 4195074 126962 3.9819947054235834 0.012454469947344986 0.030264543605190278
hun ISO-8859-1 10194091 92311849 40109 -4.3575385788827 0.003934534231644587 4.3449460101270424E-4
hun windows-1252 10194091 13338428 22635 -1.694521687096566 0.0022204039575475635 0.0016969765852467773
hun US-ASCII 10194091 1092812 732 -3.556885910578821 7.180630425998748E-5 6.698315904290949E-4
hun GB18030 10194091 1946824 624 -5.029651755520539 6.12119315003172E-5 3.205220399995069E-4
hun windows-1251 10194091 28402934 158 -13.115927551329834 1.5499174963221342E-5 5.562805589028232E-6
hun ISO-8859-15 10194091 4433324 115 -10.055363717622177 1.1281045068167432E-5 2.5939904234384855E-5
hun UTF-16LE 10194091 17460 90 0.5146592746072918 8.828643966391904E-6 0.005154639175257732
hye UTF-8 597192 2279847342 596212 -8.177024688851604 0.9983589867245375 2.615140009668244E-4
hye windows-1252 597192 13338428 772 -2.778756622031589 0.0012927165802622941 5.787788486019492E-5
hye ISO-8859-1 597192 92311849 116 -10.373319211900338 1.9424238770780587E-4 1.2566100804675682E-6
hye windows-1251 597192 28402934 68 -9.135068883758926 1.1386622727698965E-4 2.394118861100758E-6
ibo UTF-8 2954 2279847342 2925 -4.6162288117328565 0.9901828029790115 1.282980639148426E-6
iku UTF-8 1053 2279847342 931 0.09370657555719535 0.8841405508072174 4.0836067522980665E-7
iku ISO-8859-1 1053 92311849 122 2.1623951131474755 0.11585944919278253 1.321607153595201E-6
ile UTF-8 6788 2279847342 4993 1.3761919929470086 0.7355627578078963 2.1900589166728517E-6
ile ISO-8859-1 6788 92311849 1567 3.2624811383557457 0.2308485562757808 1.6975068931833443E-5
ile windows-1252 6788 13338428 146 2.803387497094131 0.02150854449027696 1.0945817603093858E-5
ile ISO-8859-9 6788 1249981 53 5.530079016901287 0.007807896287566294 4.240064448979624E-5
ina UTF-8 15414 2279847342 12723 0.776277546925955 0.8254184507590502 5.580636810900999E-6
ina windows-1251 15414 28402934 1520 4.184811070924043 0.09861165174516673 5.3515598071664E-5
ina ISO-8859-1 15414 92311849 988 1.0922356084669518 0.06409757363435838 1.0702851375016873E-5
ina windows-1252 15414 13338428 131 0.9727544317430865 0.008498767354353185 9.821247301406132E-6
ind UTF-8 5687761 2279847342 5526621 -2.5314440903250643 0.9716689924207434 0.0024241188864653376
ind ISO-8859-1 5687761 92311849 131183 -0.8646529663052372 0.023064084443773218 0.001421085174017043
ind windows-1252 5687761 13338428 20718 -0.7106053623922811 0.0036425581173329894 0.0015532565006910859
ind windows-1256 5687761 3109658 2633 -1.9243133610733827 4.629238113204827E-4 8.467169058462377E-4
ind US-ASCII 5687761 1092812 2092 -0.29632398174062213 3.6780729710689323E-4 0.0019143274414995443
ind Shift_JIS 5687761 7879945 1303 -5.185283742825035 2.290883882075917E-4 1.6535648408713512E-4
ind GB18030 5687761 1946824 946 -3.03455250652912 1.6632203779307886E-4 4.859196311530986E-4
ind EUC-KR 5687761 3740983 915 -4.405594427598847 1.6087173845736485E-4 2.445881202881702E-4
ind windows-1251 5687761 28402934 805 -8.696237407373602 1.4153196662096033E-4 2.834214239979574E-5
ind EUC-JP 5687761 6657154 244 -8.1988651872166 4.2899130255297295E-5 3.665229916567951E-5
ind ISO-8859-2 5687761 5465900 172 -8.504780913552684 3.024037050783252E-5 3.146782780511901E-5
ipk UTF-8 2077 2279847342 1968 -1.3517366136671918 0.9475204622051036 8.632156915706332E-7
ipk ISO-8859-1 2077 92311849 65 -0.2727452170755928 0.031295137217140105 7.04134958882689E-7
isl UTF-8 895628 2279847342 784541 0.21138496945501292 0.8759674775688121 3.4411997046774196E-4
isl ISO-8859-1 895628 92311849 91746 1.9450414424191662 0.10243761919011019 9.938702451946337E-4
isl windows-1252 895628 13338428 19303 2.805209564558977 0.021552474911458775 0.0014471720355652105
ita UTF-8 29006423 2279847342 26358144 -0.32457350389201944 0.908700255801965 0.011561363567824359
ita ISO-8859-1 29006423 92311849 2103169 1.2982999842881529 0.07250700991294239 0.022783304882128402
ita windows-1252 29006423 13338428 457187 2.1466360045487645 0.015761578047731015 0.03427592816784707
ita ISO-8859-15 29006423 4433324 72446 0.7215037313160576 0.0024975847590721543 0.01634123741012387
ita ISO-8859-2 29006423 5465900 6194 -4.579770037733853 2.1353891170931347E-4 0.0011332077059587626
ita EUC-KR 29006423 3740983 2375 -5.738652620777133 8.187841706645455E-5 6.348598750649227E-4
ita windows-1251 29006423 28402934 1983 -10.133399168181976 6.836416886011763E-5 6.981673090533534E-5
ita windows-1250 29006423 4195074 1745 -6.583431469785972 6.0159089591984504E-5 4.159640568914875E-4
ita US-ASCII 29006423 1092812 1433 -4.291254981206362 4.940285122367553E-5 0.0013112959960176133
ita ISO-8859-4 29006423 49925 368 -0.8510698533942223 1.2686845254928538E-5 0.007371056584877316
ita EUC-JP 29006423 6657154 273 -11.214314640407617 9.411708572270355E-6 4.1008515050125024E-5
ita GB18030 29006423 1946824 211 -9.274329343294134 7.2742509477986996E-6 1.083816513459871E-4
ita ISO-8859-3 29006423 5592 134 1.4730828928591875 4.619666478696805E-6 0.02396280400572246
ita KOI8-R 29006423 231624 117 -6.198131251237004 4.033589388115866E-6 5.051290021759403E-4
ita ISO-8859-9 29006423 1249981 111 -9.673335556599552 3.826738650263771E-6 8.880134978051666E-5
ita ISO-8859-8 29006423 28421 107 -2.1875368691943513 3.6888381583623737E-6 0.0037648217867070123
ita Big5 29006423 1662412 69 -11.194049340194894 2.378783485299101E-6 4.1505956405511994E-5
ita ISO-8859-7 29006423 344776 57 -8.431164425371287 1.9650820095949092E-6 1.6532473257999396E-4
ita windows-1254 29006423 903946 56 -10.393674354303663 1.93060688661956E-6 6.19506032439991E-5
jav UTF-8 14786 2279847342 14313 -2.298959305800177 0.9680102799945894 6.278051927566297E-6
jav ISO-8859-1 14786 92311849 263 -1.3750225424001108 0.017787095901528473 2.8490383720945725E-6
jav windows-1252 14786 13338428 183 1.716677222784211 0.012376572433382929 1.3719757680590246E-5
jpn UTF-8 53236727 2279847342 46117370 0.3316283347646589 0.8662698215838851 0.020228271055878443
jpn Shift_JIS 53236727 7879945 4562476 4.7962040190216655 0.0857016623129367 0.5789984574765433
jpn EUC-JP 53236727 6657154 2424250 4.776742335948919 0.045537172110524375 0.3641571157885186
jpn GBK 53236727 21765704 75872 -3.522284946391546 0.0014251815292852246 0.0034858509515704158
jpn windows-31j 53236727 63073 40366 4.851584784114007 7.582359448957108E-4 0.6399885846558749
jpn GB18030 53236727 1946824 8464 -3.0949318626439437 1.5898798586922897E-4 0.004347593824608696
jpn ISO-2022-JP 53236727 16892 3679 4.249071802985715 6.910642722269534E-5 0.2177954061094009
jpn GB2312 53236727 28523770 901 -12.9147903864325 1.692440634075795E-5 3.15876898460477E-5
jpn ISO-8859-1 53236727 92311849 894 -15.229304596631573 1.6792918167189354E-5 9.684563896017292E-6
jpn UTF-16LE 53236727 17460 893 1.7376888255946785 1.677413414239384E-5 0.051145475372279495
jpn US-ASCII 53236727 1092812 696 -6.929424105456369 1.3073681257677619E-5 6.368890531948771E-4
jpn windows-1252 53236727 13338428 337 -13.37312492605647 6.330216356088157E-6 2.5265346111250892E-5
jpn Big5 53236727 1662412 276 -9.6169592786837 5.184390843561814E-6 1.6602382562204798E-4
jpn UTF-16BE 53236727 1054 99 2.86113901451218 1.859618454755868E-6 0.09392789373814042
jpn EUC-KR 53236727 3740983 76 -13.816562841887446 1.4275858844590503E-6 2.0315516002077527E-5
kal UTF-8 22308 2279847342 21673 -2.525156168994598 0.9715348753810292 9.506338253765414E-6
kal ISO-8859-1 22308 92311849 606 -0.5472887740156072 0.027165142549757933 6.564704385890916E-6
kan UTF-8 31677 2279847342 31591 -7.172673717805953 0.9972850964422136 1.3856629528662537E-5
kan ISO-8859-1 31677 92311849 69 -5.543387860535668 0.0021782365754332796 7.474663409677776E-7
kat UTF-8 913985 2279847342 913588 -10.8327718756792 0.9995656383857503 4.0072332176353234E-4
kat windows-1252 913985 13338428 237 -5.989362018662231 2.5930403671832686E-4 1.7768210766666056E-5
kat windows-1251 913985 28402934 139 -8.556124103173293 1.5208127048036893E-4 4.893860613132432E-6
kaz UTF-8 422859 2279847342 422275 -8.521524343610809 0.998618924984451 1.852207348363766E-4
kaz windows-1251 422859 28402934 573 -4.184567119293428 0.001355061616283442 2.0173972167804918E-5
kha UTF-8 4585 2279847342 4334 -1.272041702505297 0.9452562704471101 1.90100447523736E-6
kha ISO-8859-1 4585 92311849 194 0.30758899164398434 0.04231188658669575 2.101572031126795E-6
khm UTF-8 85976 2279847342 85967 -13.678608326382216 0.9998953196240812 3.770734926689666E-5
kin UTF-8 33954 2279847342 33484 -3.93717607797673 0.9861577428285327 1.4686948280767827E-5
kin ISO-8859-1 33954 92311849 350 -2.450932208284905 0.010308063851092654 3.791495932445249E-6
kir UTF-8 64648 2279847342 63390 -3.2673936835512007 0.9805407746566019 2.7804493236108966E-5
kir windows-1251 64648 28402934 1248 1.091821509757701 0.01930454151713897 4.3939122627260973E-5
kor UTF-8 7301395 2279847342 5556186 1.2361400486618916 0.7609759504861742 0.0024370868600025763
kor EUC-KR 7301395 3740983 1735480 8.40436648547206 0.2376915644202238 0.46391015409586195
kor ISO-8859-1 7301395 92311849 8149 -6.8733278958742385 0.0011160880900156751 8.827685815284667E-5
kor x-windows-949 7301395 1214 516 8.907742293316893 7.067142648767803E-5 0.42504118616144976
kor x-IBM949 7301395 1294 494 8.83812110450361 6.765830365293207E-5 0.38176197836166925
kor US-ASCII 7301395 1092812 468 -3.7857918392221976 6.409734030277776E-5 4.2825298404483114E-4
kur UTF-8 110389 2279847342 108760 -3.81101662887784 0.9852430948735834 4.770494848334455E-5
kur windows-1252 110389 13338428 1567 1.98706347314322 0.014195254961998026 1.1748011084964436E-4
lao UTF-8 23752 2279847342 23728 -9.146002145452496 0.998989558773998 1.0407714395115847E-5
lat UTF-8 129074 2279847342 95666 1.3483932513694685 0.7411717309450393 4.196158147855498E-5
lat ISO-8859-1 129074 92311849 14316 2.086091888438032 0.11091311960580752 1.5508301648253195E-4
lat windows-1251 129074 28402934 12760 4.188559826967059 0.09885801943071416 4.492493627594952E-4
lat windows-1252 129074 13338428 3803 3.416029109177483 0.029463718487069433 2.851160571545612E-4
lat windows-1250 129074 4195074 1495 3.891381448069545 0.011582503060260006 3.5637035246577294E-4
lat GB2312 129074 28523770 551 -1.9040275828533728 0.004268869020871748 1.931722209231108E-5
lat ISO-8859-2 129074 5465900 281 0.039636899663228815 0.0021770457257077333 5.1409648914176986E-5
lav UTF-8 3038630 2279847342 2900679 -1.6254896525733293 0.9546009221260897 0.0012723128196186042
lav windows-1257 3038630 382749 126674 10.375807058818316 0.04168786591325696 0.33095840877441873
lav windows-1251 3038630 28402934 3446 -4.538382542028324 0.0011340637063413446 1.2132549404931195E-4
lav windows-1252 3038630 13338428 2311 -3.8369674232149755 7.605401118267114E-4 1.7325879781335553E-4
lav ISO-8859-2 3038630 5465900 1774 -2.5877260920385328 5.838157327479819E-4 3.2455771236209954E-4
lav ISO-8859-1 3038630 92311849 1368 -8.690837782302644 4.5020288748547863E-4 1.4819332673100286E-5
lav EUC-JP 3038630 6657154 874 -4.395966281285645 2.876296225601669E-4 1.3128733389673725E-4
lav Big5 3038630 1662412 535 -2.60680899053851 1.7606618772275664E-4 3.218215460427379E-4
lav ISO-8859-13 3038630 13920 435 6.480671805729447 1.4315661992411055E-4 0.03125
lav windows-1250 3038630 4195074 336 -5.385810877866942 1.105761478034509E-4 8.009393874816034E-5
lav GB18030 3038630 1946824 97 -6.3367259944229115 3.192228076468672E-5 4.982474019223104E-5
lav ISO-8859-4 3038630 49925 84 0.6976112620877418 2.7644036950862724E-5 0.0016825237856785177
lin UTF-8 3265 2279847342 2643 0.9118982248840376 0.8094946401225115 1.1592881467587315E-6
lin windows-1255 3265 864449 439 11.68551716926033 0.13445635528330782 5.078379406997984E-4
lin ISO-8859-1 3265 92311849 101 -0.2952038569366328 0.030934150076569678 1.094117397648486E-6
lin windows-1252 3265 13338428 58 2.4285025980321002 0.01776416539050536 4.3483384998592035E-6
lit UTF-8 5384784 2279847342 5206218 -2.2268790701060635 0.9668387812770206 0.0022835818451918083
lit windows-1257 5384784 382749 163112 9.455626162203007 0.030291280021631322 0.4261591800370478
lit ISO-8859-13 5384784 13920 7846 9.525246983785808 0.0014570686586500034 0.5636494252873563
lit ISO-8859-1 5384784 92311849 2668 -8.497571285133581 4.954701989903402E-4 2.8902031850754068E-5
lit EUC-JP 5384784 6657154 1735 -4.167439347006738 3.222041961200301E-4 2.6062188136251617E-4
lit ISO-8859-2 5384784 5465900 1613 -3.919890296615443 2.9954776273291556E-4 2.951023619166103E-4
lit windows-1252 5384784 13338428 708 -7.343967970492367 1.314815970334186E-4 5.307971823966063E-5
lit windows-1251 5384784 28402934 636 -9.058221720250089 1.1811058716561332E-4 2.239205287735415E-5
lit EUC-KR 5384784 3740983 129 -8.213882058059744 2.3956392679817796E-5 3.4482915319315804E-5
lit ISO-8859-4 5384784 49925 54 -1.3273212981329299 1.0028257400853962E-5 0.0010816224336504757
ltz UTF-8 46769 2279847342 35084 1.3019191307894071 0.750155017212256 1.5388749656028503E-5
ltz ISO-8859-1 46769 92311849 11393 3.3364109562291326 0.24360153092860656 1.234186090238535E-4
ltz windows-1252 46769 13338428 172 -0.692820506580837 0.003677649725245355 1.2895072792685915E-5
ltz windows-1255 46769 864449 78 3.1923401151805364 0.0016677713870298701 9.023088695805073E-5
lug UTF-8 2210 2279847342 2081 -1.1514179880248099 0.9416289592760181 9.127804136984185E-7
lug ISO-8859-1 2210 92311849 60 -0.5484251309038128 0.027149321266968326 6.499707312763283E-7
lug windows-1251 2210 28402934 59 1.725132464469923 0.02669683257918552 2.0772501883080106E-6
mal UTF-8 70682 2279847342 63812 -0.2158074647929921 0.902804108542486 2.7989593348833966E-5
mal UTF-16 70682 88200 6659 15.473438866042782 0.09421069013327298 0.07549886621315192
mal windows-1252 70682 13338428 193 -1.2864540352877478 0.002730539599898135 1.446947121504873E-5
mar UTF-8 64839 2279847342 64817 -11.327134646763986 0.9996606980366755 2.843041233766958E-5
mfe UTF-8 1546 2279847342 1357 0.18659233305273287 0.8777490297542044 5.95215291392962E-7
mfe ISO-8859-1 1546 92311849 121 1.4611592691286048 0.07826649417852523 1.3107743080739288E-6
mkd UTF-8 68461 2279847342 67471 -3.850981163733455 0.9855392121061627 2.959452536888323E-5
mkd windows-1251 68461 28402934 950 0.44259916237561975 0.013876513635500505 3.344724879479E-5
mlg UTF-8 71378 2279847342 69932 -3.1885299939650555 0.979741657093222 3.067398360920606E-5
mlg ISO-8859-1 71378 92311849 1391 -1.1958390299941701 0.019487797360531255 1.5068488120089545E-5
mlt UTF-8 33882 2279847342 33602 -4.957534303442581 0.9917360250280385 1.4738706132193301E-5
mlt windows-1252 33882 13338428 152 -0.2970057097385292 0.004486157841921965 1.1395645723768948E-5
mlt ISO-8859-1 33882 92311849 104 -4.859191381749658 0.0030694764181571337 1.1266159342123024E-6
mon UTF-8 375847 2279847342 375662 -10.583151697569281 0.9995077784311169 1.6477506764573537E-4
mon windows-1251 375847 28402934 150 -6.627447925799334 3.9909856936466167E-4 5.28114454654579E-6
mri UTF-8 11815 2279847342 11646 -3.8725201691244404 0.9856961489631824 5.108236760178656E-6
mri ISO-8859-1 11815 92311849 144 -2.1197131875801603 0.012187896741430384 1.559929755063188E-6
msa UTF-8 1335587 2279847342 1307767 -3.1333604447158785 0.9791702075566773 5.736204244503315E-4
msa ISO-8859-1 1335587 92311849 20135 -1.6996997353920624 0.015075768182828974 2.1811934457081452E-4
msa windows-1252 1335587 13338428 5787 -0.36603408448087965 0.004332926271369817 4.338592223911244E-4
msa GB2312 1335587 28523770 1431 -4.661437399004877 0.0010714390002298615 5.016868387313458E-5
msa windows-1251 1335587 28402934 201 -8.576761638623344 1.504956247702321E-4 7.076733692371359E-6
msa US-ASCII 1335587 1092812 114 -3.217001957729537 8.535572748162419E-5 1.0431803457502297E-4
mya UTF-8 54532 2279847342 54327 -6.523822584487372 0.996240739382381 2.3829227071116764E-5
mya ISO-8859-1 54532 92311849 129 -5.378729318125023 0.0023655835105992813 1.397437072244106E-6
nau UTF-8 1017 2279847342 956 -1.1004456939993164 0.9400196656833825 4.1932632171825475E-7
nep UTF-8 145755 2279847342 145615 -9.24716299450937 0.9990394840657267 6.387050453661471E-5
nep ISO-8859-1 145755 92311849 79 -8.32201522406419 5.420054200542005E-4 8.55794796180499E-7
nep Big5 145755 1662412 56 -1.0482687481831707 3.842063737093067E-4 3.36859936044735E-5
nld UTF-8 21994993 2279847342 20774989 -1.2398141116120047 0.9445326488624024 0.009112447407015903
nld ISO-8859-1 21994993 92311849 990456 0.4223054616693678 0.04503097591347267 0.010729456843617117
nld windows-1252 21994993 13338428 186186 0.9540441112479339 0.008464926540326701 0.013958616412668719
nld ISO-8859-15 21994993 4433324 35712 -0.12682267889118068 0.0016236422534892372 0.008055355304507407
nld windows-1250 21994993 4195074 1461 -6.390692034348754 6.642420845507884E-5 3.4826560866387576E-4
nld ISO-8859-2 21994993 5465900 1368 -7.050280204138418 6.219597341995063E-5 2.5027900254303957E-4
nld US-ASCII 21994993 1092812 1018 -4.426472361341333 4.628326092215624E-5 9.315417473453805E-4
nld windows-1255 21994993 864449 826 -4.375846443077549 3.7554001494794746E-5 9.555219567608963E-4
nld ISO-8859-3 21994993 5592 650 4.980548367930419 2.9552180353046714E-5 0.11623748211731044
nld windows-1253 21994993 141359 516 -1.701177260160726 2.3459884711034006E-5 0.003650280491514513
nld ISO-8859-8 21994993 28421 297 0.3886585931576921 1.3503073176699806E-5 0.010450019351887689
nld windows-1251 21994993 28402934 290 -13.430204271793114 1.3184818926743918E-5 1.0210212789988528E-5
nld Shift_JIS 21994993 7879945 276 -10.980870228799441 1.2548310426832144E-5 3.502562517885594E-5
nld EUC-JP 21994993 6657154 191 -11.380796363445215 8.68379453451065E-6 2.869093910100322E-5
nld UTF-16LE 21994993 17460 184 0.405320465315369 8.365540284554761E-6 0.010538373424971364
nld GB18030 21994993 1946824 129 -9.71045512515271 5.864971177758502E-6 6.626176788451344E-5
nld ISO-8859-9 21994993 1249981 88 -9.589808739571465 4.000910570874017E-6 7.040107009626547E-5
nld ISO-8859-7 21994993 344776 86 -7.060869317405984 3.909980785172335E-6 2.4943731582244706E-4
nld windows-1254 21994993 903946 55 -9.881833894969706 2.5005691067962603E-6 6.084434247178482E-5
nld windows-1256 21994993 3109658 54 -12.38773303158337 2.455104213945419E-6 1.7365253670982468E-5
nno UTF-8 445418 2279847342 415720 -0.9030095381028497 0.9333255503818885 1.8234554232710552E-4
nno ISO-8859-1 445418 92311849 21771 0.5822171034285896 0.04887768343443687 2.3584187984361575E-4
nno windows-1252 445418 13338428 7292 2.267274676182193 0.016371139019976742 5.466911093271261E-4
nno US-ASCII 445418 1092812 518 2.0031867291179757 0.0011629525524338936 4.740065079812447E-4
nor UTF-8 5109932 2279847342 4768022 -0.8951880938821116 0.9330891291704078 0.0020913777480457284
nor ISO-8859-1 5109932 92311849 294477 0.89093194830802 0.05762835982944587 0.0031900238505676555
nor windows-1252 5109932 13338428 40638 0.8389586298510572 0.007952747707797286 0.0030466858613323847
nor ISO-8859-15 5109932 4433324 5930 -0.7973203326713063 0.0011604851101736774 0.0013375968009556713
nor windows-1251 5109932 28402934 202 -11.247326522714106 3.9530858727669955E-5 7.1119413226816635E-6
nor EUC-KR 5109932 3740983 177 -7.476686952891022 3.4638425716819716E-5 4.7313767531154247E-5
nor ISO-8859-5 5109932 60446 130 0.14972544446132607 2.544065165642126E-5 0.0021506799457366907
nor US-ASCII 5109932 1092812 90 -6.370280578663017 1.7612758839060873E-5 8.235634308554445E-5
nor EUC-JP 5109932 6657154 61 -10.757527009399842 1.193753654647459E-5 9.163074791419877E-6
nor ISO-8859-4 5109932 49925 55 -1.1860974665816673 1.0763352623870533E-5 0.0011016524787180772
nor ISO-8859-9 5109932 1249981 54 -7.6604652207152615 1.0567655303436523E-5 4.320065664998108E-5
nya UTF-8 6111 2279847342 6025 -3.9045990431220883 0.985927016854852 2.6427208037159884E-6
nya ISO-8859-1 6111 92311849 57 -2.648897629239752 0.009327442317133039 6.174721947125119E-7
oci UTF-8 44584 2279847342 42617 -1.6812163743288444 0.9558810335546384 1.8692918255927682E-5
oci ISO-8859-1 44584 92311849 1693 0.10025650955661809 0.037973263951193256 1.8340007467513733E-5
oci windows-1252 44584 13338428 249 0.13894185294748992 0.005584963215503319 1.8667867008016236E-5
ori UTF-8 3945 2279847342 3944 -11.910172787335672 0.9997465145754119 1.72994039001757E-6
orm UTF-8 4305 2279847342 4257 -4.364345046212064 0.9888501742160278 1.8672302840529398E-6
pan UTF-8 31942 2279847342 31924 -10.312976143878242 0.9994364786174942 1.4002691939888666E-5
pol UTF-8 34333226 2279847342 31644239 -0.5994573524001464 0.9216797454454178 0.013879981530798478
pol ISO-8859-2 34333226 5465900 2501772 5.719051517542603 0.07286737343004121 0.45770540990504766
pol windows-1250 34333226 4195074 155308 1.97745039269233 0.004523548122160149 0.0370215161877955
pol ISO-8859-1 34333226 92311849 20038 -8.148655208506272 5.836328925222465E-4 2.1706855855525113E-4
pol windows-1252 34333226 13338428 8608 -6.03178825697628 2.507192304038077E-4 6.453534104618625E-4
pol windows-1251 34333226 28402934 960 -11.917141303924783 2.796125246139119E-5 3.3799325097893056E-5
pol GB18030 34333226 1946824 692 -7.2323772242414766 2.015540281591948E-5 3.55450723845607E-4
pol US-ASCII 34333226 1092812 489 -6.77276457423771 1.4242762972521137E-5 4.474694640981248E-4
pol UTF-16LE 34333226 17460 378 0.9417911118392599 1.100974315667278E-5 0.021649484536082474
pol Shift_JIS 34333226 7879945 348 -11.39816586933141 1.0135954017254307E-5 4.416274479073141E-5
pol ISO-8859-15 34333226 4433324 302 -10.534080794598252 8.796144003479312E-6 6.81204441633411E-5
por UTF-8 39178304 2279847342 35354799 -0.2055228633737781 0.9024075927329575 0.015507529100165514
por ISO-8859-1 39178304 92311849 3380040 1.596356542278742 0.08627325981236962 0.03661545117572068
por windows-1252 39178304 13338428 368742 1.1499194125032999 0.009411892868052685 0.02764508681232901
por ISO-8859-15 39178304 4433324 43491 -0.8762534990749075 0.0011100786802818213 0.009810020652675059
por EUC-JP 39178304 6657154 11950 -4.2535607349048625 3.050157556590505E-4 0.0017950613730732382
por GB18030 39178304 1946824 5593 -3.3184746137876155 1.4275758338084263E-4 0.002872884246341734
por ISO-8859-2 39178304 5465900 5162 -5.536921659200872 1.317565967123028E-4 9.444007391280484E-4
por ISO-8859-9 39178304 1249981 2880 -3.7590427565661853 7.351007333038204E-5 0.002304035021332324
por Shift_JIS 39178304 7879945 1295 -9.030516728141354 3.305400866765442E-5 1.6434124857470453E-4
por US-ASCII 39178304 1092812 1009 -5.5852164724290265 2.575404999665121E-5 9.233061130368261E-4
por UTF-16LE 39178304 17460 732 1.9614856625051915 1.8683810304805436E-5 0.04192439862542955
por EUC-KR 39178304 3740983 730 -8.690236918111392 1.8632761642770446E-5 1.9513587738837626E-4
por windows-1251 39178304 28402934 671 -12.893651855095747 1.7126826112738315E-5 2.3624319938214833E-5
por windows-1250 39178304 4195074 587 -9.35493422393145 1.498278230726884E-5 1.3992601799157775E-4
por ISO-8859-5 39178304 60446 215 -2.893885279488234 5.4877311687611595E-6 0.0035568937564106804
por Big5 39178304 1662412 81 -11.466544153420797 2.067470812416995E-6 4.87243836064706E-5
por windows-1254 39178304 903946 56 -10.986837241019316 1.4293625369796509E-6 6.19506032439991E-5
por windows-1255 39178304 864449 55 -10.933553296129885 1.403838205962157E-6 6.362434336785628E-5
por IBM866 39178304 18855 52 -3.401150063926511 1.3272652129096757E-6 0.0027578891540705384
pus UTF-8 30756 2279847342 30590 -5.8037690130850335 0.9946026791520354 1.3417565043265077E-5
pus windows-1256 30756 3109658 109 2.1376280288891887 0.0035440239302900246 3.5052086113649796E-5
pus windows-1252 30756 13338428 52 -2.2430610354303546 0.0016907270126154247 3.898510379184114E-6
que UTF-8 11147 2279847342 11071 -5.339320160032233 0.9931820220687181 4.85602689094435E-6
roh UTF-8 25331 2279847342 24001 -1.3508178223969596 0.9474951640282657 1.0527459254769699E-5
roh ISO-8859-1 25331 92311849 902 -0.023424883272791134 0.035608542892108484 9.771226660187469E-6
roh windows-1251 25331 28402934 347 0.4171647706901186 0.0136986301369863 1.2217047717675927E-5
roh windows-1252 25331 13338428 59 -1.6036459745819969 0.0023291618964904662 4.423309853305052E-6
ron UTF-8 7677422 2279847342 7140486 -0.8126734575986464 0.9300629820791406 0.0031320018092685087
ron ISO-8859-1 7677422 92311849 247606 -0.21392673299306306 0.03225119056891754 0.002682277548140109
ron ISO-8859-2 7677422 5465900 210301 4.980710730530914 0.027392137621196282 0.0384750910188624
ron windows-1250 7677422 4195074 39347 2.2614752506337994 0.005125027645998878 0.00937933395215436
ron windows-1252 7677422 13338428 26280 -0.8337698023671134 0.003423024030722813 0.001970247168556894
ron EUC-KR 7677422 3740983 4106 -2.003864843586609 5.348149417864486E-4 0.0010975724829543464
ron windows-1251 7677422 28402934 2755 -6.834540981189257 3.588444141796556E-4 9.699702150489101E-5
ron Shift_JIS 7677422 7879945 2360 -4.596104521966149 3.0739485207404257E-4 2.9949447616702907E-4
ron ISO-8859-3 7677422 5592 1107 7.944945277059067 1.4418902595168015E-4 0.19796137339055794
ron GB18030 7677422 1946824 1025 -3.4725349872176317 1.3350835736266679E-4 5.264985432684207E-4
ron US-ASCII 7677422 1092812 509 -3.7180542288539344 6.629829648546088E-5 4.6577087367269025E-4
ron EUC-JP 7677422 6657154 497 -7.374521582997896 6.473527181389795E-5 7.465652739894556E-5
ron ISO-8859-9 7677422 1249981 354 -4.712576803606739 4.6109227811106386E-5 2.832043047054315E-4
ron ISO-8859-15 7677422 4433324 241 -8.010651004520732 3.139074548722214E-5 5.4361016699884785E-5
ron Big5 7677422 1662412 178 -6.657166611711962 2.3184865961516768E-5 1.0707333681421933E-4
ron ISO-8859-5 7677422 60446 161 -0.23573938955994234 2.0970581010135956E-5 0.0026635343943354397
ron UTF-16LE 7677422 17460 53 0.02495992085425638 6.9033589660696E-6 0.0030355097365406644
run UTF-8 1726 2279847342 1643 -1.5172673509428556 0.9519119351100811 7.206622872208082E-7
run ISO-8859-1 1726 92311849 68 0.17093274494501326 0.039397450753186555 7.366334954465055E-7
rus UTF-8 109380509 2279847342 92828481 0.5160624288396936 0.8486747945193782 0.040716972268224794
rus windows-1251 109380509 28402934 16370741 3.2846917678016205 0.14966780781757014 0.5763749970337572
rus KOI8-R 109380509 231624 122376 3.6129955698274627 0.0011188099334955555 0.52833903222464
rus x-MacCyrillic 109380509 111324 49512 3.5960492811265508 4.526583433617044E-4 0.4447558477956236
rus ISO-8859-1 109380509 92311849 4147 -13.557180979624471 3.791351894330643E-5 4.4923810376715565E-5
rus US-ASCII 109380509 1092812 1202 -7.232466152023374 1.0989160783663934E-5 0.0010999147154313826
rus windows-1252 109380509 13338428 1169 -12.28074529310282 1.06874616939294E-5 8.764151217819672E-5
rus ISO-8859-5 109380509 60446 452 -3.4126358429829238 4.132363289697253E-6 0.0074777487344075706
rus UTF-16LE 109380509 17460 418 -1.1188447426833794 3.821521803304097E-6 0.023940435280641468
rus ISO-8859-9 109380509 1249981 310 -10.209716242419379 2.8341429641728948E-6 2.480037696572988E-4
rus IBM866 109380509 18855 287 -2.0067846010308914 2.623867841024583E-6 0.015221426677273933
rus windows-1257 109380509 382749 286 -8.005514546670495 2.614725444365961E-6 7.472259888334131E-4
rus KOI8-U 109380509 3475 193 0.49838276667864073 1.7644825551140925E-6 0.05553956834532374
rus ISO-8859-15 109380509 4433324 191 -13.70756702627643 1.746197761796848E-6 4.3082797467543544E-5
rus ISO-8859-2 109380509 5465900 172 -14.335087859752246 1.5724922252830255E-6 3.146782780511901E-5
rus GB2312 109380509 28523770 155 -17.830279661085036 1.4170714820864474E-6 5.434064290940503E-6
rus IBM855 109380509 5749 91 -1.929765949728357 8.31958095934624E-7 0.015828839798225778
rus windows-1250 109380509 4195074 83 -15.264084900553799 7.58818922665646E-7 1.9785109869337228E-5
rus Shift_JIS 109380509 7879945 69 -16.89159528651183 6.308253694449346E-7 8.756406294713985E-6
rus windows-1254 109380509 903946 63 -12.748260657227304 5.759709894932012E-7 6.969442864949898E-5
san UTF-8 31964 2279847342 31150 -2.7416455861209443 0.9745338505819047 1.3663195524606313E-5
san windows-1252 31964 13338428 473 2.069097459738423 0.014797897634839194 3.5461450179886267E-5
san ISO-8859-1 31964 92311849 282 -2.759191684111858 0.00882242522838193 3.054862436998743E-6
sco UTF-8 12107 2279847342 11226 -0.7413226484651096 0.9272321797307342 4.924013899172728E-6
sco ISO-8859-1 12107 92311849 512 0.30658013618450586 0.042289584537870656 5.546416906891335E-6
sco windows-1252 12107 13338428 220 2.472977125247224 0.018171305856116298 1.6493697758086634E-5
sin UTF-8 32612 2279847342 32047 -3.4954143968963667 0.9826750889243223 1.4056642920611831E-5
sin ISO-8859-1 32612 92311849 437 -1.9325402640196774 0.01339997546915246 4.733953492795925E-6
sin windows-1252 32612 13338428 127 -0.57879392113854 0.003894272047099227 9.521361887622739E-6
slk UTF-8 9879351 2279847342 8797083 -0.004073394742848814 0.8904515084037403 0.0038586280923014536
slk windows-1250 9879351 4195074 1018861 7.522673990551446 0.1031303574495936 0.24287080513955178
slk ISO-8859-2 9879351 5465900 50664 1.736443341479103 0.005128272089937892 0.00926910481347994
slk windows-1252 9879351 13338428 7521 -3.8304613752030865 7.612848252886248E-4 5.638595492662254E-4
slk ISO-8859-1 9879351 92311849 4247 -8.778056408633121 4.298865380934436E-4 4.6007094928842776E-5
slk US-ASCII 9879351 1092812 519 -4.1817345637829435 5.253381522733629E-5 4.74921578459973E-4
slk windows-1251 9879351 28402934 214 -12.446702953403687 2.166134192418105E-5 7.534432886405327E-6
slk UTF-16LE 9879351 17460 199 2.1515132028513766 2.014302356500948E-5 0.011397479954180985
slv UTF-8 3548543 2279847342 3342146 -1.1569460410750694 0.941836128236293 0.0014659516619512326
slv windows-1250 3548543 4195074 154094 6.3976033664567815 0.043424582990821865 0.036732129159104225
slv ISO-8859-2 3548543 5465900 47161 3.6208346239529265 0.01329024334776273 0.008628222250681498
slv ISO-8859-1 3548543 92311849 3239 -7.2778498161507095 9.127689871589551E-4 3.508758664340046E-5
slv windows-1252 3548543 13338428 1259 -5.3605877501491275 3.5479350257274606E-4 9.438893398832306E-5
slv ISO-8859-15 3548543 4433324 319 -5.909747194149315 8.989605029444479E-5 7.195503870233712E-5
slv EUC-KR 3548543 3740983 170 -6.829333948687029 4.790698605033108E-5 4.544260158359447E-5
slv EUC-JP 3548543 6657154 83 -9.413543224215516 2.338988142457341E-5 1.246779028996475E-5
smo UTF-8 4400 2279847342 4114 -0.9504122686260135 0.935 1.8045067861390168E-6
smo ISO-8859-1 4400 92311849 254 0.8964561211528823 0.057727272727272724 2.7515427624031235E-6
sna UTF-8 1830 2279847342 1746 -1.6057451639295475 0.9540983606557377 7.658407507532143E-7
snd UTF-8 7299 2279847342 7285 -7.865965499877708 0.9980819290313742 3.195389386733772E-6
som UTF-8 13696 2279847342 11779 0.41648166459414454 0.8600321261682243 5.1665739994972E-6
som ISO-8859-1 13696 92311849 1826 2.4032711874785915 0.13332359813084113 1.9780775921842925E-5
som windows-1252 13696 13338428 63 -0.24716803255099873 0.004599883177570093 4.723195267088445E-6
sot UTF-8 5646 2279847342 5379 -1.5490553735000068 0.9527098831030818 2.35936849845449E-6
sot ISO-8859-1 5646 92311849 247 0.37133366182948513 0.043747786043216434 2.6757128437542182E-6
spa UTF-8 68768766 2279847342 63414941 -0.6032247801792331 0.9221474324550188 0.0278154330036717
spa ISO-8859-1 68768766 92311849 4453925 1.0687620453616264 0.06476668492204732 0.048248681488332015
spa windows-1252 68768766 13338428 513442 0.6918312435684065 0.007466209296237772 0.03849344165594326
spa ISO-8859-15 68768766 4433324 261951 1.5059368617556916 0.003809156616246393 0.05908681612262041
spa GB18030 68768766 1946824 59467 0.24998000563778144 8.647385064318298E-4 0.03054564768052993
spa ISO-8859-5 68768766 60446 22604 4.384791349743495 3.2869573375796796E-4 0.37395361148793965
spa EUC-JP 68768766 6657154 19019 -4.427922509539769 2.7656450895163656E-4 0.002856926548492043
spa ISO-8859-2 68768766 5465900 8331 -5.682438974594307 1.2114511404785132E-4 0.0015241771711886423
spa windows-1251 68768766 28402934 5135 -9.925863579542135 7.467052702385266E-5 1.8079118164341754E-4
spa US-ASCII 68768766 1092812 3266 -4.341842007768971 4.749249099511252E-5 0.002988620183526535
spa Shift_JIS 68768766 7879945 3026 -8.435236639153901 4.4002534522722135E-5 3.840128325768771E-4
spa windows-1250 68768766 4195074 1296 -8.87294028366596 1.884576495090809E-5 3.0893376374290416E-4
spa Big5 68768766 1662412 836 -7.9007764065194275 1.2156681712159848E-5 5.02883761666783E-4
spa ISO-8859-9 68768766 1249981 452 -8.560406681540359 6.5727513563352295E-6 3.616054964035453E-4
spa UTF-16LE 68768766 17460 280 -1.008790911435844 4.071615884455452E-6 0.016036655211912942
spa ISO-8859-8 68768766 28421 144 -3.290988707738555 2.0939738834342323E-6 0.005066676049400092
spa windows-1256 68768766 3109658 98 -13.438521197667956 1.4250655595594081E-6 3.151471962511633E-5
spa EUC-KR 68768766 3740983 81 -14.188728919052219 1.1778603094317557E-6 2.1652063107477366E-5
sqi UTF-8 704140 2279847342 662654 -1.133727326023192 0.9410827392279945 2.906571803262431E-4
sqi ISO-8859-1 704140 92311849 38319 0.7851849766903307 0.054419575652569094 4.151038075296271E-4
sqi windows-1252 704140 13338428 2899 -0.46785877372439144 0.004117078989973585 2.1734195363951435E-4
sqi ISO-8859-15 704140 4433324 186 -3.7565136307655647 2.641520152242452E-4 4.1954975544309415E-5
srp UTF-8 4663830 2279847342 4533486 -2.5583992916552436 0.9720521545596644 0.0019885041934531423
srp windows-1250 4663830 4195074 71199 4.406276971233993 0.015266208245154733 0.0169720486456258
srp ISO-8859-1 4663830 92311849 26873 -3.6021189981916257 0.005762002474361201 2.911110576931462E-4
srp ISO-8859-2 4663830 5465900 16871 1.0497987040351608 0.0036174131561399108 0.003086591412210249
srp windows-1251 4663830 28402934 6701 -4.064733892202598 0.0014368019417517362 2.359263307093556E-4
srp windows-1252 4663830 13338428 6147 -2.7376843040398406 0.001318015450820463 4.6084890963162976E-4
srp EUC-KR 4663830 3740983 1680 -2.79499244813483 3.60218961668843E-4 4.490798274143454E-4
srp KOI8-R 4663830 231624 327 -0.5082791752645647 7.011404789625693E-5 0.0014117708009532691
srp x-MacCyrillic 4663830 111324 226 0.21692212347022027 4.845802698640388E-5 0.002030110308648629
srp EUC-JP 4663830 6657154 102 -9.547011194256823 2.1870436958465466E-5 1.5321862765980777E-5
srp UTF-16LE 4663830 17460 68 1.5161865439242268 1.4580291305643645E-5 0.0038946162657502864
sun UTF-8 15127 2279847342 14699 -2.536882437272672 0.971706220665036 6.447361509347936E-6
sun ISO-8859-1 15127 92311849 281 -1.289829406790828 0.01857605605870298 3.044029591477471E-6
sun windows-1252 15127 13338428 134 1.0549032098485316 0.008858332782441991 1.0046161361743678E-5
swa UTF-8 74320 2279847342 74076 -6.793721920363944 0.9967168998923573 3.2491649171131216E-5
swa ISO-8859-1 74320 92311849 117 -6.191590205767537 0.0015742734122712594 1.2674429259888403E-6
swa ISO-8859-2 74320 5465900 53 -2.189504388644123 7.131324004305705E-4 9.696481823670394E-6
swe UTF-8 10943593 2279847342 9968847 -0.371083404214622 0.9109299843296439 0.004372594083977049
swe ISO-8859-1 10943593 92311849 882868 1.5062746641221798 0.0806744183560189 0.00956397265967449
swe windows-1252 10943593 13338428 78985 0.6452135590258022 0.007217465050098263 0.005921612351920331
swe ISO-8859-15 10943593 4433324 10845 -1.1104128617621352 9.909908016498786E-4 0.0024462457514948153
swe EUC-KR 10943593 3740983 621 -6.485178390501714 5.674553138078143E-5 1.6599915049065982E-4
swe EUC-JP 10943593 6657154 508 -8.03709286939114 4.641985497815937E-5 7.630888514821799E-5
swe ISO-8859-2 10943593 5465900 360 -8.33240180563785 3.289596022074286E-5 6.586289540606304E-5
swe ISO-8859-4 10943593 49925 353 1.001576344905358 3.225631654978397E-5 0.007070605908863295
syr UTF-8 8324 2279847342 8322 -12.017236903875416 0.9997597308986065 3.6502444030745987E-6
tam UTF-8 227876 2279847342 226825 -6.116475234628651 0.9953878425108392 9.949131058968948E-5
tam ISO-8859-1 227876 92311849 966 -4.215701017364068 0.004239147606593059 1.0464528773548886E-5
tam windows-1252 227876 13338428 65 -5.799242033653089 2.852428513753094E-4 4.873137973980142E-6
tat UTF-8 58977 2279847342 56928 -2.1395441672033897 0.965257642809909 2.4970092931774902E-5
tat windows-1251 58977 28402934 1838 2.025332152134986 0.03116469132034522 6.471162451034108E-5
tat ISO-8859-1 58977 92311849 63 -6.966200456356387 0.0010682130321989928 6.824692678401448E-7
tat KOI8-R 58977 231624 55 4.664381880952659 9.325669328721365E-4 2.3745380444168134E-4
tel UTF-8 52359 2279847342 52348 -12.285612120710732 0.9997899119540098 2.2961186495091213E-5
tgk UTF-8 69169 2279847342 67709 -3.108121778256981 0.9788922783327791 2.9698918323453257E-5
tgk windows-1251 69169 28402934 1456 1.2613580592426652 0.021049892292790123 5.1262309731804466E-5
tgl UTF-8 29393 2279847342 29089 -4.513002919487922 0.9896574014221073 1.2759187628098653E-5
tgl ISO-8859-1 29393 92311849 232 -2.9799500898862656 0.007893035756812846 2.5132201609351364E-6
tgl windows-1252 29393 13338428 53 -2.114534000939599 0.0018031504099615555 3.9734817326299625E-6
tha UTF-8 2023547 2279847342 1685013 0.7078913361091844 0.8327026750552372 7.390902754575749E-4
tha x-windows-874 2023547 452276 216031 11.286369563047144 0.10675857788329107 0.47765302602835435
tha TIS-620 2023547 387297 101846 10.903628968653546 0.05033043462790832 0.2629661474269101
tha GB2312 2023547 28523770 18359 -0.4056331850572143 0.0090726827694143 6.436386214024304E-4
tha ISO-8859-1 2023547 92311849 1125 -8.26983199205094 5.559544700469028E-4 1.2186951211431156E-5
tha x-iso-8859-11 2023547 1419 835 11.451243355629055 4.126417622125901E-4 0.5884425651867512
tha windows-1252 2023547 13338428 180 -8.127942956247924 8.895271520750445E-5 1.34948436202527E-5
tha UTF-16LE 2023547 17460 73 3.3253775961312733 3.6075267834154584E-5 0.00418098510882016
ton UTF-8 2202 2279847342 2081 -1.2649557522369221 0.9450499545867393 9.127804136984185E-7
ton ISO-8859-1 2202 92311849 91 0.26249786751875814 0.041326067211625794 9.857889424357646E-7
tsn UTF-8 2409 2279847342 2330 -2.250984035922117 0.9672063096720631 1.0219982527233616E-6
tso UTF-8 1231 2279847342 1197 -2.5837193938019 0.9723801787164906 5.250351538668943E-7
tuk UTF-8 26143 2279847342 25778 -3.9202432521213937 0.9860383276594117 1.1306897407168589E-5
tuk windows-1252 26143 13338428 219 0.9441334921338199 0.00837700340435298 1.6418726404640785E-5
tuk ISO-8859-1 26143 92311849 91 -4.608480311500189 0.0034808552958727 9.857889424357646E-7
tur UTF-8 16552510 2279847342 15058688 -0.34679487888366023 0.9097525390409068 0.006605129967513413
tur ISO-8859-9 16552510 1249981 810332 7.0407738303426 0.04895523397962001 0.6482754537868975
tur windows-1254 16552510 903946 632742 6.898284269840604 0.03822634754487386 0.6999776535324013
tur windows-1252 16552510 13338428 18987 -3.0078058735679343 0.0011470767877500149 0.0014234810878763224
tur ISO-8859-1 16552510 92311849 10744 -7.949501803188404 6.490858486114795E-4 1.1638809228054787E-4
tur Big5 16552510 1662412 7738 -0.6524851355884831 4.674819710122513E-4 0.004654682473418142
tur EUC-JP 16552510 6657154 6904 -3.644177307530034 4.1709686325518E-4 0.0010370798091797185
tur ISO-8859-15 16552510 4433324 4811 -3.5550585027746395 2.906507834763429E-4 0.0010851902545358741
tur ISO-8859-8 16552510 28421 790 2.8740940089489153 4.772690063319702E-5 0.02779634777101439
tur windows-1251 16552510 28402934 439 -12.03668418146439 2.6521657440472775E-5 1.5456149706224013E-5
tur ISO-8859-3 16552510 5592 138 2.642583167386536 8.337104161242011E-6 0.02467811158798283
tur windows-1250 16552510 4195074 66 -12.020067565644178 3.987310685811396E-6 1.573273796838864E-5
uig UTF-8 23294 2279847342 23278 -9.917343095967777 0.99931312784408 1.021033275832378E-5
ukr UTF-8 4934883 2279847342 4328114 0.19606822744290195 0.8770449066370976 0.0018984227234281198
ukr windows-1251 4934883 28402934 603540 4.523563645681952 0.12230077187240306 0.02124921319748164
ukr KOI8-U 4934883 3475 1942 9.706283915187203 3.935250339268428E-4 0.5588489208633094
ukr x-MacCyrillic 4934883 111324 876 2.801819062869546 1.7751180727081068E-4 0.007868923143257519
ukr ISO-8859-2 4934883 5465900 140 -8.63308207896393 2.8369466915426362E-5 2.561334821346896E-5
ukr ISO-8859-1 4934883 92311849 114 -14.628187346450655 2.3100851631132896E-5 1.2349443894250238E-6
ukr windows-1252 4934883 13338428 63 -12.008106782596276 1.2766260111941864E-5 4.723195267088445E-6
ukr KOI8-R 4934883 231624 51 -4.334815664979691 1.0334591519191032E-5 2.201844368459227E-4
urd UTF-8 174765 2279847342 173811 -5.781161349025142 0.9945412410951849 7.623799927214601E-5
urd windows-1256 174765 3109658 879 2.8344214788820468 0.0050296111921723455 2.826677403109924E-4
uzb UTF-8 312641 2279847342 296751 -1.4122005083178921 0.9491749322705595 1.301626624437383E-4
uzb windows-1251 312641 28402934 15149 2.8712827269381394 0.04845493713236588 5.333603915708145E-4
uzb x-MacCyrillic 312641 111324 500 7.198697970153068 0.0015992784055834008 0.004491394488160684
uzb ISO-8859-1 312641 92311849 118 -9.045344602318725 3.774297037176826E-4 1.2782757715101125E-6
uzb Shift_JIS 312641 7879945 79 -4.993131409452157 2.5268598808217733E-4 1.0025450685252245E-5
vie UTF-8 13774142 2279847342 13755168 -8.517641361324838 0.9986224913319465 0.006033372387088539
vie ISO-8859-1 13774142 92311849 12502 -7.2815847795874635 9.07642741014286E-4 1.3543223470694428E-4
vie windows-1252 13774142 13338428 5159 -5.244900572374438 3.7454238528976977E-4 3.867772124271316E-4
vie ISO-8859-15 13774142 4433324 951 -6.429857558657381 6.904241295029483E-5 2.145117297991304E-4
vie ISO-2022-JP 13774142 16892 126 0.6499484662525041 9.14757521738922E-6 0.007459152261425527
vie US-ASCII 13774142 1092812 99 -8.156081533475236 7.187380527948674E-6 9.05919773940989E-5
vol UTF-8 7773 2279847342 7455 -1.8255192983803565 0.9590891547664995 3.269955782855219E-6
vol ISO-8859-1 7773 92311849 264 -0.11460190265948761 0.03396372057120803 2.8598712176158446E-6
war UTF-8 25363 2279847342 22096 0.27610879659662857 0.8711903166029256 9.691876992349956E-6
war ISO-8859-1 25363 92311849 2298 1.7269467263487548 0.09060442376690454 2.4893879007883374E-5
war windows-1252 25363 13338428 395 2.169746549408094 0.015573867444702913 2.9613684611110094E-5
war ISO-8859-9 25363 1249981 305 6.384940543728748 0.012025391318061743 2.4400370885637463E-4
war US-ASCII 25363 1092812 95 4.337701604725511 0.003745613689232346 8.693169547918581E-5
war windows-1254 25363 903946 60 3.800778564229905 0.0023656507510941135 6.637564633285616E-5
wol UTF-8 2814 2279847342 2680 -1.53588409294895 0.9523809523809523 1.1755173035616347E-6
wol ISO-8859-1 2814 92311849 58 -1.08606048912797 0.02061122956645345 6.283050402337841E-7
xho UTF-8 5123 2279847342 4780 -0.8953880019334893 0.9330470427483896 2.0966316085912737E-6
xho ISO-8859-1 5123 92311849 169 -0.17085268883677987 0.032988483310560215 1.8307508930949915E-6
xho windows-1250 5123 4195074 86 4.623619679122057 0.016787038844427095 2.0500234322445802E-5
yid UTF-8 20208 2279847342 20102 -5.860554813583632 0.9947545526524149 8.817257028431335E-6
yid ISO-8859-8 20208 28421 72 11.531100103959064 0.0035629453681710215 0.002533338024700046
yor UTF-8 7668 2279847342 7601 -4.847044133131075 0.9912623891497131 3.333995158347756E-6
zha UTF-8 1198 2279847342 1145 -1.6759909460846953 0.9557595993322203 5.022266091709222E-7
zho UTF-8 124256712 2279847342 89448173 1.30299911222281 0.7198659256330555 0.03923428176622187
zho GB2312 124256712 28523770 23084888 2.0429486812498396 0.1857838311382326 0.8093210680074899
zho GBK 124256712 21765704 9380718 3.2070209066948516 0.07549465818796171 0.4309861973681164
zho GB18030 124256712 1946824 1320946 3.0869762093269912 0.010630781860701417 0.6785133119378023
zho Big5 124256712 1662412 909032 3.3482184771326073 0.0073157577193898385 0.546815109611817
zho ISO-8859-1 124256712 92311849 55986 -8.59699201864106 4.505672096007176E-4 6.064876893539419E-4
zho UTF-16 124256712 88200 40292 3.363017022747867 3.2426417335105405E-4 0.4568253968253968
zho windows-1252 124256712 13338428 10226 -8.187598284869901 8.229736515159036E-5 7.666570603372451E-4
zho UTF-16LE 124256712 17460 2311 1.822681331268189 1.8598592887280006E-5 0.13235967926689576
zho EUC-JP 124256712 6657154 1930 -10.136400176296403 1.5532360135201388E-5 2.899136778268912E-4
zho UTF-16BE 124256712 1054 500 3.373271982575541 4.023927496166163E-6 0.47438330170777987
zho US-ASCII 124256712 1092812 457 -9.408072008850274 3.677869731495873E-6 4.181872087788201E-4
zho KOI8-R 124256712 231624 330 -6.959116651663489 2.6557921474696675E-6 0.001424722826650088
zho IBM866 124256712 18855 321 -2.0293938128848286 2.5833614525386766E-6 0.017024661893396978
zho EUC-KR 124256712 3740983 246 -13.105291585366682 1.979772328113752E-6 6.5758117585672E-5
zho IBM855 124256712 5749 93 -2.1298543959734415 7.484505142869063E-7 0.016176726387197773
zho Big5-HKSCS 124256712 11241 88 -3.5645599513044144 7.082112393252446E-7 0.007828485010230406
zho windows-1251 124256712 28402934 64 -19.833960687962026 5.150627195092688E-7 2.253288339859537E-6
zul UTF-8 8313 2279847342 8266 -5.711450722681214 0.9943462047395646 3.625681354940475E-6
# iso-639-3 language-code
# probability charset
aar
0.69448276 UTF-8
0.27482759 ISO-8859-1
0.01862069 windows-1252
abk
0.96103896 UTF-8
0.02877515 x-MacCyrillic
afr
0.93385646 UTF-8
0.05655853 ISO-8859-1
0.00928687 windows-1252
ara
0.86109903 UTF-8
0.13777710 windows-1256
0.00043999 windows-1252
0.00041689 ISO-8859-1
0.00011413 ISO-8859-15
0.00009587 US-ASCII
0.00002111 UTF-16LE
0.00001217 ISO-8859-9
0.00001080 x-MacCyrillic
aze
0.99433290 UTF-8
0.00289860 windows-1251
0.00083189 EUC-JP
0.00076171 ISO-8859-9
0.00048021 windows-1254
0.00044709 ISO-8859-1
0.00008753 Big5
0.00008674 windows-1252
0.00006781 ISO-8859-15
bak
0.94060933 UTF-8
0.05932639 windows-1251
bel
0.92427353 UTF-8
0.07518023 windows-1251
0.00050797 x-MacCyrillic
ben
0.99897412 UTF-8
0.00100834 ISO-8859-1
bis
0.95624333 UTF-8
0.03166133 ISO-8859-1
bos
0.95706961 UTF-8
0.02385483 ISO-8859-1
0.01071743 windows-1250
0.00290683 ISO-8859-2
0.00178624 windows-1256
0.00149731 ISO-8859-9
0.00107270 EUC-KR
0.00085241 windows-1252
0.00011812 EUC-JP
bre
0.98353252 UTF-8
0.01352523 ISO-8859-1
0.00248609 windows-1252
bul
0.90494091 UTF-8
0.09487207 windows-1251
0.00007468 ISO-8859-1
0.00002585 windows-1252
0.00002330 KOI8-R
cat
0.89940782 UTF-8
0.08172897 ISO-8859-1
0.01127385 windows-1252
0.00620687 ISO-8859-15
0.00115417 US-ASCII
0.00009779 GB18030
0.00006659 windows-1250
0.00003318 EUC-JP
ceb
0.98363582 UTF-8
0.01190122 ISO-8859-1
ces
0.91582350 UTF-8
0.07319638 windows-1250
0.00980602 ISO-8859-2
0.00063327 ISO-8859-1
0.00050870 windows-1252
0.00001110 ISO-8859-15
0.00001096 windows-1251
cos
0.88645843 UTF-8
0.09739107 ISO-8859-1
0.00825273 windows-1252
0.00665543 ISO-8859-15
crs
0.88316832 UTF-8
0.10594059 ISO-8859-1
cym
0.98606544 UTF-8
0.01266720 ISO-8859-1
0.00061744 windows-1252
0.00040946 US-ASCII
dan
0.89835074 UTF-8
0.07577211 ISO-8859-1
0.02330725 windows-1252
0.00079632 ISO-8859-15
0.00069018 EUC-JP
0.00042644 windows-1251
0.00016022 windows-1250
0.00014539 GBK
0.00007416 EUC-KR
0.00006767 ISO-8859-2
0.00005670 Shift_JIS
0.00003708 ISO-8859-4
0.00003229 x-MacCyrillic
0.00001700 US-ASCII
0.00001251 GB2312
0.00001251 IBM866
deu
0.89826558 UTF-8
0.08161362 ISO-8859-1
0.00985306 windows-1252
0.00951450 ISO-8859-15
0.00036018 ISO-8859-2
0.00016673 windows-1250
0.00005769 US-ASCII
0.00004774 ISO-8859-9
0.00002875 windows-1251
0.00001794 windows-1254
0.00001765 EUC-JP
0.00001291 ISO-8859-5
div
0.98740475 UTF-8
0.01240632 windows-1252
ell
0.95547714 UTF-8
0.03036101 ISO-8859-7
0.01093652 windows-1253
0.00265596 ISO-8859-1
0.00019801 GBK
0.00015916 GB2312
0.00012073 windows-1252
0.00003572 ISO-8859-15
0.00002674 Shift_JIS
eng
0.94490462 UTF-8
0.04504365 ISO-8859-1
0.00661714 windows-1252
0.00100890 US-ASCII
0.00087213 windows-1251
0.00039447 ISO-8859-15
0.00016789 ISO-8859-2
0.00015992 GB2312
0.00013695 windows-1250
0.00010181 EUC-JP
0.00009041 windows-1256
0.00005916 GBK
0.00004678 GB18030
0.00004425 windows-1255
0.00004317 EUC-KR
0.00004300 Shift_JIS
0.00004251 ISO-8859-9
0.00003439 Big5
0.00002170 ISO-8859-7
0.00002138 KOI8-R
0.00002138 x-MacCyrillic
0.00001816 windows-1254
0.00001783 ISO-8859-8
0.00001175 x-windows-874
0.00001132 ISO-8859-5
0.00001126 ISO-8859-6
epo
0.99061282 UTF-8
0.00379239 ISO-8859-1
0.00207970 windows-1251
0.00089712 windows-1252
0.00088081 ISO-8859-2
0.00057905 windows-1250
0.00050565 GB2312
est
0.92921353 UTF-8
0.04537589 ISO-8859-1
0.01028407 ISO-8859-4
0.00668891 windows-1252
0.00479324 windows-1257
0.00280646 ISO-8859-15
0.00046648 EUC-JP
0.00024083 ISO-8859-13
0.00008291 windows-1251
0.00001579 EUC-KR
eus
0.82151127 UTF-8
0.14866733 ISO-8859-1
0.02029891 windows-1252
0.00941929 ISO-8859-15
0.00007492 windows-1250
fao
0.92507933 UTF-8
0.06371627 ISO-8859-1
0.01095167 windows-1252
fas
0.99899702 UTF-8
0.00059170 windows-1256
0.00022702 windows-1252
0.00014482 ISO-8859-1
0.00001614 windows-1257
0.00001507 UTF-16LE
fin
0.91620497 UTF-8
0.07434519 ISO-8859-1
0.00601809 windows-1252
0.00319388 ISO-8859-15
0.00012706 EUC-KR
0.00004044 windows-1251
0.00002374 ISO-8859-4
0.00001612 ISO-8859-2
0.00001238 US-ASCII
fra
0.90261332 UTF-8
0.07443651 ISO-8859-1
0.01157843 windows-1252
0.01104540 ISO-8859-15
0.00010554 windows-1256
0.00004696 US-ASCII
0.00004641 ISO-8859-2
0.00002370 ISO-8859-9
0.00002168 windows-1250
0.00001865 GB18030
0.00001782 windows-1251
fry
0.93388144 UTF-8
0.05253083 ISO-8859-1
0.01248941 windows-1252
gla
0.97420027 UTF-8
0.01342107 windows-1251
0.00840446 ISO-8859-1
gle
0.98079224 UTF-8
0.01109041 ISO-8859-1
0.00428073 GB2312
0.00325681 windows-1252
glg
0.85057090 UTF-8
0.13413083 ISO-8859-1
0.00712114 windows-1252
0.00492526 ISO-8859-15
0.00310801 windows-1251
0.00005994 GB18030
glv
0.96465651 UTF-8
0.02429865 ISO-8859-1
grn
0.89563793 UTF-8
0.09701661 ISO-8859-1
0.00491581 windows-1252
hat
0.98717144 UTF-8
0.00825334 windows-1252
hau
0.96253559 UTF-8
0.03456716 ISO-8859-1
haw
0.99096622 UTF-8
0.00667714 ISO-8859-1
heb
0.89514270 UTF-8
0.10237244 windows-1255
0.00190921 windows-1251
0.00035330 ISO-8859-8
0.00009214 ISO-8859-1
0.00008442 windows-1252
0.00004175 UTF-16LE
hin
0.99790184 UTF-8
0.00158997 ISO-8859-1
0.00047083 windows-1252
hrv
0.94450885 UTF-8
0.03261750 windows-1250
0.01651335 ISO-8859-2
0.00318255 ISO-8859-1
0.00219704 windows-1252
0.00085544 EUC-KR
0.00002789 US-ASCII
0.00002116 Big5
0.00001755 ISO-8859-13
0.00001563 windows-1255
0.00001515 Shift_JIS
hun
0.88992918 UTF-8
0.09128887 ISO-8859-2
0.01245447 windows-1250
0.00393453 ISO-8859-1
0.00222040 windows-1252
0.00007181 US-ASCII
0.00006121 GB18030
0.00001550 windows-1251
0.00001128 ISO-8859-15
hye
0.99835899 UTF-8
0.00129272 windows-1252
0.00019424 ISO-8859-1
0.00011387 windows-1251
iku
0.88414055 UTF-8
0.11585945 ISO-8859-1
ile
0.73556276 UTF-8
0.23084856 ISO-8859-1
0.02150854 windows-1252
0.00780790 ISO-8859-9
ina
0.82541845 UTF-8
0.09861165 windows-1251
0.06409757 ISO-8859-1
0.00849877 windows-1252
ind
0.97166899 UTF-8
0.02306408 ISO-8859-1
0.00364256 windows-1252
0.00046292 windows-1256
0.00036781 US-ASCII
0.00022909 Shift_JIS
0.00016632 GB18030
0.00016087 EUC-KR
0.00014153 windows-1251
0.00004290 EUC-JP
0.00003024 ISO-8859-2
ipk
0.94752046 UTF-8
0.03129514 ISO-8859-1
isl
0.87596748 UTF-8
0.10243762 ISO-8859-1
0.02155247 windows-1252
ita
0.90870026 UTF-8
0.07250701 ISO-8859-1
0.01576158 windows-1252
0.00249758 ISO-8859-15
0.00021354 ISO-8859-2
0.00008188 EUC-KR
0.00006836 windows-1251
0.00006016 windows-1250
0.00004940 US-ASCII
0.00001269 ISO-8859-4
jav
0.96801028 UTF-8
0.01778710 ISO-8859-1
0.01237657 windows-1252
jpn
0.86626982 UTF-8
0.08570166 Shift_JIS
0.04553717 EUC-JP
0.00142518 GBK
0.00075824 windows-31j
0.00015899 GB18030
0.00006911 ISO-2022-JP
0.00001692 GB2312
0.00001679 ISO-8859-1
0.00001677 UTF-16LE
0.00001307 US-ASCII
kal
0.97153488 UTF-8
0.02716514 ISO-8859-1
kan
0.99728510 UTF-8
0.00217824 ISO-8859-1
kat
0.99956564 UTF-8
0.00025930 windows-1252
0.00015208 windows-1251
kaz
0.99861892 UTF-8
0.00135506 windows-1251
kha
0.94525627 UTF-8
0.04231189 ISO-8859-1
kin
0.98615774 UTF-8
0.01030806 ISO-8859-1
kir
0.98054077 UTF-8
0.01930454 windows-1251
kor
0.76097595 UTF-8
0.23769156 EUC-KR
0.00111609 ISO-8859-1
0.00007067 x-windows-949
0.00006766 x-IBM949
0.00006410 US-ASCII
kur
0.98524309 UTF-8
0.01419525 windows-1252
lat
0.74117173 UTF-8
0.11091312 ISO-8859-1
0.09885802 windows-1251
0.02946372 windows-1252
0.01158250 windows-1250
0.00426887 GB2312
0.00217705 ISO-8859-2
lav
0.95460092 UTF-8
0.04168787 windows-1257
0.00113406 windows-1251
0.00076054 windows-1252
0.00058382 ISO-8859-2
0.00045020 ISO-8859-1
0.00028763 EUC-JP
0.00017607 Big5
0.00014316 ISO-8859-13
0.00011058 windows-1250
0.00003192 GB18030
0.00002764 ISO-8859-4
lin
0.80949464 UTF-8
0.13445636 windows-1255
0.03093415 ISO-8859-1
0.01776417 windows-1252
lit
0.96683878 UTF-8
0.03029128 windows-1257
0.00145707 ISO-8859-13
0.00049547 ISO-8859-1
0.00032220 EUC-JP
0.00029955 ISO-8859-2
0.00013148 windows-1252
0.00011811 windows-1251
0.00002396 EUC-KR
0.00001003 ISO-8859-4
ltz
0.75015502 UTF-8
0.24360153 ISO-8859-1
0.00367765 windows-1252
0.00166777 windows-1255
lug
0.94162896 UTF-8
0.02714932 ISO-8859-1
0.02669683 windows-1251
mal
0.90280411 UTF-8
0.09421069 UTF-16
0.00273054 windows-1252
mfe
0.87774903 UTF-8
0.07826649 ISO-8859-1
mkd
0.98553921 UTF-8
0.01387651 windows-1251
mlg
0.97974166 UTF-8
0.01948780 ISO-8859-1
mlt
0.99173603 UTF-8
0.00448616 windows-1252
0.00306948 ISO-8859-1
mon
0.99950778 UTF-8
0.00039910 windows-1251
mri
0.98569615 UTF-8
0.01218790 ISO-8859-1
msa
0.97917021 UTF-8
0.01507577 ISO-8859-1
0.00433293 windows-1252
0.00107144 GB2312
0.00015050 windows-1251
0.00008536 US-ASCII
mya
0.99624074 UTF-8
0.00236558 ISO-8859-1
nep
0.99903948 UTF-8
0.00054201 ISO-8859-1
0.00038421 Big5
nld
0.94453265 UTF-8
0.04503098 ISO-8859-1
0.00846493 windows-1252
0.00162364 ISO-8859-15
0.00006642 windows-1250
0.00006220 ISO-8859-2
0.00004628 US-ASCII
0.00003755 windows-1255
0.00002955 ISO-8859-3
0.00002346 windows-1253
0.00001350 ISO-8859-8
0.00001318 windows-1251
0.00001255 Shift_JIS
nno
0.93332555 UTF-8
0.04887768 ISO-8859-1
0.01637114 windows-1252
0.00116295 US-ASCII
nor
0.93308913 UTF-8
0.05762836 ISO-8859-1
0.00795275 windows-1252
0.00116049 ISO-8859-15
0.00003953 windows-1251
0.00003464 EUC-KR
0.00002544 ISO-8859-5
0.00001761 US-ASCII
0.00001194 EUC-JP
0.00001076 ISO-8859-4
0.00001057 ISO-8859-9
nya
0.98592702 UTF-8
0.00932744 ISO-8859-1
oci
0.95588103 UTF-8
0.03797326 ISO-8859-1
0.00558496 windows-1252
pol
0.92167975 UTF-8
0.07286737 ISO-8859-2
0.00452355 windows-1250
0.00058363 ISO-8859-1
0.00025072 windows-1252
0.00002796 windows-1251
0.00002016 GB18030
0.00001424 US-ASCII
0.00001101 UTF-16LE
0.00001014 Shift_JIS
por
0.90240759 UTF-8
0.08627326 ISO-8859-1
0.00941189 windows-1252
0.00111008 ISO-8859-15
0.00030502 EUC-JP
0.00014276 GB18030
0.00013176 ISO-8859-2
0.00007351 ISO-8859-9
0.00003305 Shift_JIS
0.00002575 US-ASCII
0.00001868 UTF-16LE
0.00001863 EUC-KR
0.00001713 windows-1251
0.00001498 windows-1250
pus
0.99460268 UTF-8
0.00354402 windows-1256
0.00169073 windows-1252
roh
0.94749516 UTF-8
0.03560854 ISO-8859-1
0.01369863 windows-1251
0.00232916 windows-1252
ron
0.93006298 UTF-8
0.03225119 ISO-8859-1
0.02739214 ISO-8859-2
0.00512503 windows-1250
0.00342302 windows-1252
0.00053481 EUC-KR
0.00035884 windows-1251
0.00030739 Shift_JIS
0.00014419 ISO-8859-3
0.00013351 GB18030
0.00006630 US-ASCII
0.00006474 EUC-JP
0.00004611 ISO-8859-9
0.00003139 ISO-8859-15
0.00002318 Big5
0.00002097 ISO-8859-5
run
0.95191194 UTF-8
0.03939745 ISO-8859-1
rus
0.84867479 UTF-8
0.14966781 windows-1251
0.00111881 KOI8-R
0.00045266 x-MacCyrillic
0.00003791 ISO-8859-1
0.00001099 US-ASCII
0.00001069 windows-1252
san
0.97453385 UTF-8
0.01479790 windows-1252
0.00882243 ISO-8859-1
sco
0.92723218 UTF-8
0.04228958 ISO-8859-1
0.01817131 windows-1252
sin
0.98267509 UTF-8
0.01339998 ISO-8859-1
0.00389427 windows-1252
slk
0.89045151 UTF-8
0.10313036 windows-1250
0.00512827 ISO-8859-2
0.00076128 windows-1252
0.00042989 ISO-8859-1
0.00005253 US-ASCII
0.00002166 windows-1251
0.00002014 UTF-16LE
slv
0.94183613 UTF-8
0.04342458 windows-1250
0.01329024 ISO-8859-2
0.00091277 ISO-8859-1
0.00035479 windows-1252
0.00008990 ISO-8859-15
0.00004791 EUC-KR
0.00002339 EUC-JP
smo
0.93500000 UTF-8
0.05772727 ISO-8859-1
som
0.86003213 UTF-8
0.13332360 ISO-8859-1
0.00459988 windows-1252
sot
0.95270988 UTF-8
0.04374779 ISO-8859-1
spa
0.92214743 UTF-8
0.06476668 ISO-8859-1
0.00746621 windows-1252
0.00380916 ISO-8859-15
0.00086474 GB18030
0.00032870 ISO-8859-5
0.00027656 EUC-JP
0.00012115 ISO-8859-2
0.00007467 windows-1251
0.00004749 US-ASCII
0.00004400 Shift_JIS
0.00001885 windows-1250
0.00001216 Big5
sqi
0.94108274 UTF-8
0.05441958 ISO-8859-1
0.00411708 windows-1252
0.00026415 ISO-8859-15
srp
0.97205215 UTF-8
0.01526621 windows-1250
0.00576200 ISO-8859-1
0.00361741 ISO-8859-2
0.00143680 windows-1251
0.00131802 windows-1252
0.00036022 EUC-KR
0.00007011 KOI8-R
0.00004846 x-MacCyrillic
0.00002187 EUC-JP
0.00001458 UTF-16LE
sun
0.97170622 UTF-8
0.01857606 ISO-8859-1
0.00885833 windows-1252
swa
0.99671690 UTF-8
0.00157427 ISO-8859-1
0.00071313 ISO-8859-2
swe
0.91092998 UTF-8
0.08067442 ISO-8859-1
0.00721747 windows-1252
0.00099099 ISO-8859-15
0.00005675 EUC-KR
0.00004642 EUC-JP
0.00003290 ISO-8859-2
0.00003226 ISO-8859-4
tam
0.99538784 UTF-8
0.00423915 ISO-8859-1
0.00028524 windows-1252
tat
0.96525764 UTF-8
0.03116469 windows-1251
0.00106821 ISO-8859-1
0.00093257 KOI8-R
tgk
0.97889228 UTF-8
0.02104989 windows-1251
tgl
0.98965740 UTF-8
0.00789304 ISO-8859-1
0.00180315 windows-1252
tha
0.83270268 UTF-8
0.10675858 x-windows-874
0.05033043 TIS-620
0.00907268 GB2312
0.00055595 ISO-8859-1
0.00041264 x-iso-8859-11
0.00008895 windows-1252
0.00003608 UTF-16LE
ton
0.94504995 UTF-8
0.04132607 ISO-8859-1
tuk
0.98603833 UTF-8
0.00837700 windows-1252
0.00348086 ISO-8859-1
tur
0.90975254 UTF-8
0.04895523 ISO-8859-9
0.03822635 windows-1254
0.00114708 windows-1252
0.00064909 ISO-8859-1
0.00046748 Big5
0.00041710 EUC-JP
0.00029065 ISO-8859-15
0.00004773 ISO-8859-8
0.00002652 windows-1251
ukr
0.87704491 UTF-8
0.12230077 windows-1251
0.00039353 KOI8-U
0.00017751 x-MacCyrillic
0.00002837 ISO-8859-2
0.00002310 ISO-8859-1
0.00001277 windows-1252
0.00001033 KOI8-R
urd
0.99454124 UTF-8
0.00502961 windows-1256
uzb
0.94917493 UTF-8
0.04845494 windows-1251
0.00159928 x-MacCyrillic
0.00037743 ISO-8859-1
0.00025269 Shift_JIS
vie
0.99862249 UTF-8
0.00090764 ISO-8859-1
0.00037454 windows-1252
0.00006904 ISO-8859-15
vol
0.95908915 UTF-8
0.03396372 ISO-8859-1
war
0.87119032 UTF-8
0.09060442 ISO-8859-1
0.01557387 windows-1252
0.01202539 ISO-8859-9
0.00374561 US-ASCII
0.00236565 windows-1254
wol
0.95238095 UTF-8
0.02061123 ISO-8859-1
xho
0.93304704 UTF-8
0.03298848 ISO-8859-1
0.01678704 windows-1250
yid
0.99475455 UTF-8
0.00356295 ISO-8859-8
zho
0.71986593 UTF-8
0.18578383 GB2312
0.07549466 GBK
0.01063078 GB18030
0.00731576 Big5
0.00045057 ISO-8859-1
0.00032426 UTF-16
0.00008230 windows-1252
0.00001860 UTF-16LE
0.00001553 EUC-JP
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment