Skip to content

Instantly share code, notes, and snippets.

@he7d3r
Last active August 29, 2015 14:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save he7d3r/78c3f2200692c0d467bb to your computer and use it in GitHub Desktop.
Save he7d3r/78c3f2200692c0d467bb to your computer and use it in GitHub Desktop.
Testing ORES
# Create the working folders: models/ holds the model file and train/test
# logs, datasets/ holds the revision id lists and extracted feature tables.
# -p keeps this step re-runnable: it does not fail if a folder already exists.
mkdir -p models datasets
# Generate a file with a new model.
# ./new_model receives the scorer class followed by every feature listed
# below; whatever it prints on stdout is saved as the model file.
features=(
  revscores.features.added_badwords_ratio
  revscores.features.added_misspellings_ratio
  revscores.features.day_of_week_in_utc
  revscores.features.hour_of_day_in_utc
  revscores.features.is_custom_comment
  revscores.features.is_mainspace
  revscores.features.is_section_comment
  revscores.features.longest_repeated_char_added
  revscores.features.longest_token_added
  revscores.features.numeric_chars_added
  revscores.features.prev_words
  revscores.features.proportion_of_markup_added
  revscores.features.proportion_of_numeric_added
  revscores.features.proportion_of_symbolic_added
  revscores.features.proportion_of_uppercase_added
  revscores.features.seconds_since_last_page_edit
  revscores.features.segments_added
  revscores.features.segments_removed
  revscores.features.user_age_in_seconds
  revscores.features.user_is_anon
  revscores.features.user_is_bot
)
./new_model revscores.scorers.LinearSVCModel "${features[@]}" \
  > models/reverts.halfak_mix.model
# Generate a file with a list of 5000 recent changes to work with, for ptwiki and enwiki

#######################################
# Write a revision id list: a "rev_id" header line followed by COUNT
# consecutive ids counting down from LAST (LAST, LAST-1, ...).
# The output file is truncated first, so re-running never appends a second
# header or duplicate ids to an existing list.
# Arguments: $1 - highest (most recent) revision id
#            $2 - number of ids to write
#            $3 - output file path
# Outputs:   writes the list to the file given in $3
#######################################
write_rev_ids() {
  local last=$1
  local count=$2
  local outfile=$3
  local i
  {
    printf '%s\n' "rev_id"
    for ((i = 0; i < count; i++)); do
      printf '%s\n' "$(( last - i ))"
    done
  } > "$outfile"
}

# Make sure the target folder exists even when this step is run on its own.
mkdir -p datasets
write_rev_ids 41142714 5000 datasets/ptwiki.rev_pages.tsv
write_rev_ids 643577066 5000 datasets/enwiki.rev_pages.tsv
# Get the feature values for each revision in the given file
# This takes a while...
# For each wiki (pt first, then en) the header line of the revision id list
# is dropped with tail, the remaining ids are piped to ./features_reverted
# together with that wiki's API endpoint, and stdout is saved as a TSV.
# NOTE(review): the git diff shown further down this page swaps English for
# Portuguese inside ores/features_reverted.py; presumably that change was
# applied for the ptwiki run only -- confirm before comparing results.
for wiki in pt en; do
  tail -n +2 "datasets/${wiki}wiki.rev_pages.tsv" \
    | ./features_reverted models/reverts.halfak_mix.model \
        revscores.scorers.LinearSVCModel \
        --api="https://${wiki}.wikipedia.org/w/api.php" \
    > "datasets/${wiki}wiki.features_reverted.tsv"
done
# Test the classifier
# Runs ./train_test once per wiki (pt first, then en) against the feature
# table extracted for that wiki; stdout of each run is kept as a log file
# under models/.
for wiki in pt en; do
  ./train_test models/reverts.halfak_mix.model \
    revscores.scorers.LinearSVCModel \
    --feature-scores="datasets/${wiki}wiki.features_reverted.tsv" \
    > "models/${wiki}wiki_train_test.log"
done
# First execution of ./train_test on enwiki...
{'auc': 0.5,
'mean.accuracy': 0.94854434664861209,
'roc': {'fpr': [0.0, 1.0],
'thresholds': [1.0000001000000101, 1.0000000994736041e-07],
'tpr': [0.0, 1.0]}}
# Second execution on enwiki
{'auc': 0.56994491826581384,
'mean.accuracy': 0.95260663507109,
'roc': {'fpr': [0.0,
0.0031982942430703624,
0.0035536602700781805,
0.0035536602700781805,
0.0039090262970859985,
0.0039090262970859985,
0.0042643923240938165,
0.0046197583511016346,
0.0046197583511016346,
0.0046197583511016346,
0.0049751243781094526,
0.0049751243781094526,
0.0049751243781094526,
0.0053304904051172707,
0.0053304904051172707,
0.0053304904051172707,
0.0053304904051172707,
0.0056858564321250887,
0.0060412224591329068,
0.0063965884861407248,
0.0063965884861407248,
0.0063965884861407248,
0.0067519545131485429,
0.0071073205401563609,
0.007462686567164179,
0.007818052594171997,
0.0081734186211798159,
0.0085287846481876331,
0.008884150675195452,
0.0092395167022032692,
0.0095948827292110881,
0.0099502487562189053,
0.010305614783226724,
0.010660980810234541,
0.01101634683724236,
0.011371712864250177,
0.011727078891257996,
0.012082444918265814,
0.012437810945273632,
0.01279317697228145,
0.013148542999289269,
0.013503909026297086,
0.013859275053304905,
0.013859275053304905,
0.014214641080312722,
0.014570007107320541,
0.014925373134328358,
0.015280739161336177,
0.015636105188343994,
0.015991471215351813,
0.016346837242359632,
0.016702203269367447,
0.017057569296375266,
0.017412935323383085,
0.017768301350390904,
0.018123667377398719,
0.018479033404406538,
0.018479033404406538,
0.018834399431414357,
0.019189765458422176,
0.019545131485429992,
0.019900497512437811,
0.020255863539445629,
0.020611229566453448,
0.020966595593461264,
0.020966595593461264,
0.021321961620469083,
0.021677327647476902,
0.022032693674484721,
0.022388059701492536,
0.022743425728500355,
0.023098791755508174,
0.023454157782515993,
0.023454157782515993,
0.023809523809523808,
0.024164889836531627,
0.024520255863539446,
0.024875621890547265,
0.02523098791755508,
0.025586353944562899,
0.025941719971570718,
0.026297085998578537,
0.026652452025586353,
0.027007818052594171,
0.02736318407960199,
0.027718550106609809,
0.028073916133617625,
0.028429282160625444,
0.028784648187633263,
0.029140014214641081,
0.029495380241648897,
0.029850746268656716,
0.030206112295664535,
0.030561478322672354,
0.030916844349680169,
0.031272210376687988,
0.031627576403695803,
0.031982942430703626,
0.032338308457711441,
0.032693674484719264,
0.033049040511727079,
0.033404406538734895,
0.033759772565742717,
0.034115138592750532,
0.034470504619758348,
0.03482587064676617,
0.035181236673773986,
0.035536602700781808,
1.0],
'thresholds': [1.9999999996412612,
0.99999999964126118,
0.95877573307109398,
0.80146641029619459,
0.38633240992310258,
0.35968047122307595,
0.19592388841212452,
0.15161189091716035,
0.13017317931614111,
0.090944345147339151,
0.074623126852286614,
0.053706382927128322,
0.052363624059411508,
0.051262036779861589,
0.0507526524000581,
0.046728283261885419,
0.04620383878501267,
0.046089621294653438,
0.045744817564613321,
0.044919955468827863,
0.044687784103430626,
0.044638412706598761,
0.04433241400741085,
0.043820662224733083,
0.04270472067466758,
0.042264504032253597,
0.042200667600442408,
0.041879945693032848,
0.040832271106939884,
0.040761135096734895,
0.040442846005736811,
0.040012088590854498,
0.039989783934549779,
0.039000453328081365,
0.038818683150793606,
0.038099478059498801,
0.037604269516953483,
0.037487634205222914,
0.037318620151474247,
0.036469155467300819,
0.034355769360479634,
0.032608411641521388,
0.032589406430512127,
0.032112367798813976,
0.032070191310988601,
0.032047301801910809,
0.030904654359081383,
0.030795037097947189,
0.030081841748079957,
0.030013902251674659,
0.029832114691969478,
0.029746785979474295,
0.02932650638248337,
0.028963009222557665,
0.028787195830680894,
0.028685989271936212,
0.028117054154748584,
0.027944768600073531,
0.026500325010695245,
0.02595560806385093,
0.025619498785212171,
0.023887834852721088,
0.02190833246787767,
0.021819118633155606,
0.021435941889516998,
0.021108677219662121,
0.020740013072796354,
0.019297408491013895,
0.018630319995517029,
0.018267775103999524,
0.017082371712057318,
0.016556685345489749,
0.016297258759134466,
0.013931202308959887,
0.012139593678207277,
0.011331195053919628,
0.0096366438344114135,
0.004426783814641371,
0.0026377156196401709,
0.0018630918948446607,
0.0017584038164978437,
0.0013928769406068707,
0.0012819332292758479,
0.0010321963142093406,
0.00093506517588354916,
0.00091923759201263298,
0.00050342919248945223,
0.00038435918154957688,
0.00035637717394329776,
0.00033909573292594826,
9.8699500103535892e-05,
6.8122878154119201e-05,
6.7246379551524473e-05,
6.0038324451441716e-05,
5.6843079241338376e-05,
4.6719581359860166e-05,
4.5914158979221126e-05,
4.4707788441602334e-05,
4.3291849246832494e-05,
4.0313733719052825e-05,
3.8671451139065391e-05,
3.5617987055934371e-05,
1.6446501359631546e-05,
2.9432340119122789e-06,
2.689630022147459e-06,
2.6386427335379531e-06,
2.3241862436613738e-06,
1.7731260062565111e-06,
1.0000000994736041e-07],
'tpr': [0.0,
0.064285714285714279,
0.064285714285714279,
0.071428571428571425,
0.071428571428571425,
0.07857142857142857,
0.07857142857142857,
0.07857142857142857,
0.085714285714285715,
0.09285714285714286,
0.09285714285714286,
0.10000000000000001,
0.10714285714285714,
0.10714285714285714,
0.11428571428571428,
0.12142857142857143,
0.12857142857142856,
0.12857142857142856,
0.12857142857142856,
0.12857142857142856,
0.1357142857142857,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.14999999999999999,
0.15714285714285714,
0.15714285714285714,
0.15714285714285714,
0.15714285714285714,
0.15714285714285714,
0.15714285714285714,
0.15714285714285714,
0.15714285714285714,
0.16428571428571428,
0.16428571428571428,
0.16428571428571428,
0.16428571428571428,
0.16428571428571428,
0.16428571428571428,
0.16428571428571428,
0.16428571428571428,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
0.17142857142857143,
1.0]}}
# Third execution on enwiki
{'auc': 0.56459468293745918,
'mean.accuracy': 0.94651320243737302,
'roc': {'fpr': [0.0,
0.011685552407932011,
0.012039660056657223,
0.012039660056657223,
0.012393767705382436,
0.012747875354107648,
0.013101983002832862,
0.013101983002832862,
0.013101983002832862,
0.013456090651558074,
0.013810198300283285,
0.013810198300283285,
0.014164305949008499,
0.014518413597733711,
0.014872521246458924,
0.015226628895184136,
0.015226628895184136,
0.015580736543909348,
0.015934844192634561,
0.016288951841359773,
0.016643059490084985,
0.0169971671388102,
0.017351274787535412,
0.017351274787535412,
0.017705382436260624,
0.018059490084985835,
0.018059490084985835,
0.018059490084985835,
0.018413597733711047,
0.018767705382436262,
0.019121813031161474,
0.019475920679886686,
0.019830028328611898,
0.020184135977337109,
0.020538243626062325,
0.020538243626062325,
0.020538243626062325,
0.020892351274787536,
0.021246458923512748,
0.02160056657223796,
0.021954674220963172,
0.022308781869688384,
0.022662889518413599,
0.023016997167138811,
0.023371104815864022,
0.023371104815864022,
0.023725212464589234,
0.024079320113314446,
0.024433427762039661,
0.024787535410764873,
0.025141643059490085,
0.025495750708215296,
0.025849858356940508,
0.026203966005665724,
0.026558073654390935,
0.026912181303116147,
0.026912181303116147,
0.027266288951841359,
0.027620396600566571,
0.027974504249291786,
0.027974504249291786,
0.028328611898016998,
0.028682719546742209,
0.029036827195467421,
0.029390934844192633,
0.029745042492917848,
0.029745042492917848,
0.03009915014164306,
0.030453257790368272,
0.030807365439093484,
0.031161473087818695,
0.031515580736543911,
0.031869688385269122,
0.032223796033994334,
0.032577903682719546,
0.032932011331444758,
0.033286118980169969,
0.033640226628895181,
0.0339943342776204,
0.034348441926345612,
0.034702549575070823,
0.035056657223796035,
0.035410764872521247,
0.035764872521246459,
0.036118980169971671,
0.036473087818696882,
0.036473087818696882,
0.036827195467422094,
0.037181303116147306,
0.037535410764872525,
0.037889518413597736,
0.038243626062322948,
0.03859773371104816,
0.038951841359773372,
0.039305949008498584,
0.039660056657223795,
0.040014164305949007,
0.040368271954674219,
0.040368271954674219,
0.040722379603399431,
0.041076487252124649,
0.041430594900849861,
0.041784702549575073,
0.042138810198300285,
0.042492917847025496,
0.042847025495750708,
0.04320113314447592,
0.043555240793201132,
0.043909348441926344,
0.044263456090651555,
0.044617563739376767,
0.044971671388101986,
0.045325779036827198,
0.045679886685552409,
0.046033994334277621,
0.046388101983002833,
0.046742209631728045,
0.047096317280453256,
0.047450424929178468,
0.04780453257790368,
0.048158640226628892,
0.048512747875354111,
0.048866855524079322,
0.049220963172804534,
0.049575070821529746,
0.049929178470254958,
0.050283286118980169,
0.050637393767705381,
0.050991501416430593,
0.051345609065155805,
0.051699716713881017,
0.052053824362606235,
0.052407932011331447,
0.052762039660056659,
0.053116147308781871,
0.053470254957507082,
0.053824362606232294,
0.054178470254957506,
0.054532577903682718,
0.054886685552407929,
0.055240793201133141,
0.05559490084985836,
0.055949008498583572,
0.056303116147308784,
0.056657223796033995,
0.057011331444759207,
0.057365439093484419,
0.057719546742209631,
0.058073654390934842,
0.058427762039660054,
0.058781869688385266,
0.059135977337110485,
0.059490084985835696,
0.06019830028328612,
0.060552407932011332,
1.0],
'thresholds': [1.9999999995130122,
0.99999999951301222,
0.99999922610566894,
0.99999802261649329,
0.99999794485592242,
0.99999578320143323,
0.99998858062509044,
0.97318936860166105,
0.86722198397116157,
0.85645586843378041,
0.80554964887076108,
0.79209706127514756,
0.75995887660898109,
0.1772773476173338,
0.076928618173239025,
0.069661404049511724,
0.06856209099112659,
0.061112854294461241,
0.060766065115236398,
0.057367794935563481,
0.056634619509886582,
0.056124153175433414,
0.055698981436175139,
0.05405461629160907,
0.053653082509851555,
0.053509109929661562,
0.053206096299036186,
0.053166783208087487,
0.053088335383044472,
0.052906807769021735,
0.052832448154199775,
0.052762142178590946,
0.052732848983516135,
0.052584604062002452,
0.052508511143041933,
0.052463068591988238,
0.052347674264228282,
0.052254349204426488,
0.051916969872161101,
0.05172518598775494,
0.051652636142397373,
0.051626702871608629,
0.05157603736065005,
0.051522402452400508,
0.05151749493479503,
0.051494415661138,
0.05137869571442702,
0.051295023702707672,
0.051227806494315783,
0.051214723300319412,
0.051196090688569666,
0.051091176391975718,
0.051025093487649206,
0.050464886411225346,
0.050292361673652135,
0.050263690583474359,
0.050013949888141153,
0.049706245312802648,
0.048854013637168257,
0.048751495131606433,
0.048732981551418343,
0.048636337562959868,
0.048592995419575069,
0.048004152492242026,
0.04788241534295682,
0.047618046244232395,
0.045745624169478097,
0.044166323981549946,
0.043953166119563056,
0.043848355790076038,
0.043814021544778835,
0.043771805824554824,
0.043565062926594407,
0.043433562107963931,
0.041714719511327353,
0.040654232296894265,
0.039814256049251137,
0.039541429499354061,
0.038783616201353899,
0.037674199146467337,
0.037534012984121172,
0.037162785659857367,
0.035282442974400315,
0.034665619938390277,
0.03431233208686995,
0.032812617904501629,
0.031195195526032816,
0.030009447131042616,
0.029745238504465022,
0.028964395156306968,
0.028825522920995637,
0.02251584672769975,
0.022072471175043511,
0.021998843385786216,
0.021751224221392609,
0.017501266339660104,
0.016931875995438533,
0.016619900461292907,
0.01629704327527523,
0.013168315180256251,
0.010957122316054045,
0.010369946821198879,
0.0073444561350290653,
0.0070827244455757147,
0.0067316397444387339,
0.0063981732623333096,
0.0063777842306349097,
0.0063682596936972206,
0.0063563508091673687,
0.0063494943478174743,
0.0063345735973986119,
0.0045036584911865444,
0.0044872496781646872,
0.0044446009455101163,
0.0043107420771679276,
0.0043045689123021105,
0.004271728424515943,
0.0042644209294515848,
0.0042633273353353349,
0.0042628646892724827,
0.0042626918175961981,
0.0042382629064750621,
0.0042265225046720194,
0.004180531625639263,
0.0039760033953746216,
0.0038189196301124557,
0.0037537614995409095,
0.0033138102077631942,
0.0019712273859103144,
0.0019679170720275823,
0.0019151065407189136,
0.0019121711281630356,
0.0018969282363798089,
0.00051387091665156957,
0.00021265667338043855,
0.00011091707576406661,
0.00011064428535847308,
0.00011061477746584589,
0.00011050464883070062,
0.00011045076077691971,
0.00011002806281139091,
0.00010983130765829656,
5.1707370067261508e-05,
2.0227093208031559e-05,
1.4949694013564998e-05,
1.1393530985510732e-05,
8.1794560994997387e-06,
7.3161754809980728e-06,
4.5399712664130534e-06,
4.2297812570433086e-06,
1.3540015113494813e-06,
7.8925649036224459e-07,
6.1200538588484267e-07,
5.5254298757774442e-07,
1.6054774223012589e-07,
1.0000000994736041e-07],
'tpr': [0.0,
0.061538461538461542,
0.061538461538461542,
0.069230769230769235,
0.069230769230769235,
0.069230769230769235,
0.069230769230769235,
0.076923076923076927,
0.08461538461538462,
0.08461538461538462,
0.08461538461538462,
0.092307692307692313,
0.092307692307692313,
0.092307692307692313,
0.092307692307692313,
0.092307692307692313,
0.10000000000000001,
0.10000000000000001,
0.10000000000000001,
0.10000000000000001,
0.10000000000000001,
0.10000000000000001,
0.10000000000000001,
0.1076923076923077,
0.1076923076923077,
0.1076923076923077,
0.11538461538461539,
0.12307692307692308,
0.12307692307692308,
0.12307692307692308,
0.12307692307692308,
0.12307692307692308,
0.12307692307692308,
0.12307692307692308,
0.12307692307692308,
0.13076923076923078,
0.13846153846153847,
0.13846153846153847,
0.13846153846153847,
0.13846153846153847,
0.13846153846153847,
0.13846153846153847,
0.13846153846153847,
0.13846153846153847,
0.13846153846153847,
0.14615384615384616,
0.14615384615384616,
0.14615384615384616,
0.14615384615384616,
0.14615384615384616,
0.14615384615384616,
0.14615384615384616,
0.14615384615384616,
0.14615384615384616,
0.14615384615384616,
0.14615384615384616,
0.15384615384615385,
0.15384615384615385,
0.15384615384615385,
0.15384615384615385,
0.16153846153846155,
0.16153846153846155,
0.16153846153846155,
0.16153846153846155,
0.16153846153846155,
0.16153846153846155,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.16923076923076924,
0.17692307692307693,
0.17692307692307693,
0.17692307692307693,
0.17692307692307693,
0.17692307692307693,
0.17692307692307693,
0.17692307692307693,
0.17692307692307693,
0.17692307692307693,
0.17692307692307693,
0.17692307692307693,
0.17692307692307693,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
0.18461538461538463,
1.0]}}
# Fourth execution on enwiki
{'auc': 0.55297619047619051,
'mean.accuracy': 0.95023696682464454,
'roc': {'fpr': [0.0,
0.0060412224591329068,
0.0060412224591329068,
0.0060412224591329068,
0.0063965884861407248,
0.0067519545131485429,
0.0067519545131485429,
0.0071073205401563609,
0.007462686567164179,
0.007462686567164179,
0.007462686567164179,
0.007818052594171997,
0.007818052594171997,
0.007818052594171997,
0.0081734186211798159,
1.0],
'thresholds': [1.9999999999999698,
0.99999999999996991,
0.99999992890052403,
0.97515583207241496,
0.018978754924332587,
0.0018265776394870987,
0.00054395049877190157,
0.00036774511623237821,
0.00030118057657604533,
0.00018029984895817771,
0.00015680264219386422,
7.7985049196443749e-06,
3.1313300093959929e-06,
8.9733767524214136e-07,
3.9393164882919198e-07,
1.0000000994736041e-07],
'tpr': [0.0,
0.064285714285714279,
0.071428571428571425,
0.07857142857142857,
0.07857142857142857,
0.07857142857142857,
0.085714285714285715,
0.085714285714285715,
0.085714285714285715,
0.09285714285714286,
0.10000000000000001,
0.10000000000000001,
0.10714285714285714,
0.11428571428571428,
0.11428571428571428,
1.0]}}
$ git diff
diff --git a/ores/features_reverted.py b/ores/features_reverted.py
index 1f4089f..0a2b076 100644
--- a/ores/features_reverted.py
+++ b/ores/features_reverted.py
@@ -27,7 +27,7 @@ from mw import api
from mw.lib import reverts
from revscores.extractors import APIExtractor
-from revscores.language import English
+from revscores.language import Portuguese
from revscores.scorers import MLScorerModel
@@ -72,7 +72,7 @@ def main():
def run(rev_pages, api_url, model):
session = api.Session(api_url)
- extractor = APIExtractor(session, language=English()) # This is a hack. Need to fix languages
+ extractor = APIExtractor(session, language=Portuguese()) # This is a hack. Need to fix languages
for rev_id, page_id in rev_pages:
sys.stderr.write(".");sys.stderr.flush()
# First execution of ./train_test on ptwiki...
{'auc': 0.49815361890694237,
'mean.accuracy': 0.93670010377032165,
'roc': {'fpr': [0.0,
0.00036927621861152144,
0.00073855243722304289,
0.0011078286558345643,
0.0014771048744460858,
0.001846381093057607,
0.0022156573116691287,
0.0025849335302806499,
0.0029542097488921715,
0.0033234859675036928,
0.003692762186115214,
1.0],
'thresholds': [1.9999999848513053,
0.99999998485130526,
0.99999967335064954,
0.0040546403636459449,
0.00021421071548512178,
0.00014188051947484933,
3.1312197668620073e-05,
4.9616896396941686e-06,
3.3696879126852102e-06,
2.2069682944646386e-07,
1.22812681119808e-07,
1.0000000994736041e-07],
'tpr': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]}}
# Second execution on ptwiki
{'auc': 0.50924832984958401,
'mean.accuracy': 0.92493946731234866,
'roc': {'fpr': [0.0,
0.016967908520841016,
0.017336776097381039,
0.017705643673921061,
0.018074511250461084,
0.018443378827001106,
0.018812246403541129,
0.019181113980081151,
0.019549981556621174,
0.019918849133161196,
0.020287716709701219,
0.020656584286241238,
0.02102545186278126,
0.021394319439321283,
0.021763187015861305,
0.022132054592401328,
0.02250092216894135,
0.022869789745481373,
0.023238657322021395,
0.023607524898561418,
0.023607524898561418,
0.02397639247510144,
0.024345260051641459,
0.024714127628181481,
0.025082995204721504,
0.025451862781261526,
0.025820730357801549,
0.026189597934341571,
0.026558465510881594,
0.026927333087421616,
0.027296200663961639,
0.027665068240501661,
0.028033935817041684,
0.028402803393581703,
0.028771670970121725,
0.029140538546661748,
0.02950940612320177,
0.029878273699741793,
0.030247141276281815,
0.030616008852821838,
0.03098487642936186,
0.031722611582441905,
0.032091479158981924,
1.0],
'thresholds': [1.9999999999999698,
0.99999999999996991,
0.096802407421953507,
0.089412381757115272,
0.074138922654953476,
0.040158783921046742,
0.036140790032085486,
0.017290340971734024,
0.016659387854422072,
0.013532726685682847,
0.012569234440790633,
0.009695562830866828,
0.0092936447294306689,
0.0080976333335785172,
0.0029003736355214622,
0.0028861416711657168,
0.0022506629960438538,
0.0020024077734358266,
0.0019574881476074808,
0.0018277680703899283,
0.0010992870932229767,
0.00045026134013702405,
0.00025831120661681133,
0.00017813016768737885,
8.5625813855319774e-05,
8.0145451166731754e-05,
3.6955757083710469e-05,
2.8660749231630018e-05,
2.415113110111705e-05,
1.9833253843350468e-05,
1.6354738118159407e-05,
9.1620016346133682e-06,
7.8343023999810645e-06,
7.7174550253633484e-06,
5.7617051518604711e-06,
2.6927204622535728e-06,
1.3209113531529292e-06,
7.657563092113605e-07,
2.9748184983077608e-07,
2.2570893419469584e-07,
1.7544208555271606e-07,
1.5798922745889328e-07,
1.2577916003146519e-07,
1.0000000994736041e-07],
'tpr': [0.0,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.044444444444444446,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
0.050000000000000003,
1.0]}}
# Third execution on ptwiki
{'auc': 0.48444408697050989,
'mean.accuracy': 0.2151504669664476,
'roc': {'fpr': [0.0,
0.83578251285819249,
0.8361498897869214,
0.8365172667156503,
0.83688464364437909,
0.83725202057310799,
0.83761939750183689,
0.8379867744305658,
0.8379867744305658,
0.8379867744305658,
0.83835415135929459,
0.83872152828802349,
0.83908890521675239,
0.83945628214548129,
0.83982365907421019,
0.84019103600293898,
0.84055841293166789,
0.84092578986039679,
0.84129316678912569,
0.84166054371785448,
0.84202792064658338,
0.84239529757531229,
0.84276267450404119,
0.84313005143276998,
0.84349742836149888,
0.84386480529022778,
0.84423218221895668,
0.84459955914768547,
0.84496693607641438,
0.84533431300514328,
0.84570168993387218,
0.84606906686260108,
0.84643644379132987,
1.0],
'thresholds': [1.99999999137472,
0.99999999137471995,
0.99999998103695342,
0.9999990623444398,
0.99633689102048673,
0.99571838332029061,
0.99364972693751652,
0.98952232779173099,
0.9813576639370305,
0.95243903493014737,
0.7609784684745392,
0.60880093031158156,
0.44425551301861099,
0.42103801050731743,
0.41443554979070268,
0.27905932973758962,
0.24088579876361726,
0.24052972605204578,
0.20854202113006987,
0.14349918575503551,
0.12180166748940285,
0.064640668850418609,
0.023408436148379164,
0.0095227906445994465,
0.0070523590764899474,
0.00067055108045023634,
0.00052854882318463547,
0.00042098409110264063,
3.5659863873933554e-05,
3.0355211781315324e-05,
1.5846177081593103e-06,
8.4155847047155778e-07,
4.439046279309538e-07,
1.0000000994736041e-07],
'tpr': [0.0,
0.80473372781065089,
0.80473372781065089,
0.80473372781065089,
0.80473372781065089,
0.80473372781065089,
0.80473372781065089,
0.80473372781065089,
0.81065088757396453,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
0.81656804733727806,
1.0]}}
# Fourth execution on ptwiki
{'auc': 0.49740644683216007,
'mean.accuracy': 0.93358699411968182,
'roc': {'fpr': [0.0,
0.00037050759540570581,
0.00074101519081141163,
0.0011115227862171174,
0.0014820303816228233,
0.0018525379770285291,
0.0022230455724342349,
0.0025935531678399409,
0.0029640607632456465,
0.0033345683586513525,
0.0037050759540570581,
0.0040755835494627642,
0.0044460911448684698,
0.0048165987402741754,
0.0051871063356798818,
1.0],
'thresholds': [1.0009092133164244,
0.00090921331642442082,
0.00067973845349362518,
0.00043392225328895328,
0.00037027258347507392,
0.00035227911860226288,
0.00014658287727269996,
8.0405999537700397e-05,
7.4746434475894761e-05,
2.5865967534841318e-05,
2.253235391765644e-05,
1.3728185311603878e-05,
1.0130928408969309e-05,
7.1915934856279247e-07,
4.9476165363078197e-07,
1.0000000994736041e-07],
'tpr': [0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
1.0]}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment