Last active
August 29, 2015 14:13
-
-
Save he7d3r/78c3f2200692c0d467bb to your computer and use it in GitHub Desktop.
Testing ORES
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create the output folders used by the steps below.
# -p keeps this step repeatable: it does not fail when the folders
# already exist from an earlier run.
mkdir -p models datasets
# Generate a file with a new model.
# The features are listed by short name; the common "revscores.features."
# prefix is prepended to every element when the array is expanded, so
# ./new_model receives the same fully-qualified names, in the same order.
feature_names=(
  added_badwords_ratio
  added_misspellings_ratio
  day_of_week_in_utc
  hour_of_day_in_utc
  is_custom_comment
  is_mainspace
  is_section_comment
  longest_repeated_char_added
  longest_token_added
  numeric_chars_added
  prev_words
  proportion_of_markup_added
  proportion_of_numeric_added
  proportion_of_symbolic_added
  proportion_of_uppercase_added
  seconds_since_last_page_edit
  segments_added
  segments_removed
  user_age_in_seconds
  user_is_anon
  user_is_bot
)

# stdout of ./new_model is captured as the model file.
./new_model revscores.scorers.LinearSVCModel \
  "${feature_names[@]/#/revscores.features.}" \
  > models/reverts.halfak_mix.model
# Generate a file with a list of 5000 recent changes to work with, for ptwiki and enwiki

#######################################
# Write a one-column TSV: a "rev_id" header line followed by consecutive
# revision ids counting down from the given id.
# The output file is overwritten (not appended to), so running this step
# again does not duplicate the header or the ids.
# Arguments:
#   $1 - highest (most recent) revision id to start from
#   $2 - path of the file to write
#   $3 - how many ids to write (optional, default 5000)
# Outputs:
#   Writes the list to the file named by $2; creates its parent folder
#   if it is missing.
#######################################
write_rev_ids() {
  local last=$1
  local out=$2
  local count=${3:-5000}
  local i

  mkdir -p -- "$(dirname -- "$out")"
  {
    printf 'rev_id\n'
    for ((i = 0; i < count; i++)); do
      printf '%d\n' "$((last - i))"
    done
  } > "$out"
}

write_rev_ids 41142714 datasets/ptwiki.rev_pages.tsv
write_rev_ids 643577066 datasets/enwiki.rev_pages.tsv
# Get the feature values for each revision in the given file
# This takes a while...
# For each wiki, the first line of its rev_pages file is skipped
# (tail -n +2 starts at line 2) and the remaining lines are piped into
# ./features_reverted; its stdout is saved per wiki under datasets/.
for lang in pt en; do
  tail -n +2 "datasets/${lang}wiki.rev_pages.tsv" \
    | ./features_reverted models/reverts.halfak_mix.model \
        revscores.scorers.LinearSVCModel \
        --api="https://${lang}.wikipedia.org/w/api.php" \
        > "datasets/${lang}wiki.features_reverted.tsv"
done
# Test the classifier
# One ./train_test run per wiki, fed with that wiki's feature file;
# the stdout of each run is kept as a log file in models/.
for wiki in ptwiki enwiki; do
  ./train_test models/reverts.halfak_mix.model \
    revscores.scorers.LinearSVCModel \
    --feature-scores="datasets/${wiki}.features_reverted.tsv" \
    > "models/${wiki}_train_test.log"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First execution of ./train_test on enwiki... | |
{'auc': 0.5, | |
'mean.accuracy': 0.94854434664861209, | |
'roc': {'fpr': [0.0, 1.0], | |
'thresholds': [1.0000001000000101, 1.0000000994736041e-07], | |
'tpr': [0.0, 1.0]}} | |
# Second execution on enwiki | |
{'auc': 0.56994491826581384, | |
'mean.accuracy': 0.95260663507109, | |
'roc': {'fpr': [0.0, | |
0.0031982942430703624, | |
0.0035536602700781805, | |
0.0035536602700781805, | |
0.0039090262970859985, | |
0.0039090262970859985, | |
0.0042643923240938165, | |
0.0046197583511016346, | |
0.0046197583511016346, | |
0.0046197583511016346, | |
0.0049751243781094526, | |
0.0049751243781094526, | |
0.0049751243781094526, | |
0.0053304904051172707, | |
0.0053304904051172707, | |
0.0053304904051172707, | |
0.0053304904051172707, | |
0.0056858564321250887, | |
0.0060412224591329068, | |
0.0063965884861407248, | |
0.0063965884861407248, | |
0.0063965884861407248, | |
0.0067519545131485429, | |
0.0071073205401563609, | |
0.007462686567164179, | |
0.007818052594171997, | |
0.0081734186211798159, | |
0.0085287846481876331, | |
0.008884150675195452, | |
0.0092395167022032692, | |
0.0095948827292110881, | |
0.0099502487562189053, | |
0.010305614783226724, | |
0.010660980810234541, | |
0.01101634683724236, | |
0.011371712864250177, | |
0.011727078891257996, | |
0.012082444918265814, | |
0.012437810945273632, | |
0.01279317697228145, | |
0.013148542999289269, | |
0.013503909026297086, | |
0.013859275053304905, | |
0.013859275053304905, | |
0.014214641080312722, | |
0.014570007107320541, | |
0.014925373134328358, | |
0.015280739161336177, | |
0.015636105188343994, | |
0.015991471215351813, | |
0.016346837242359632, | |
0.016702203269367447, | |
0.017057569296375266, | |
0.017412935323383085, | |
0.017768301350390904, | |
0.018123667377398719, | |
0.018479033404406538, | |
0.018479033404406538, | |
0.018834399431414357, | |
0.019189765458422176, | |
0.019545131485429992, | |
0.019900497512437811, | |
0.020255863539445629, | |
0.020611229566453448, | |
0.020966595593461264, | |
0.020966595593461264, | |
0.021321961620469083, | |
0.021677327647476902, | |
0.022032693674484721, | |
0.022388059701492536, | |
0.022743425728500355, | |
0.023098791755508174, | |
0.023454157782515993, | |
0.023454157782515993, | |
0.023809523809523808, | |
0.024164889836531627, | |
0.024520255863539446, | |
0.024875621890547265, | |
0.02523098791755508, | |
0.025586353944562899, | |
0.025941719971570718, | |
0.026297085998578537, | |
0.026652452025586353, | |
0.027007818052594171, | |
0.02736318407960199, | |
0.027718550106609809, | |
0.028073916133617625, | |
0.028429282160625444, | |
0.028784648187633263, | |
0.029140014214641081, | |
0.029495380241648897, | |
0.029850746268656716, | |
0.030206112295664535, | |
0.030561478322672354, | |
0.030916844349680169, | |
0.031272210376687988, | |
0.031627576403695803, | |
0.031982942430703626, | |
0.032338308457711441, | |
0.032693674484719264, | |
0.033049040511727079, | |
0.033404406538734895, | |
0.033759772565742717, | |
0.034115138592750532, | |
0.034470504619758348, | |
0.03482587064676617, | |
0.035181236673773986, | |
0.035536602700781808, | |
1.0], | |
'thresholds': [1.9999999996412612, | |
0.99999999964126118, | |
0.95877573307109398, | |
0.80146641029619459, | |
0.38633240992310258, | |
0.35968047122307595, | |
0.19592388841212452, | |
0.15161189091716035, | |
0.13017317931614111, | |
0.090944345147339151, | |
0.074623126852286614, | |
0.053706382927128322, | |
0.052363624059411508, | |
0.051262036779861589, | |
0.0507526524000581, | |
0.046728283261885419, | |
0.04620383878501267, | |
0.046089621294653438, | |
0.045744817564613321, | |
0.044919955468827863, | |
0.044687784103430626, | |
0.044638412706598761, | |
0.04433241400741085, | |
0.043820662224733083, | |
0.04270472067466758, | |
0.042264504032253597, | |
0.042200667600442408, | |
0.041879945693032848, | |
0.040832271106939884, | |
0.040761135096734895, | |
0.040442846005736811, | |
0.040012088590854498, | |
0.039989783934549779, | |
0.039000453328081365, | |
0.038818683150793606, | |
0.038099478059498801, | |
0.037604269516953483, | |
0.037487634205222914, | |
0.037318620151474247, | |
0.036469155467300819, | |
0.034355769360479634, | |
0.032608411641521388, | |
0.032589406430512127, | |
0.032112367798813976, | |
0.032070191310988601, | |
0.032047301801910809, | |
0.030904654359081383, | |
0.030795037097947189, | |
0.030081841748079957, | |
0.030013902251674659, | |
0.029832114691969478, | |
0.029746785979474295, | |
0.02932650638248337, | |
0.028963009222557665, | |
0.028787195830680894, | |
0.028685989271936212, | |
0.028117054154748584, | |
0.027944768600073531, | |
0.026500325010695245, | |
0.02595560806385093, | |
0.025619498785212171, | |
0.023887834852721088, | |
0.02190833246787767, | |
0.021819118633155606, | |
0.021435941889516998, | |
0.021108677219662121, | |
0.020740013072796354, | |
0.019297408491013895, | |
0.018630319995517029, | |
0.018267775103999524, | |
0.017082371712057318, | |
0.016556685345489749, | |
0.016297258759134466, | |
0.013931202308959887, | |
0.012139593678207277, | |
0.011331195053919628, | |
0.0096366438344114135, | |
0.004426783814641371, | |
0.0026377156196401709, | |
0.0018630918948446607, | |
0.0017584038164978437, | |
0.0013928769406068707, | |
0.0012819332292758479, | |
0.0010321963142093406, | |
0.00093506517588354916, | |
0.00091923759201263298, | |
0.00050342919248945223, | |
0.00038435918154957688, | |
0.00035637717394329776, | |
0.00033909573292594826, | |
9.8699500103535892e-05, | |
6.8122878154119201e-05, | |
6.7246379551524473e-05, | |
6.0038324451441716e-05, | |
5.6843079241338376e-05, | |
4.6719581359860166e-05, | |
4.5914158979221126e-05, | |
4.4707788441602334e-05, | |
4.3291849246832494e-05, | |
4.0313733719052825e-05, | |
3.8671451139065391e-05, | |
3.5617987055934371e-05, | |
1.6446501359631546e-05, | |
2.9432340119122789e-06, | |
2.689630022147459e-06, | |
2.6386427335379531e-06, | |
2.3241862436613738e-06, | |
1.7731260062565111e-06, | |
1.0000000994736041e-07], | |
'tpr': [0.0, | |
0.064285714285714279, | |
0.064285714285714279, | |
0.071428571428571425, | |
0.071428571428571425, | |
0.07857142857142857, | |
0.07857142857142857, | |
0.07857142857142857, | |
0.085714285714285715, | |
0.09285714285714286, | |
0.09285714285714286, | |
0.10000000000000001, | |
0.10714285714285714, | |
0.10714285714285714, | |
0.11428571428571428, | |
0.12142857142857143, | |
0.12857142857142856, | |
0.12857142857142856, | |
0.12857142857142856, | |
0.12857142857142856, | |
0.1357142857142857, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14285714285714285, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.14999999999999999, | |
0.15714285714285714, | |
0.15714285714285714, | |
0.15714285714285714, | |
0.15714285714285714, | |
0.15714285714285714, | |
0.15714285714285714, | |
0.15714285714285714, | |
0.15714285714285714, | |
0.16428571428571428, | |
0.16428571428571428, | |
0.16428571428571428, | |
0.16428571428571428, | |
0.16428571428571428, | |
0.16428571428571428, | |
0.16428571428571428, | |
0.16428571428571428, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
0.17142857142857143, | |
1.0]}} | |
# Third execution on enwiki | |
{'auc': 0.56459468293745918, | |
'mean.accuracy': 0.94651320243737302, | |
'roc': {'fpr': [0.0, | |
0.011685552407932011, | |
0.012039660056657223, | |
0.012039660056657223, | |
0.012393767705382436, | |
0.012747875354107648, | |
0.013101983002832862, | |
0.013101983002832862, | |
0.013101983002832862, | |
0.013456090651558074, | |
0.013810198300283285, | |
0.013810198300283285, | |
0.014164305949008499, | |
0.014518413597733711, | |
0.014872521246458924, | |
0.015226628895184136, | |
0.015226628895184136, | |
0.015580736543909348, | |
0.015934844192634561, | |
0.016288951841359773, | |
0.016643059490084985, | |
0.0169971671388102, | |
0.017351274787535412, | |
0.017351274787535412, | |
0.017705382436260624, | |
0.018059490084985835, | |
0.018059490084985835, | |
0.018059490084985835, | |
0.018413597733711047, | |
0.018767705382436262, | |
0.019121813031161474, | |
0.019475920679886686, | |
0.019830028328611898, | |
0.020184135977337109, | |
0.020538243626062325, | |
0.020538243626062325, | |
0.020538243626062325, | |
0.020892351274787536, | |
0.021246458923512748, | |
0.02160056657223796, | |
0.021954674220963172, | |
0.022308781869688384, | |
0.022662889518413599, | |
0.023016997167138811, | |
0.023371104815864022, | |
0.023371104815864022, | |
0.023725212464589234, | |
0.024079320113314446, | |
0.024433427762039661, | |
0.024787535410764873, | |
0.025141643059490085, | |
0.025495750708215296, | |
0.025849858356940508, | |
0.026203966005665724, | |
0.026558073654390935, | |
0.026912181303116147, | |
0.026912181303116147, | |
0.027266288951841359, | |
0.027620396600566571, | |
0.027974504249291786, | |
0.027974504249291786, | |
0.028328611898016998, | |
0.028682719546742209, | |
0.029036827195467421, | |
0.029390934844192633, | |
0.029745042492917848, | |
0.029745042492917848, | |
0.03009915014164306, | |
0.030453257790368272, | |
0.030807365439093484, | |
0.031161473087818695, | |
0.031515580736543911, | |
0.031869688385269122, | |
0.032223796033994334, | |
0.032577903682719546, | |
0.032932011331444758, | |
0.033286118980169969, | |
0.033640226628895181, | |
0.0339943342776204, | |
0.034348441926345612, | |
0.034702549575070823, | |
0.035056657223796035, | |
0.035410764872521247, | |
0.035764872521246459, | |
0.036118980169971671, | |
0.036473087818696882, | |
0.036473087818696882, | |
0.036827195467422094, | |
0.037181303116147306, | |
0.037535410764872525, | |
0.037889518413597736, | |
0.038243626062322948, | |
0.03859773371104816, | |
0.038951841359773372, | |
0.039305949008498584, | |
0.039660056657223795, | |
0.040014164305949007, | |
0.040368271954674219, | |
0.040368271954674219, | |
0.040722379603399431, | |
0.041076487252124649, | |
0.041430594900849861, | |
0.041784702549575073, | |
0.042138810198300285, | |
0.042492917847025496, | |
0.042847025495750708, | |
0.04320113314447592, | |
0.043555240793201132, | |
0.043909348441926344, | |
0.044263456090651555, | |
0.044617563739376767, | |
0.044971671388101986, | |
0.045325779036827198, | |
0.045679886685552409, | |
0.046033994334277621, | |
0.046388101983002833, | |
0.046742209631728045, | |
0.047096317280453256, | |
0.047450424929178468, | |
0.04780453257790368, | |
0.048158640226628892, | |
0.048512747875354111, | |
0.048866855524079322, | |
0.049220963172804534, | |
0.049575070821529746, | |
0.049929178470254958, | |
0.050283286118980169, | |
0.050637393767705381, | |
0.050991501416430593, | |
0.051345609065155805, | |
0.051699716713881017, | |
0.052053824362606235, | |
0.052407932011331447, | |
0.052762039660056659, | |
0.053116147308781871, | |
0.053470254957507082, | |
0.053824362606232294, | |
0.054178470254957506, | |
0.054532577903682718, | |
0.054886685552407929, | |
0.055240793201133141, | |
0.05559490084985836, | |
0.055949008498583572, | |
0.056303116147308784, | |
0.056657223796033995, | |
0.057011331444759207, | |
0.057365439093484419, | |
0.057719546742209631, | |
0.058073654390934842, | |
0.058427762039660054, | |
0.058781869688385266, | |
0.059135977337110485, | |
0.059490084985835696, | |
0.06019830028328612, | |
0.060552407932011332, | |
1.0], | |
'thresholds': [1.9999999995130122, | |
0.99999999951301222, | |
0.99999922610566894, | |
0.99999802261649329, | |
0.99999794485592242, | |
0.99999578320143323, | |
0.99998858062509044, | |
0.97318936860166105, | |
0.86722198397116157, | |
0.85645586843378041, | |
0.80554964887076108, | |
0.79209706127514756, | |
0.75995887660898109, | |
0.1772773476173338, | |
0.076928618173239025, | |
0.069661404049511724, | |
0.06856209099112659, | |
0.061112854294461241, | |
0.060766065115236398, | |
0.057367794935563481, | |
0.056634619509886582, | |
0.056124153175433414, | |
0.055698981436175139, | |
0.05405461629160907, | |
0.053653082509851555, | |
0.053509109929661562, | |
0.053206096299036186, | |
0.053166783208087487, | |
0.053088335383044472, | |
0.052906807769021735, | |
0.052832448154199775, | |
0.052762142178590946, | |
0.052732848983516135, | |
0.052584604062002452, | |
0.052508511143041933, | |
0.052463068591988238, | |
0.052347674264228282, | |
0.052254349204426488, | |
0.051916969872161101, | |
0.05172518598775494, | |
0.051652636142397373, | |
0.051626702871608629, | |
0.05157603736065005, | |
0.051522402452400508, | |
0.05151749493479503, | |
0.051494415661138, | |
0.05137869571442702, | |
0.051295023702707672, | |
0.051227806494315783, | |
0.051214723300319412, | |
0.051196090688569666, | |
0.051091176391975718, | |
0.051025093487649206, | |
0.050464886411225346, | |
0.050292361673652135, | |
0.050263690583474359, | |
0.050013949888141153, | |
0.049706245312802648, | |
0.048854013637168257, | |
0.048751495131606433, | |
0.048732981551418343, | |
0.048636337562959868, | |
0.048592995419575069, | |
0.048004152492242026, | |
0.04788241534295682, | |
0.047618046244232395, | |
0.045745624169478097, | |
0.044166323981549946, | |
0.043953166119563056, | |
0.043848355790076038, | |
0.043814021544778835, | |
0.043771805824554824, | |
0.043565062926594407, | |
0.043433562107963931, | |
0.041714719511327353, | |
0.040654232296894265, | |
0.039814256049251137, | |
0.039541429499354061, | |
0.038783616201353899, | |
0.037674199146467337, | |
0.037534012984121172, | |
0.037162785659857367, | |
0.035282442974400315, | |
0.034665619938390277, | |
0.03431233208686995, | |
0.032812617904501629, | |
0.031195195526032816, | |
0.030009447131042616, | |
0.029745238504465022, | |
0.028964395156306968, | |
0.028825522920995637, | |
0.02251584672769975, | |
0.022072471175043511, | |
0.021998843385786216, | |
0.021751224221392609, | |
0.017501266339660104, | |
0.016931875995438533, | |
0.016619900461292907, | |
0.01629704327527523, | |
0.013168315180256251, | |
0.010957122316054045, | |
0.010369946821198879, | |
0.0073444561350290653, | |
0.0070827244455757147, | |
0.0067316397444387339, | |
0.0063981732623333096, | |
0.0063777842306349097, | |
0.0063682596936972206, | |
0.0063563508091673687, | |
0.0063494943478174743, | |
0.0063345735973986119, | |
0.0045036584911865444, | |
0.0044872496781646872, | |
0.0044446009455101163, | |
0.0043107420771679276, | |
0.0043045689123021105, | |
0.004271728424515943, | |
0.0042644209294515848, | |
0.0042633273353353349, | |
0.0042628646892724827, | |
0.0042626918175961981, | |
0.0042382629064750621, | |
0.0042265225046720194, | |
0.004180531625639263, | |
0.0039760033953746216, | |
0.0038189196301124557, | |
0.0037537614995409095, | |
0.0033138102077631942, | |
0.0019712273859103144, | |
0.0019679170720275823, | |
0.0019151065407189136, | |
0.0019121711281630356, | |
0.0018969282363798089, | |
0.00051387091665156957, | |
0.00021265667338043855, | |
0.00011091707576406661, | |
0.00011064428535847308, | |
0.00011061477746584589, | |
0.00011050464883070062, | |
0.00011045076077691971, | |
0.00011002806281139091, | |
0.00010983130765829656, | |
5.1707370067261508e-05, | |
2.0227093208031559e-05, | |
1.4949694013564998e-05, | |
1.1393530985510732e-05, | |
8.1794560994997387e-06, | |
7.3161754809980728e-06, | |
4.5399712664130534e-06, | |
4.2297812570433086e-06, | |
1.3540015113494813e-06, | |
7.8925649036224459e-07, | |
6.1200538588484267e-07, | |
5.5254298757774442e-07, | |
1.6054774223012589e-07, | |
1.0000000994736041e-07], | |
'tpr': [0.0, | |
0.061538461538461542, | |
0.061538461538461542, | |
0.069230769230769235, | |
0.069230769230769235, | |
0.069230769230769235, | |
0.069230769230769235, | |
0.076923076923076927, | |
0.08461538461538462, | |
0.08461538461538462, | |
0.08461538461538462, | |
0.092307692307692313, | |
0.092307692307692313, | |
0.092307692307692313, | |
0.092307692307692313, | |
0.092307692307692313, | |
0.10000000000000001, | |
0.10000000000000001, | |
0.10000000000000001, | |
0.10000000000000001, | |
0.10000000000000001, | |
0.10000000000000001, | |
0.10000000000000001, | |
0.1076923076923077, | |
0.1076923076923077, | |
0.1076923076923077, | |
0.11538461538461539, | |
0.12307692307692308, | |
0.12307692307692308, | |
0.12307692307692308, | |
0.12307692307692308, | |
0.12307692307692308, | |
0.12307692307692308, | |
0.12307692307692308, | |
0.12307692307692308, | |
0.13076923076923078, | |
0.13846153846153847, | |
0.13846153846153847, | |
0.13846153846153847, | |
0.13846153846153847, | |
0.13846153846153847, | |
0.13846153846153847, | |
0.13846153846153847, | |
0.13846153846153847, | |
0.13846153846153847, | |
0.14615384615384616, | |
0.14615384615384616, | |
0.14615384615384616, | |
0.14615384615384616, | |
0.14615384615384616, | |
0.14615384615384616, | |
0.14615384615384616, | |
0.14615384615384616, | |
0.14615384615384616, | |
0.14615384615384616, | |
0.14615384615384616, | |
0.15384615384615385, | |
0.15384615384615385, | |
0.15384615384615385, | |
0.15384615384615385, | |
0.16153846153846155, | |
0.16153846153846155, | |
0.16153846153846155, | |
0.16153846153846155, | |
0.16153846153846155, | |
0.16153846153846155, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.16923076923076924, | |
0.17692307692307693, | |
0.17692307692307693, | |
0.17692307692307693, | |
0.17692307692307693, | |
0.17692307692307693, | |
0.17692307692307693, | |
0.17692307692307693, | |
0.17692307692307693, | |
0.17692307692307693, | |
0.17692307692307693, | |
0.17692307692307693, | |
0.17692307692307693, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
0.18461538461538463, | |
1.0]}} | |
# Fourth execution on enwiki | |
{'auc': 0.55297619047619051, | |
'mean.accuracy': 0.95023696682464454, | |
'roc': {'fpr': [0.0, | |
0.0060412224591329068, | |
0.0060412224591329068, | |
0.0060412224591329068, | |
0.0063965884861407248, | |
0.0067519545131485429, | |
0.0067519545131485429, | |
0.0071073205401563609, | |
0.007462686567164179, | |
0.007462686567164179, | |
0.007462686567164179, | |
0.007818052594171997, | |
0.007818052594171997, | |
0.007818052594171997, | |
0.0081734186211798159, | |
1.0], | |
'thresholds': [1.9999999999999698, | |
0.99999999999996991, | |
0.99999992890052403, | |
0.97515583207241496, | |
0.018978754924332587, | |
0.0018265776394870987, | |
0.00054395049877190157, | |
0.00036774511623237821, | |
0.00030118057657604533, | |
0.00018029984895817771, | |
0.00015680264219386422, | |
7.7985049196443749e-06, | |
3.1313300093959929e-06, | |
8.9733767524214136e-07, | |
3.9393164882919198e-07, | |
1.0000000994736041e-07], | |
'tpr': [0.0, | |
0.064285714285714279, | |
0.071428571428571425, | |
0.07857142857142857, | |
0.07857142857142857, | |
0.07857142857142857, | |
0.085714285714285715, | |
0.085714285714285715, | |
0.085714285714285715, | |
0.09285714285714286, | |
0.10000000000000001, | |
0.10000000000000001, | |
0.10714285714285714, | |
0.11428571428571428, | |
0.11428571428571428, | |
1.0]}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ git diff | |
diff --git a/ores/features_reverted.py b/ores/features_reverted.py | |
index 1f4089f..0a2b076 100644 | |
--- a/ores/features_reverted.py | |
+++ b/ores/features_reverted.py | |
@@ -27,7 +27,7 @@ from mw import api | |
from mw.lib import reverts | |
from revscores.extractors import APIExtractor | |
-from revscores.language import English | |
+from revscores.language import Portuguese | |
from revscores.scorers import MLScorerModel | |
@@ -72,7 +72,7 @@ def main(): | |
def run(rev_pages, api_url, model): | |
session = api.Session(api_url) | |
- extractor = APIExtractor(session, language=English()) # This is a hack. Need to fix languages | |
+ extractor = APIExtractor(session, language=Portuguese()) # This is a hack. Need to fix languages | |
for rev_id, page_id in rev_pages: | |
sys.stderr.write(".");sys.stderr.flush() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First execution of ./train_test on ptwiki... | |
{'auc': 0.49815361890694237, | |
'mean.accuracy': 0.93670010377032165, | |
'roc': {'fpr': [0.0, | |
0.00036927621861152144, | |
0.00073855243722304289, | |
0.0011078286558345643, | |
0.0014771048744460858, | |
0.001846381093057607, | |
0.0022156573116691287, | |
0.0025849335302806499, | |
0.0029542097488921715, | |
0.0033234859675036928, | |
0.003692762186115214, | |
1.0], | |
'thresholds': [1.9999999848513053, | |
0.99999998485130526, | |
0.99999967335064954, | |
0.0040546403636459449, | |
0.00021421071548512178, | |
0.00014188051947484933, | |
3.1312197668620073e-05, | |
4.9616896396941686e-06, | |
3.3696879126852102e-06, | |
2.2069682944646386e-07, | |
1.22812681119808e-07, | |
1.0000000994736041e-07], | |
'tpr': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]}} | |
# Second execution on ptwiki | |
{'auc': 0.50924832984958401, | |
'mean.accuracy': 0.92493946731234866, | |
'roc': {'fpr': [0.0, | |
0.016967908520841016, | |
0.017336776097381039, | |
0.017705643673921061, | |
0.018074511250461084, | |
0.018443378827001106, | |
0.018812246403541129, | |
0.019181113980081151, | |
0.019549981556621174, | |
0.019918849133161196, | |
0.020287716709701219, | |
0.020656584286241238, | |
0.02102545186278126, | |
0.021394319439321283, | |
0.021763187015861305, | |
0.022132054592401328, | |
0.02250092216894135, | |
0.022869789745481373, | |
0.023238657322021395, | |
0.023607524898561418, | |
0.023607524898561418, | |
0.02397639247510144, | |
0.024345260051641459, | |
0.024714127628181481, | |
0.025082995204721504, | |
0.025451862781261526, | |
0.025820730357801549, | |
0.026189597934341571, | |
0.026558465510881594, | |
0.026927333087421616, | |
0.027296200663961639, | |
0.027665068240501661, | |
0.028033935817041684, | |
0.028402803393581703, | |
0.028771670970121725, | |
0.029140538546661748, | |
0.02950940612320177, | |
0.029878273699741793, | |
0.030247141276281815, | |
0.030616008852821838, | |
0.03098487642936186, | |
0.031722611582441905, | |
0.032091479158981924, | |
1.0], | |
'thresholds': [1.9999999999999698, | |
0.99999999999996991, | |
0.096802407421953507, | |
0.089412381757115272, | |
0.074138922654953476, | |
0.040158783921046742, | |
0.036140790032085486, | |
0.017290340971734024, | |
0.016659387854422072, | |
0.013532726685682847, | |
0.012569234440790633, | |
0.009695562830866828, | |
0.0092936447294306689, | |
0.0080976333335785172, | |
0.0029003736355214622, | |
0.0028861416711657168, | |
0.0022506629960438538, | |
0.0020024077734358266, | |
0.0019574881476074808, | |
0.0018277680703899283, | |
0.0010992870932229767, | |
0.00045026134013702405, | |
0.00025831120661681133, | |
0.00017813016768737885, | |
8.5625813855319774e-05, | |
8.0145451166731754e-05, | |
3.6955757083710469e-05, | |
2.8660749231630018e-05, | |
2.415113110111705e-05, | |
1.9833253843350468e-05, | |
1.6354738118159407e-05, | |
9.1620016346133682e-06, | |
7.8343023999810645e-06, | |
7.7174550253633484e-06, | |
5.7617051518604711e-06, | |
2.6927204622535728e-06, | |
1.3209113531529292e-06, | |
7.657563092113605e-07, | |
2.9748184983077608e-07, | |
2.2570893419469584e-07, | |
1.7544208555271606e-07, | |
1.5798922745889328e-07, | |
1.2577916003146519e-07, | |
1.0000000994736041e-07], | |
'tpr': [0.0, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.044444444444444446, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
0.050000000000000003, | |
1.0]}} | |
# Third execution on ptwiki | |
{'auc': 0.48444408697050989, | |
'mean.accuracy': 0.2151504669664476, | |
'roc': {'fpr': [0.0, | |
0.83578251285819249, | |
0.8361498897869214, | |
0.8365172667156503, | |
0.83688464364437909, | |
0.83725202057310799, | |
0.83761939750183689, | |
0.8379867744305658, | |
0.8379867744305658, | |
0.8379867744305658, | |
0.83835415135929459, | |
0.83872152828802349, | |
0.83908890521675239, | |
0.83945628214548129, | |
0.83982365907421019, | |
0.84019103600293898, | |
0.84055841293166789, | |
0.84092578986039679, | |
0.84129316678912569, | |
0.84166054371785448, | |
0.84202792064658338, | |
0.84239529757531229, | |
0.84276267450404119, | |
0.84313005143276998, | |
0.84349742836149888, | |
0.84386480529022778, | |
0.84423218221895668, | |
0.84459955914768547, | |
0.84496693607641438, | |
0.84533431300514328, | |
0.84570168993387218, | |
0.84606906686260108, | |
0.84643644379132987, | |
1.0], | |
'thresholds': [1.99999999137472, | |
0.99999999137471995, | |
0.99999998103695342, | |
0.9999990623444398, | |
0.99633689102048673, | |
0.99571838332029061, | |
0.99364972693751652, | |
0.98952232779173099, | |
0.9813576639370305, | |
0.95243903493014737, | |
0.7609784684745392, | |
0.60880093031158156, | |
0.44425551301861099, | |
0.42103801050731743, | |
0.41443554979070268, | |
0.27905932973758962, | |
0.24088579876361726, | |
0.24052972605204578, | |
0.20854202113006987, | |
0.14349918575503551, | |
0.12180166748940285, | |
0.064640668850418609, | |
0.023408436148379164, | |
0.0095227906445994465, | |
0.0070523590764899474, | |
0.00067055108045023634, | |
0.00052854882318463547, | |
0.00042098409110264063, | |
3.5659863873933554e-05, | |
3.0355211781315324e-05, | |
1.5846177081593103e-06, | |
8.4155847047155778e-07, | |
4.439046279309538e-07, | |
1.0000000994736041e-07], | |
'tpr': [0.0, | |
0.80473372781065089, | |
0.80473372781065089, | |
0.80473372781065089, | |
0.80473372781065089, | |
0.80473372781065089, | |
0.80473372781065089, | |
0.80473372781065089, | |
0.81065088757396453, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
0.81656804733727806, | |
1.0]}} | |
# Fourth execution on ptwiki | |
{'auc': 0.49740644683216007, | |
'mean.accuracy': 0.93358699411968182, | |
'roc': {'fpr': [0.0, | |
0.00037050759540570581, | |
0.00074101519081141163, | |
0.0011115227862171174, | |
0.0014820303816228233, | |
0.0018525379770285291, | |
0.0022230455724342349, | |
0.0025935531678399409, | |
0.0029640607632456465, | |
0.0033345683586513525, | |
0.0037050759540570581, | |
0.0040755835494627642, | |
0.0044460911448684698, | |
0.0048165987402741754, | |
0.0051871063356798818, | |
1.0], | |
'thresholds': [1.0009092133164244, | |
0.00090921331642442082, | |
0.00067973845349362518, | |
0.00043392225328895328, | |
0.00037027258347507392, | |
0.00035227911860226288, | |
0.00014658287727269996, | |
8.0405999537700397e-05, | |
7.4746434475894761e-05, | |
2.5865967534841318e-05, | |
2.253235391765644e-05, | |
1.3728185311603878e-05, | |
1.0130928408969309e-05, | |
7.1915934856279247e-07, | |
4.9476165363078197e-07, | |
1.0000000994736041e-07], | |
'tpr': [0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
1.0]}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment