Skip to content

Instantly share code, notes, and snippets.

@Ladsgroup
Last active January 31, 2018 17:09
Show Gist options
  • Save Ladsgroup/ad77bf59273263134e4c4dc5a1cc0309 to your computer and use it in GitHub Desktop.
Save Ladsgroup/ad77bf59273263134e4c4dc5a1cc0309 to your computer and use it in GitHub Desktop.
Diff for templating
amsa@C235:~/editquality$ python differ.py "Spanish Wikipedia"
############################# Spanish Wikipedia ################################
-
datasets/eswiki.sampled_revisions.20k_2015.json:
wget -qO- http://quarry.wmflabs.org/run/42221/output/0/json-lines?download=true > $@
-
- datasets/eswiki.human_labeled_revisions.5k_2015.json:
- ./utility fetch_labels \
- https://labels.wmflabs.org/campaigns/eswiki/12/ > $@
datasets/eswiki.autolabeled_revisions.20k_2015.json: \
datasets/eswiki.sampled_revisions.20k_2015.json
cat $< | \
./utility autolabel --host=https://es.wikipedia.org \
--trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
+ datasets/eswiki.human_labeled_revisions.5k_2015.json:
+ ./utility fetch_labels \
+ https://labels.wmflabs.org/campaigns/eswiki/12/ > $@
+
datasets/eswiki.labeled_revisions.20k_2015.json: \
+ datasets/eswiki.autolabeled_revisions.20k_2015.json \
- datasets/eswiki.human_labeled_revisions.5k_2015.json \
? --
+ datasets/eswiki.human_labeled_revisions.5k_2015.json
- datasets/eswiki.autolabeled_revisions.20k_2015.json
./utility merge_labels $^ > $@
datasets/eswiki.autolabeled_revisions.w_cache.20k_2015.json: \
datasets/eswiki.autolabeled_revisions.20k_2015.json
cat $< | \
revscoring extract \
editquality.feature_lists.eswiki.reverted \
- editquality.feature_lists.eswiki.damaging \
- editquality.feature_lists.eswiki.goodfaith \
--host https://es.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
datasets/eswiki.labeled_revisions.w_cache.20k_2015.json: \
datasets/eswiki.labeled_revisions.20k_2015.json
cat $< | \
revscoring extract \
- editquality.feature_lists.eswiki.reverted \
editquality.feature_lists.eswiki.damaging \
editquality.feature_lists.eswiki.goodfaith \
--host https://es.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
tuning_reports/eswiki.damaging.md: \
datasets/eswiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.eswiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
- --pop-rate "true=0.1273116307504203" \
- --pop-rate "false=0.8726883692495797" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- tuning_reports/eswiki.reverted.md: \
- datasets/eswiki.autolabeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.eswiki.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.11036013315847877" \
--pop-rate "false=0.8896398668415212" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
-
- models/eswiki.reverted.gradient_boosting.model: \
- datasets/eswiki.autolabeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.eswiki.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).0 \
- -p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
- -p 'n_estimators=700' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.11036013315847877" \
- --pop-rate "false=0.8896398668415212" \
- --center --scale > $@
models/eswiki.damaging.gradient_boosting.model: \
datasets/eswiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.eswiki.damaging \
damaging \
--version=$(damaging_major_minor).0 \
+ -p 'learning_rate=0.1' \
-p 'max_depth=3' \
- -p 'learning_rate=0.1' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=300' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.11036013315847877" \
--pop-rate "false=0.8896398668415212" \
- --center --scale > $@
? -
+ --center --scale > $@
tuning_reports/eswiki.goodfaith.md: \
datasets/eswiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.eswiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.11036013315847877" \
--pop-rate "false=0.8896398668415212" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
- --debug > $@
? -
+ --debug > $@
models/eswiki.goodfaith.gradient_boosting.model: \
datasets/eswiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.eswiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).0 \
+ -p 'learning_rate=0.1' \
-p 'max_depth=3' \
- -p 'learning_rate=0.1' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=300' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.11036013315847877" \
--pop-rate "false=0.8896398668415212" \
- --center --scale > $@
? -
+ --center --scale > $@
eswiki_models: \
- models/eswiki.damaging.gradient_boosting.model \
? -
+ models/eswiki.damaging.gradient_boosting.model \
- models/eswiki.goodfaith.gradient_boosting.model
? -
+ models/eswiki.goodfaith.gradient_boosting.model
eswiki_tuning_reports: \
- tuning_reports/eswiki.damaging.md \
? -
+ tuning_reports/eswiki.damaging.md \
- tuning_reports/eswiki.goodfaith.md
? -
+ tuning_reports/eswiki.goodfaith.md
############################# Spanish Wikibooks ################################
-
datasets/eswikibooks.sampled_revisions.20k_2015.json:
wget -qO- https://quarry.wmflabs.org/run/113419/output/0/json-lines?download=true > $@
+
+ datasets/eswikibooks.autolabeled_revisions.20k_2015.json: \
+ datasets/eswikibooks.sampled_revisions.20k_2015.json
+ cat $< | \
+ ./utility autolabel --host=https://es.wikibooks.org \
+ --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \
+ --trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
+ --verbose > $@
datasets/eswikibooks.human_labeled_revisions.5k_2015.json:
./utility fetch_labels \
https://labels.wmflabs.org/campaigns/eswikibooks/42/ > $@
- datasets/eswikibooks.autolabeled_revisions.20k_2015.json: \
- datasets/eswikibooks.sampled_revisions.20k_2015.json
- cat $< | \
- ./utility autolabel --host=https://es.wikibooks.org \
- --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat,autopatrolled \
- --trusted-edits=1000 \
- --verbose > $@
-
datasets/eswikibooks.labeled_revisions.20k_2015.json: \
+ datasets/eswikibooks.autolabeled_revisions.20k_2015.json \
- datasets/eswikibooks.human_labeled_revisions.5k_2015.json \
? --
+ datasets/eswikibooks.human_labeled_revisions.5k_2015.json
- datasets/eswikibooks.autolabeled_revisions.20k_2015.json
./utility merge_labels $^ > $@
datasets/eswikibooks.autolabeled_revisions.w_cache.20k_2015.json: \
datasets/eswikibooks.autolabeled_revisions.20k_2015.json
cat $< | \
revscoring extract \
editquality.feature_lists.eswikibooks.reverted \
- editquality.feature_lists.eswikibooks.damaging \
- editquality.feature_lists.eswikibooks.goodfaith \
--host https://es.wikibooks.org \
--extractor $(max_extractors) \
--verbose > $@
datasets/eswikibooks.labeled_revisions.w_cache.20k_2015.json: \
datasets/eswikibooks.labeled_revisions.20k_2015.json
cat $< | \
revscoring extract \
- editquality.feature_lists.eswikibooks.reverted \
editquality.feature_lists.eswikibooks.damaging \
editquality.feature_lists.eswikibooks.goodfaith \
--host https://es.wikibooks.org \
--extractor $(max_extractors) \
--verbose > $@
-
- tuning_reports/eswikibooks.reverted.md: \
- datasets/eswikibooks.autolabeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.eswikibooks.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.08983104208206527" \
- --pop-rate "false=0.9101689579179347" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/eswikibooks.reverted.gradient_boosting.model: \
- datasets/eswikibooks.autolabeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.eswikibooks.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).0 \
- -p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
- -p 'n_estimators=700' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.08983104208206527" \
- --pop-rate "false=0.9101689579179347" \
- --center --scale > $@
tuning_reports/eswikibooks.damaging.md: \
datasets/eswikibooks.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.eswikibooks.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.1126671580499105" \
--pop-rate "false=0.8873328419500895" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
- --debug > $@
? -
+ --debug > $@
models/eswikibooks.damaging.gradient_boosting.model: \
datasets/eswikibooks.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.eswikibooks.damaging \
damaging \
--version=$(damaging_major_minor).0 \
+ -p 'learning_rate=0.1' \
-p 'max_depth=3' \
- -p 'learning_rate=0.1' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=500' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.1126671580499105" \
--pop-rate "false=0.8873328419500895" \
- --center --scale > $@
? -
+ --center --scale > $@
tuning_reports/eswikibooks.goodfaith.md: \
datasets/eswikibooks.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.eswikibooks.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9139393939393939" \
--pop-rate "false=0.08606060606060606" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
- --debug > $@
? -
+ --debug > $@
models/eswikibooks.goodfaith.gradient_boosting.model: \
datasets/eswikibooks.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.eswikibooks.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).0 \
+ -p 'learning_rate=0.5' \
-p 'max_depth=7' \
- -p 'learning_rate=0.5' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9139393939393939" \
--pop-rate "false=0.08606060606060606" \
- --center --scale > $@
? -
+ --center --scale > $@
eswikibooks_models: \
- models/eswikibooks.damaging.gradient_boosting.model \
? -
+ models/eswikibooks.damaging.gradient_boosting.model \
- models/eswikibooks.goodfaith.gradient_boosting.model
? -
+ models/eswikibooks.goodfaith.gradient_boosting.model
eswikibooks_tuning_reports: \
- tuning_reports/eswikibooks.damaging.md
? -
+ tuning_reports/eswikibooks.damaging.md \
? ++
- tuning_reports/eswikibooks.goodfaith.md
? -
+ tuning_reports/eswikibooks.goodfaith.md
- ########################### Estonian Wikipedia ################################
+ ############################# Estonian Wikipedia ################################
? ++
-
datasets/etwiki.sampled_revisions.20k_2015.json:
wget -qO- http://quarry.wmflabs.org/run/50110/output/0/json-lines?download=true > $@
datasets/etwiki.autolabeled_revisions.20k_2015.json: \
datasets/etwiki.sampled_revisions.20k_2015.json
cat $< | \
./utility autolabel --host=https://et.wikipedia.org \
--trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat,flow-bot \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
datasets/etwiki.human_labeled_revisions.5k_2015.json:
./utility fetch_labels \
https://labels.wmflabs.org/campaigns/etwiki/17/ > $@
datasets/etwiki.labeled_revisions.20k_2015.json: \
+ datasets/etwiki.autolabeled_revisions.20k_2015.json \
- datasets/etwiki.human_labeled_revisions.5k_2015.json \
? --
+ datasets/etwiki.human_labeled_revisions.5k_2015.json
+ ./utility merge_labels $^ > $@
+
+ datasets/etwiki.autolabeled_revisions.w_cache.20k_2015.json: \
datasets/etwiki.autolabeled_revisions.20k_2015.json
- ./utility merge_labels $^ > $@
+ cat $< | \
+ revscoring extract \
+ editquality.feature_lists.etwiki.reverted \
+ --host https://et.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
datasets/etwiki.labeled_revisions.w_cache.20k_2015.json: \
datasets/etwiki.labeled_revisions.20k_2015.json
cat $< | \
revscoring extract \
- editquality.feature_lists.etwiki.reverted \
editquality.feature_lists.etwiki.damaging \
editquality.feature_lists.etwiki.goodfaith \
--host https://et.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
-
- tuning_reports/etwiki.reverted.md: \
- datasets/etwiki.labeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.etwiki.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.020021127823331153" \
- --pop-rate "false=0.9799788721766688" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/etwiki.reverted.gradient_boosting.model: \
- datasets/etwiki.labeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.etwiki.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).0 \
- -p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
- -p 'n_estimators=500' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.020021127823331153" \
- --pop-rate "false=0.9799788721766688" \
- --center --scale > $@
tuning_reports/etwiki.damaging.md: \
datasets/etwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.etwiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.026158257457618593" \
--pop-rate "false=0.9738417425423814" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
- --debug > $@
? -
+ --debug > $@
models/etwiki.damaging.gradient_boosting.model: \
datasets/etwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.etwiki.damaging \
damaging \
--version=$(damaging_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=500' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.026158257457618593" \
--pop-rate "false=0.9738417425423814" \
- --center --scale > $@
? -
+ --center --scale > $@
tuning_reports/etwiki.goodfaith.md: \
datasets/etwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.etwiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9841038281603702" \
- --pop-rate "false=0.01589617183962976" \
? ^
+ --pop-rate "false=0.01589617183962977" \
? ^
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
- --debug > $@
? -
+ --debug > $@
models/etwiki.goodfaith.gradient_boosting.model: \
datasets/etwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.etwiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=500' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9841038281603702" \
- --pop-rate "false=0.01589617183962976" \
? ^
+ --pop-rate "false=0.01589617183962977" \
? ^
- --center --scale > $@
? -
+ --center --scale > $@
etwiki_models: \
- models/etwiki.damaging.gradient_boosting.model \
? -
+ models/etwiki.damaging.gradient_boosting.model \
- models/etwiki.goodfaith.gradient_boosting.model
? -
+ models/etwiki.goodfaith.gradient_boosting.model
etwiki_tuning_reports: \
- tuning_reports/etwiki.damaging.md \
? -
+ tuning_reports/etwiki.damaging.md \
- tuning_reports/etwiki.goodfaith.md
? -
+ tuning_reports/etwiki.goodfaith.md
amsa@C235:~/editquality$ python differ.py "Japanese Wikipedia"
- ########################### Japanese Wikipedia ################################
+ ############################# Japanese Wikipedia ################################
? ++
-
# From https://quarry.wmflabs.org/query/9927
datasets/jawiki.sampled_revisions.40k_2016.json:
wget -qO- https://quarry.wmflabs.org/run/89016/output/0/json-lines?download=true > $@
datasets/jawiki.autolabeled_revisions.40k_2016.json: \
datasets/jawiki.sampled_revisions.40k_2016.json
cat $< | \
./utility autolabel --host=https://ja.wikipedia.org \
--trusted-groups=abusefilter,bot,bureaucrat,checkuser,eliminator,interface-editor,oversight,rollbacker,sysop \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
+
datasets/jawiki.autolabeled_revisions.w_cache.40k_2016.json: \
datasets/jawiki.autolabeled_revisions.40k_2016.json
cat $< | \
revscoring extract \
editquality.feature_lists.jawiki.reverted \
--host https://ja.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
tuning_reports/jawiki.reverted.md: \
datasets/jawiki.autolabeled_revisions.w_cache.40k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.jawiki.reverted \
reverted_for_damage \
roc_auc.labels.true \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.03256945140908635" \
--pop-rate "false=0.9674305485909136" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/jawiki.reverted.gradient_boosting.model: \
datasets/jawiki.autolabeled_revisions.w_cache.40k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.jawiki.reverted \
reverted_for_damage \
--version=$(reverted_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.03256945140908635" \
--pop-rate "false=0.9674305485909136" \
--center --scale > $@
-
jawiki_models: \
models/jawiki.reverted.gradient_boosting.model
jawiki_tuning_reports: \
- tuning_reports/jawiki.reverted.md
? -
+ tuning_reports/jawiki.reverted.md-
amsa@C235:~/editquality$ python differ.py "Korean Wikipedia"
############################# Korean Wikipedia ################################
-
- # from https://quarry.wmflabs.org/query/17645
? ^
+ # From https://quarry.wmflabs.org/query/17645
? ^
datasets/kowiki.sampled_revisions.20k_2016.json:
wget -qO- https://quarry.wmflabs.org/run/165613/output/0/json-lines?download=true > $@
datasets/kowiki.autolabeled_revisions.20k_2016.json: \
datasets/kowiki.sampled_revisions.20k_2016.json
cat $< | \
./utility autolabel --host=https://ko.wikipedia.org \
--trusted-groups=abusefilter,bot,bureaucrat,checkuser,eliminator,interface-editor,oversight,rollbacker,sysop \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
+
datasets/kowiki.autolabeled_revisions.w_cache.20k_2016.json: \
datasets/kowiki.autolabeled_revisions.20k_2016.json
cat $< | \
revscoring extract \
editquality.feature_lists.kowiki.reverted \
--host https://ko.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
tuning_reports/kowiki.reverted.md: \
datasets/kowiki.autolabeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.kowiki.reverted \
reverted_for_damage \
roc_auc.labels.true \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.04717122705217348" \
--pop-rate "false=0.9528287729478265" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/kowiki.reverted.gradient_boosting.model: \
datasets/kowiki.autolabeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.kowiki.reverted \
reverted_for_damage \
--version=$(reverted_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.04717122705217348" \
--pop-rate "false=0.9528287729478265" \
--center --scale > $@
kowiki_models: \
models/kowiki.reverted.gradient_boosting.model
kowiki_tuning_reports: \
- tuning_reports/kowiki.reverted.md
? -
+ tuning_reports/kowiki.reverted.md-
amsa@C235:~/editquality$ python differ.py "Tamil Wikipedia"
- ############################## Tamil Wikipedia ################################
? -
+ ############################# Tamil Wikipedia ################################
-
# From https://quarry.wmflabs.org/query/20230
datasets/tawiki.sampled_revisions.20k_2017.json:
wget -qO- https://quarry.wmflabs.org/run/190662/output/0/json-lines?download=true > $@
datasets/tawiki.autolabeled_revisions.20k_2017.json: \
datasets/tawiki.sampled_revisions.20k_2017.json
cat $< | \
./utility autolabel --host=https://ta.wikipedia.org \
--trusted-groups=autopatrolled,bot,bureaucrat,patroller,rollbacker,sysop \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
datasets/tawiki.revisions_for_review.5k_2017.json: \
datasets/tawiki.autolabeled_revisions.20k_2017.json
grep '"needs_review": true' $< | shuf > $@
datasets/tawiki.autolabeled_revisions.w_cache.20k_2017.json: \
datasets/tawiki.autolabeled_revisions.20k_2017.json
cat $< | \
revscoring extract \
editquality.feature_lists.tawiki.reverted \
--host https://ta.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
tuning_reports/tawiki.reverted.md: \
datasets/tawiki.autolabeled_revisions.w_cache.20k_2017.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.tawiki.reverted \
reverted_for_damage \
roc_auc.labels.true \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.015904172328753335" \
--pop-rate "false=0.9840958276712467" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/tawiki.reverted.gradient_boosting.model: \
datasets/tawiki.autolabeled_revisions.w_cache.20k_2017.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.tawiki.reverted \
reverted_for_damage \
--version=$(reverted_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=500' \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.015904172328753335" \
--pop-rate "false=0.9840958276712467" \
--center --scale > $@
tawiki_models: \
models/tawiki.reverted.gradient_boosting.model
tawiki_tuning_reports: \
- tuning_reports/tawiki.reverted.md
? -
+ tuning_reports/tawiki.reverted.md-
amsa@C235:~/editquality$ python differ.py "Ukranian Wikipedia"
- ############################### Ukranian Wikipedia ############################
? --
+ ############################# Ukranian Wikipedia ################################
? ++++
-
datasets/ukwiki.sampled_revisions.20k_2015.json:
wget -qO- http://quarry.wmflabs.org/run/48597/output/0/json-lines?download=true > $@
datasets/ukwiki.autolabeled_revisions.20k_2015.json: \
datasets/ukwiki.sampled_revisions.20k_2015.json
cat $< | \
./utility autolabel --host=https://uk.wikipedia.org \
--trusted-groups=abusefilter,arbcom,bureaucrat,checkuser,rollbacker,sysop,bot \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
+
+ datasets/ukwiki.revisions_for_review.5k_2015.json: \
+ datasets/ukwiki.autolabeled_revisions.20k_2015.json
+ grep '"needs_review": true' $< | shuf > $@
datasets/ukwiki.autolabeled_revisions.w_cache.20k_2015.json: \
datasets/ukwiki.autolabeled_revisions.20k_2015.json
cat $< | \
revscoring extract \
editquality.feature_lists.ukwiki.reverted \
--host https://uk.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
tuning_reports/ukwiki.reverted.md: \
datasets/ukwiki.autolabeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.ukwiki.reverted \
reverted_for_damage \
roc_auc.labels.true \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.021877665713282153" \
--pop-rate "false=0.9781223342867178" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/ukwiki.reverted.gradient_boosting.model: \
datasets/ukwiki.autolabeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.ukwiki.reverted \
reverted_for_damage \
--version=$(reverted_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.021877665713282153" \
--pop-rate "false=0.9781223342867178" \
--center --scale > $@
ukwiki_models: \
- models/ukwiki.reverted.gradient_boosting.model
? -
+ models/ukwiki.reverted.gradient_boosting.model
ukwiki_tuning_reports: \
- tuning_reports/ukwiki.reverted.md
? - -
+ tuning_reports/ukwiki.reverted.md
- ############################### Romanian Wikipedia ############################
? --
+ ############################# Romanian Wikipedia ################################
? ++++
-
datasets/rowiki.sampled_revisions.20k_2016.json:
wget -qO- https://quarry.wmflabs.org/run/146926/output/0/json-lines?download=true > $@
datasets/rowiki.autolabeled_revisions.20k_2016.json: \
datasets/rowiki.sampled_revisions.20k_2016.json
cat $< | \
./utility autolabel --host=https://ro.wikipedia.org \
--trusted-groups=abusefilter,arbcom,bureaucrat,checkuser,rollbacker,sysop,bot,templateeditor,patroller,autopatrolled \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
datasets/rowiki.human_labeled_revisions.5k_2016.json:
./utility fetch_labels \
https://labels.wmflabs.org/campaigns/rowiki/48/ > $@
datasets/rowiki.labeled_revisions.20k_2016.json: \
+ datasets/rowiki.autolabeled_revisions.20k_2016.json \
- datasets/rowiki.human_labeled_revisions.5k_2016.json \
? --
+ datasets/rowiki.human_labeled_revisions.5k_2016.json
- datasets/rowiki.autolabeled_revisions.20k_2016.json
./utility merge_labels $^ > $@
datasets/rowiki.autolabeled_revisions.w_cache.20k_2016.json: \
datasets/rowiki.autolabeled_revisions.20k_2016.json
cat $< | \
revscoring extract \
editquality.feature_lists.rowiki.reverted \
--host https://ro.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
datasets/rowiki.labeled_revisions.w_cache.20k_2016.json: \
datasets/rowiki.labeled_revisions.20k_2016.json
cat $< | \
revscoring extract \
- editquality.feature_lists.rowiki.reverted \
? -------
+ editquality.feature_lists.rowiki.damaging \
? +++++++
editquality.feature_lists.rowiki.goodfaith \
- editquality.feature_lists.rowiki.damaging \
--host https://ro.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
-
- tuning_reports/rowiki.reverted.md: \
- datasets/rowiki.autolabeled_revisions.w_cache.20k_2016.json
- cat $< | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.rowiki.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.03311324529811925" \
- --pop-rate "false=0.9668867547018808" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/rowiki.reverted.gradient_boosting.model: \
- datasets/rowiki.autolabeled_revisions.w_cache.20k_2016.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.rowiki.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).1 \
- -p 'max_features="log2"' \
- -p 'n_estimators=700' \
- -p 'learning_rate=0.01' \
- -p 'max_depth=7' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.03311324529811925" \
- --pop-rate "false=0.9668867547018808" \
- --center --scale > $@
tuning_reports/rowiki.damaging.md: \
datasets/rowiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.rowiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.04956982793117247" \
--pop-rate "false=0.9504301720688275" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/rowiki.damaging.gradient_boosting.model: \
datasets/rowiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.rowiki.damaging \
damaging \
--version=$(damaging_major_minor).1 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=5' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.04956982793117247" \
--pop-rate "false=0.9504301720688275" \
--center --scale > $@
tuning_reports/rowiki.goodfaith.md: \
datasets/rowiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.rowiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9699379751900761" \
- --pop-rate "false=0.030062024809923968" \
? -
+ --pop-rate "false=0.030062024809923926" \
? +
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/rowiki.goodfaith.gradient_boosting.model: \
datasets/rowiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.rowiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).1 \
+ -p 'learning_rate=0.1' \
-p 'max_depth=3' \
- -p 'learning_rate=0.1' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=300' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9699379751900761" \
- --pop-rate "false=0.030062024809923968" \
? -
+ --pop-rate "false=0.030062024809923926" \
? +
--center --scale > $@
rowiki_models: \
- models/rowiki.damaging.gradient_boosting.model \
? -
+ models/rowiki.damaging.gradient_boosting.model \
- models/rowiki.goodfaith.gradient_boosting.model
? -
+ models/rowiki.goodfaith.gradient_boosting.model
rowiki_tuning_reports: \
- tuning_reports/rowiki.damaging.md \
? -
+ tuning_reports/rowiki.damaging.md \
- tuning_reports/rowiki.goodfaith.md
? -
+ tuning_reports/rowiki.goodfaith.md
- ############################### Russian Wikipedia ############################
? --
+ ############################# Russian Wikipedia ################################
? ++++
-
datasets/ruwiki.sampled_revisions.20k_2015.json:
wget -qO- https://quarry.wmflabs.org/run/48649/output/0/json-lines?download=true > $@
datasets/ruwiki.autolabeled_revisions.20k_2015.json: \
datasets/ruwiki.sampled_revisions.20k_2015.json
cat $< | \
./utility autolabel --host=https://ru.wikipedia.org \
--trusted-groups=abusefilter,arbcom,bureaucrat,checkuser,rollbacker,sysop,bot \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
datasets/ruwiki.human_labeled_revisions.5k_2015.json:
./utility fetch_labels \
https://labels.wmflabs.org/campaigns/ruwiki/10/ > $@
datasets/ruwiki.labeled_revisions.20k_2015.json: \
+ datasets/ruwiki.autolabeled_revisions.20k_2015.json \
- datasets/ruwiki.human_labeled_revisions.5k_2015.json \
? --
+ datasets/ruwiki.human_labeled_revisions.5k_2015.json
+ ./utility merge_labels $^ > $@
+
+ datasets/ruwiki.autolabeled_revisions.w_cache.20k_2015.json: \
datasets/ruwiki.autolabeled_revisions.20k_2015.json
- ./utility merge_labels $^ > $@
+ cat $< | \
+ revscoring extract \
+ editquality.feature_lists.ruwiki.reverted \
+ --host https://ru.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json: \
datasets/ruwiki.labeled_revisions.20k_2015.json
cat $< | \
revscoring extract \
- editquality.feature_lists.ruwiki.reverted \
? -------
+ editquality.feature_lists.ruwiki.damaging \
? +++++++
editquality.feature_lists.ruwiki.goodfaith \
- editquality.feature_lists.ruwiki.damaging \
--host https://ru.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
-
- tuning_reports/ruwiki.reverted.md: \
- datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.ruwiki.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.05317532664843513" \
- --pop-rate "false=0.9468246733515648" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/ruwiki.reverted.gradient_boosting.model: \
- datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.ruwiki.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).0 \
- -p 'max_features="log2"' \
- -p 'n_estimators=700' \
- -p 'learning_rate=0.01' \
- -p 'max_depth=5' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.05317532664843513" \
- --pop-rate "false=0.9468246733515648" \
- --center --scale > $@
tuning_reports/ruwiki.damaging.md: \
datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.ruwiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.053479185657854755" \
- --pop-rate "false=0.9465208143421452" \
? ^
+ --pop-rate "false=0.9465208143421453" \
? ^
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/ruwiki.damaging.gradient_boosting.model: \
datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.ruwiki.damaging \
damaging \
--version=$(damaging_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=5' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.053479185657854755" \
- --pop-rate "false=0.9465208143421452" \
? ^
+ --pop-rate "false=0.9465208143421453" \
? ^
--center --scale > $@
tuning_reports/ruwiki.goodfaith.md: \
datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.ruwiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9713866099463182" \
- --pop-rate "false=0.02861339005368176" \
? ^
+ --pop-rate "false=0.028613390053681798" \
? ^^
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/ruwiki.goodfaith.gradient_boosting.model: \
datasets/ruwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.ruwiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).0 \
+ -p 'learning_rate=0.1' \
-p 'max_depth=3' \
- -p 'learning_rate=0.1' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=300' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9713866099463182" \
- --pop-rate "false=0.02861339005368176" \
? ^
+ --pop-rate "false=0.028613390053681798" \
? ^^
--center --scale > $@
ruwiki_models: \
- models/ruwiki.damaging.gradient_boosting.model \
? -
+ models/ruwiki.damaging.gradient_boosting.model \
- models/ruwiki.goodfaith.gradient_boosting.model
? -
+ models/ruwiki.goodfaith.gradient_boosting.model
ruwiki_tuning_reports: \
- tuning_reports/ruwiki.damaging.md \
? -
+ tuning_reports/ruwiki.damaging.md \
- tuning_reports/ruwiki.goodfaith.md
? -
+ tuning_reports/ruwiki.goodfaith.md
- ################################# Albanian Wikipedia ###########################
? ----
+ ############################# Albanian Wikipedia ################################
? +++++
-
# From https://quarry.wmflabs.org/query/17988
datasets/sqwiki.sampled_revisions.20k_2016.json:
wget -qO- https://quarry.wmflabs.org/run/169099/output/0/json-lines?download=true > $@
datasets/sqwiki.autolabeled_revisions.20k_2016.json: \
datasets/sqwiki.sampled_revisions.20k_2016.json
cat $< | \
./utility autolabel --host=https://sq.wikipedia.org \
--trusted-groups=sysop,oversight,trusted,bot,rollbacker,checkuser,abusefilter,bureaucrat \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
datasets/sqwiki.human_labeled_revisions.5k_2016.json:
./utility fetch_labels \
- https://labels.wmflabs.org/campaigns/sqwiki/57/ > \
? ^
+ https://labels.wmflabs.org/campaigns/sqwiki/57/ > $@
? ^^
- datasets/sqwiki.human_labeled_revisions.5k_2016.json
datasets/sqwiki.labeled_revisions.20k_2016.json: \
+ datasets/sqwiki.autolabeled_revisions.20k_2016.json \
- datasets/sqwiki.human_labeled_revisions.5k_2016.json \
? --
+ datasets/sqwiki.human_labeled_revisions.5k_2016.json
+ ./utility merge_labels $^ > $@
+
+ datasets/sqwiki.autolabeled_revisions.w_cache.20k_2016.json: \
datasets/sqwiki.autolabeled_revisions.20k_2016.json
- ./utility merge_labels $^ > $@
+ cat $< | \
+ revscoring extract \
+ editquality.feature_lists.sqwiki.reverted \
+ --host https://sq.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json: \
datasets/sqwiki.labeled_revisions.20k_2016.json
- cat datasets/sqwiki.labeled_revisions.20k_2016.json | \
+ cat $< | \
revscoring extract \
- editquality.feature_lists.sqwiki.reverted \
editquality.feature_lists.sqwiki.damaging \
editquality.feature_lists.sqwiki.goodfaith \
--host https://sq.wikipedia.org \
--extractor $(max_extractors) \
- --verbose > \
? ^
+ --verbose > $@
? ^^
- datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json
-
- tuning_reports/sqwiki.reverted.md: \
- datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json
- cat $< | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.sqwiki.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.018051805180518053" \
- --pop-rate "false=0.9819481948194819" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/sqwiki.reverted.gradient_boosting.model: \
- datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.sqwiki.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).0 \
- -p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
- -p 'n_estimators=500' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.018051805180518053" \
- --pop-rate "false=0.9819481948194819" \
- --center --scale > $@
tuning_reports/sqwiki.damaging.md: \
datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.sqwiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.0287028702870287" \
--pop-rate "false=0.9712971297129713" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/sqwiki.damaging.gradient_boosting.model: \
datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.sqwiki.damaging \
damaging \
--version=$(damaging_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=500' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.0287028702870287" \
--pop-rate "false=0.9712971297129713" \
--center --scale > $@
tuning_reports/sqwiki.goodfaith.md: \
datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.sqwiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9763476347634763" \
- --pop-rate "false=0.023652365236523653" \
? ^^
+ --pop-rate "false=0.023652365236523698" \
? ^^
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/sqwiki.goodfaith.gradient_boosting.model: \
datasets/sqwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.sqwiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=500' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9763476347634763" \
- --pop-rate "false=0.023652365236523653" \
? ^^
+ --pop-rate "false=0.023652365236523698" \
? ^^
--center --scale > $@
+
+ sqwiki_models: \
+ models/sqwiki.damaging.gradient_boosting.model \
+ models/sqwiki.goodfaith.gradient_boosting.model
sqwiki_tuning_reports: \
tuning_reports/sqwiki.damaging.md \
tuning_reports/sqwiki.goodfaith.md
-
- sqwiki_models: \
- models/sqwiki.damaging.gradient_boosting.model \
- models/sqwiki.goodfaith.gradient_boosting.model
- ############################# Turkish Wikipedia ############################
+ ############################# Turkish Wikipedia ################################
? ++++
+ datasets/trwiki.sampled_revisions.20k_2015.json:
+ wget -qO- http://quarry.wmflabs.org/run/168286/output/0/json-lines?download=true > $@
+
+ datasets/trwiki.autolabeled_revisions.20k_2015.json: \
+ datasets/trwiki.sampled_revisions.20k_2015.json
+ cat $< | \
+ ./utility autolabel --host=https://tr.wikipedia.org \
+ --trusted-groups=sysop,oversight,trusted,bot,rollbacker,checkuser,abusefilter,bureaucrat \
+ --trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
+ --verbose > $@
+
- datasets/trwiki.human_labeled_revisions.20k_2015.json:
? ^^
+ datasets/trwiki.human_labeled_revisions.5k_2015.json:
? ^
./utility fetch_labels \
https://labels.wmflabs.org/campaigns/trwiki/5/ > $@
datasets/trwiki.labeled_revisions.20k_2015.json: \
+ datasets/trwiki.autolabeled_revisions.20k_2015.json \
- datasets/trwiki.human_labeled_revisions.20k_2015.json
? ^^
+ datasets/trwiki.human_labeled_revisions.5k_2015.json
? ^
+ ./utility merge_labels $^ > $@
- cat $< | \
- ./utility autolabel --host=https://tr.wikipedia.org \
- --trusted-groups=sysop,oversight,trusted,bot,rollbacker,checkuser,abusefilter,bureaucrat \
- --trusted-edits=1000 \
- --verbose > $@
- datasets/trwiki.labeled_revisions.w_cache.20k_2015.json: \
- datasets/trwiki.labeled_revisions.20k_2015.json
- cat $< | \
- revscoring extract \
- editquality.feature_lists.trwiki.reverted \
- editquality.feature_lists.trwiki.damaging \
- editquality.feature_lists.trwiki.goodfaith \
- --host https://tr.wikipedia.org \
- --extractor $(max_extractors) \
- --verbose > $@
-
- datasets/trwiki.sampled_revisions.20k_2016.json:
- wget -qO- http://quarry.wmflabs.org/run/168286/output/0/json-lines?download=true > $@
-
- datasets/trwiki.autolabeled_revisions.20k_2016.json: \
- datasets/trwiki.sampled_revisions.20k_2016.json
- cat $< | \
- ./utility autolabel --host=https://tr.wikipedia.org \
- --trusted-groups=abusefilter,arbcom,bureaucrat,checkuser,rollbacker,sysop,bot \
- --trusted-edits=1000 \
- --verbose > $@
-
- datasets/trwiki.revisions_to_review.20k_2016.json: \
- datasets/trwiki.autolabeled_revisions.20k_2016.json
- cat $< | \
- grep '"needs_review": true' > $@
-
- datasets/trwiki.autolabeled_revisions.w_cache.20k_2016.json: \
? ^
+ datasets/trwiki.autolabeled_revisions.w_cache.20k_2015.json: \
? ^
- datasets/trwiki.autolabeled_revisions.20k_2016.json
? ^
+ datasets/trwiki.autolabeled_revisions.20k_2015.json
? ^
cat $< | \
revscoring extract \
editquality.feature_lists.trwiki.reverted \
--host https://tr.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
- tuning_reports/trwiki.reverted.md: \
- datasets/trwiki.labeled_revisions.w_cache.20k_2015.json
? --
+ datasets/trwiki.labeled_revisions.w_cache.20k_2015.json: \
? +++
+ datasets/trwiki.labeled_revisions.20k_2015.json
cat $< | \
- revscoring tune \
? ^^^
+ revscoring extract \
? ++ ^^^^
- config/classifiers.params.yaml \
- editquality.feature_lists.trwiki.reverted \
? -------
+ editquality.feature_lists.trwiki.damaging \
? +++++++
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.11489598623272763" \
- --pop-rate "false=0.8851040137672723" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/trwiki.reverted.gradient_boosting.model: \
- datasets/trwiki.labeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.trwiki.reverted \
? ^^^^^ ^^
+ editquality.feature_lists.trwiki.goodfaith \
? ^^^^^^^ ^
+ --host https://tr.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
- reverted_for_damage \
- --version=$(reverted_major_minor).0 \
- -p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
- -p 'n_estimators=700' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.11489598623272763" \
- --pop-rate "false=0.8851040137672723" \
- --center --scale > $@
tuning_reports/trwiki.damaging.md: \
datasets/trwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.trwiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.0495014425266994" \
--pop-rate "false=0.9504985574733006" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/trwiki.damaging.gradient_boosting.model: \
datasets/trwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.trwiki.damaging \
damaging \
--version=$(damaging_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.0495014425266994" \
--pop-rate "false=0.9504985574733006" \
--center --scale > $@
tuning_reports/trwiki.goodfaith.md: \
datasets/trwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.trwiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9538897605911829" \
- --pop-rate "false=0.04611023940881713" \
? ^^
+ --pop-rate "false=0.04611023940881709" \
? ^^
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/trwiki.goodfaith.gradient_boosting.model: \
datasets/trwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.trwiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9538897605911829" \
- --pop-rate "false=0.04611023940881713" \
? ^^
+ --pop-rate "false=0.04611023940881709" \
? ^^
--center --scale > $@
trwiki_models: \
- models/trwiki.damaging.gradient_boosting.model \
? -
+ models/trwiki.damaging.gradient_boosting.model \
- models/trwiki.goodfaith.gradient_boosting.model
? -
+ models/trwiki.goodfaith.gradient_boosting.model
trwiki_tuning_reports: \
- tuning_reports/trwiki.damaging.md \
? -
+ tuning_reports/trwiki.damaging.md \
- tuning_reports/trwiki.goodfaith.md
? -
+ tuning_reports/trwiki.goodfaith.md
amsa@C235:~/editquality$ python differ.py "Chinese Wikipedia"
- ############################### Chinese Wikipedia #############################
? --
+ ############################# Chinese Wikipedia ################################
? +++
-
datasets/zhwiki.sampled_revisions.100k_2016.json:
wget -qO- https://quarry.wmflabs.org/run/131979/output/0/json-lines?download=true > $@
datasets/zhwiki.autolabeled_revisions.100k_2016.json: \
datasets/zhwiki.sampled_revisions.100k_2016.json
cat $< | \
./utility autolabel --host=https://zh.wikipedia.org \
--trusted-groups=checkuser,bureaucrat,sysop,eliminator,bot \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
- datasets/zhwiki.revisions_to_review.5k_2016.json: \
? ^
+ datasets/zhwiki.revisions_for_review.5k_2016.json: \
? ^ +
datasets/zhwiki.autolabeled_revisions.100k_2016.json
+ ( \
- (cat $< | \
? ^
+ cat $< | \
? ^
grep '"needs_review": true' | \
shuf -n 2500; \
cat $< | \
grep '"needs_review": false' | \
shuf -n 2500 \
- ) | shuf > $@
+ ) | shuf > $@
? +
+
+ datasets/zhwiki.autolabeled_revisions.w_cache.100k_2016.json: \
+ datasets/zhwiki.autolabeled_revisions.100k_2016.json
+ cat $< | \
+ revscoring extract \
+ editquality.feature_lists.zhwiki.reverted \
+ --host https://zh.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
- ############################### Urdu Wikipedia #################################
? -- -
+ ############################# Urdu Wikipedia ################################
-
datasets/urwiki.sampled_revisions.500k_2015.json:
wget -qO- http://quarry.wmflabs.org/run/64277/output/0/json-lines?download=true > $@
datasets/urwiki.autolabeled_revisions.500k_2015.json: \
datasets/urwiki.sampled_revisions.500k_2015.json
cat $< | \
./utility autolabel --host=https://ur.wikipedia.org \
--trusted-groups=bot,bureaucrat,sysop,rollbackers \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
-
datasets/urwiki.revisions_for_review.5k_2015.json: \
datasets/urwiki.autolabeled_revisions.500k_2015.json
( \
- cat $< | \
? ^
+ cat $< | \
? ^
- grep '"needs_review": true' | \
? ^
+ grep '"needs_review": true' | \
? ^
- shuf -n 2500; \
? ^
+ shuf -n 2500; \
? ^
- cat $< | \
? -
+ cat $< | \
- grep '"needs_review": false' | \
? -
+ grep '"needs_review": false' | \
- shuf -n 2500 \
? -
+ shuf -n 2500 \
) | shuf > $@
+ datasets/urwiki.autolabeled_revisions.w_cache.500k_2015.json: \
+ datasets/urwiki.autolabeled_revisions.500k_2015.json
+ cat $< | \
+ revscoring extract \
+ editquality.feature_lists.urwiki.reverted \
+ --host https://ur.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
- #urwiki_models: \
- # models/urwiki.reverted.gradient_boosting.model
-
- #urwiki_tuning_reports: \
- # tuning_reports/urwiki.reverted.md
- ############################## Serbian Wikipedia ################################
? -
+ ############################# Serbian Wikipedia ################################
- # From https://quarry.wmflabs.org/query/22469
datasets/srwiki.sampled_revisions.120k_2017.json:
wget -qO- https://quarry.wmflabs.org/run/211097/output/0/json-lines?download=true > $@
datasets/srwiki.autolabeled_revisions.120k_2017.json: \
datasets/srwiki.sampled_revisions.120k_2017.json
cat $< | \
./utility autolabel --host=https://sr.wikipedia.org \
--trusted-groups=autopatrolled,bot,bureaucrat,patroller,rollbacker,sysop \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
datasets/srwiki.revisions_for_review.5k_2017.json: \
datasets/srwiki.autolabeled_revisions.120k_2017.json
grep '"needs_review": true' $< | shuf > $@
+
+ datasets/srwiki.autolabeled_revisions.w_cache.120k_2017.json: \
+ datasets/srwiki.autolabeled_revisions.120k_2017.json
+ cat $< | \
+ revscoring extract \
+ editquality.feature_lists.srwiki.reverted \
+ --host https://sr.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
- ################################# Latvian Wikipedia ############################
? ----
+ ############################# Latvian Wikipedia ################################
? ++++
-
# From https://quarry.wmflabs.org/query/17989
datasets/lvwiki.sampled_revisions.20k_2016.json:
wget -qO- https://quarry.wmflabs.org/run/169100/output/0/json-lines?download=true > $@
datasets/lvwiki.autolabeled_revisions.20k_2016.json: \
datasets/lvwiki.sampled_revisions.20k_2016.json
cat $< | \
./utility autolabel --host=https://lv.wikipedia.org \
--trusted-groups=sysop,bureaucrat,bot,oversight,checkuser,patroller,autopatrolled \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
+ datasets/lvwiki.revisions_for_review.5k_2016.json: \
+ datasets/lvwiki.autolabeled_revisions.20k_2016.json
+ grep '"needs_review": true' $< | shuf > $@
+
+ datasets/lvwiki.autolabeled_revisions.w_cache.20k_2016.json: \
+ datasets/lvwiki.autolabeled_revisions.20k_2016.json
+ cat $< | \
+ revscoring extract \
+ editquality.feature_lists.lvwiki.reverted \
+ --host https://lv.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
############################# Azeri Wikipedia ################################
-
datasets/azwiki.sampled_revisions.20k_2016.json:
wget -qO- https://quarry.wmflabs.org/run/99533/output/0/json-lines?download=true > $@
datasets/azwiki.autolabeled_revisions.20k_2016.json: \
datasets/azwiki.sampled_revisions.20k_2016.json
cat $< | \
./utility autolabel --host=https://az.wikipedia.org \
--trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
datasets/azwiki.revisions_for_review.5k_2016.json: \
datasets/azwiki.autolabeled_revisions.20k_2016.json
( \
- cat $< | \
? -
+ cat $< | \
- grep '"needs_review": true' | \
? -
+ grep '"needs_review": true' | \
- shuf -n 2500; \
? -
+ shuf -n 2500; \
- cat $< | \
? -
+ cat $< | \
- grep '"needs_review": false' | \
? -
+ grep '"needs_review": false' | \
- shuf -n 2500 \
? -
+ shuf -n 2500 \
) | shuf > $@
+
+ datasets/azwiki.autolabeled_revisions.w_cache.20k_2016.json: \
+ datasets/azwiki.autolabeled_revisions.20k_2016.json
+ cat $< | \
+ revscoring extract \
+ editquality.feature_lists.azwiki.reverted \
+ --host https://az.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
+
amsa@C235:~/editquality$ python my_differ.oskol.py
- ############################# English Wikipedia ###############################
+ ############################# English Wikipedia ################################
? +
datasets/enwiki.human_labeled_revisions.20k_2015.json:
./utility fetch_labels \
https://labels.wmflabs.org/campaigns/enwiki/4/ > $@
- datasets/enwiki.labeled_revisions.20k_2015.json: \
- datasets/enwiki.human_labeled_revisions.20k_2015.json
- cat $< | \
- ./utility autolabel --host=https://en.wikipedia.org \
- --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \
- --trusted-edits=1000 \
- --verbose > $@
-
- datasets/enwiki.labeled_revisions.w_cache.20k_2015.json: \
+ datasets/enwiki.autolabeled_revisions.w_cache.20k_2015.json: \
? ++++
- datasets/enwiki.labeled_revisions.20k_2015.json
+ datasets/enwiki.autolabeled_revisions.20k_2015.json
? ++++
cat $< | \
revscoring extract \
editquality.feature_lists.enwiki.reverted \
- editquality.feature_lists.enwiki.goodfaith \
- editquality.feature_lists.enwiki.damaging \
--host https://en.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
-
- tuning_reports/enwiki.reverted.md: \
- datasets/enwiki.labeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.enwiki.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.0728788421214136" \
- --pop-rate "false=0.9271211578785864" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/enwiki.reverted.gradient_boosting.model: \
- datasets/enwiki.labeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.enwiki.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).0 \
- -p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
- -p 'n_estimators=700' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.0728788421214136" \
- --pop-rate "false=0.9271211578785864" \
- --center --scale > $@
-
tuning_reports/enwiki.damaging.md: \
datasets/enwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.enwiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.034163555464634586" \
--pop-rate "false=0.9658364445353654" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/enwiki.damaging.gradient_boosting.model: \
datasets/enwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.enwiki.damaging \
damaging \
--version=$(damaging_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.034163555464634586" \
--pop-rate "false=0.9658364445353654" \
--center --scale > $@
tuning_reports/enwiki.goodfaith.md: \
datasets/enwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.enwiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9671661637600368" \
- --pop-rate "false=0.03283383623996318" \
? ^
+ --pop-rate "false=0.032833836239963166" \
? ^^
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/enwiki.goodfaith.gradient_boosting.model: \
datasets/enwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.enwiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9671661637600368" \
- --pop-rate "false=0.03283383623996318" \
? ^
+ --pop-rate "false=0.032833836239963166" \
? ^^
--center --scale > $@
enwiki_models: \
- models/enwiki.damaging.gradient_boosting.model \
? -
+ models/enwiki.damaging.gradient_boosting.model \
- models/enwiki.goodfaith.gradient_boosting.model
? -
+ models/enwiki.goodfaith.gradient_boosting.model
enwiki_tuning_reports: \
- tuning_reports/enwiki.damaging.md \
? -
+ tuning_reports/enwiki.damaging.md \
- tuning_reports/enwiki.goodfaith.md
? - -
+ tuning_reports/enwiki.goodfaith.md
-
############################# French Wikipedia ################################
-
- datasets/frwiki.sampled_revisions.20k_2015.json:
- wget -qO- https://quarry.wmflabs.org/run/48090/output/0/json-lines?download=true > $@
-
datasets/frwiki.sampled_revisions.20k_2016.json:
wget -qO- https://quarry.wmflabs.org/run/98251/output/0/json-lines?download=true > $@
datasets/frwiki.autolabeled_revisions.20k_2016.json: \
datasets/frwiki.sampled_revisions.20k_2016.json
cat $< | \
./utility autolabel --host=https://fr.wikipedia.org \
--trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
+
+ datasets/frwiki.sampled_revisions.20k_2015.json:
+ wget -qO- https://quarry.wmflabs.org/run/48090/output/0/json-lines?download=true > $@
+
+ datasets/frwiki.autolabeled_revisions.20k_2015.json: \
+ datasets/frwiki.sampled_revisions.20k_2015.json
+ cat $< | \
+ ./utility autolabel --host=https://fr.wikipedia.org \
+ --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \
+ --trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
+ --verbose > $@
+
+ datasets/frwiki.human_labeled_revisions.5k_2016.json:
+ ./utility fetch_labels \
+ https://labels.wmflabs.org/campaigns/frwiki/38/ > $@
datasets/frwiki.revisions_for_review.5k_2016.json: \
datasets/frwiki.autolabeled_revisions.20k_2016.json
( \
- cat $< | \
? -
+ cat $< | \
- grep '"needs_review": true' | \
? -
+ grep '"needs_review": true' | \
- shuf -n 2500; \
? -
+ shuf -n 2500; \
- cat $< | \
? -
+ cat $< | \
- grep '"needs_review": false' | \
? -
+ grep '"needs_review": false' | \
- shuf -n 2500 \
? -
+ shuf -n 2500 \
- ) | shuf > $@
? -
+ ) | shuf > $@
+
+ datasets/frwiki.labeled_revisions.20k_2016.json: \
+ datasets/frwiki.autolabeled_revisions.20k_2016.json \
+ datasets/frwiki.human_labeled_revisions.5k_2016.json
+ ./utility merge_labels $^ > $@
datasets/frwiki.autolabeled_revisions.w_cache.20k_2016.json: \
datasets/frwiki.autolabeled_revisions.20k_2016.json
cat $< | \
revscoring extract \
editquality.feature_lists.frwiki.reverted \
--host https://fr.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
- tuning_reports/frwiki.reverted.md: \
- datasets/frwiki.autolabeled_revisions.w_cache.20k_2016.json
- cat $< | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.frwiki.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.035263474253656585" \
- --pop-rate "false=0.9647365257463434" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/frwiki.reverted.gradient_boosting.model: \
- datasets/frwiki.autolabeled_revisions.w_cache.20k_2016.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.frwiki.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).0 \
- -p 'max_depth=5' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
- -p 'n_estimators=700' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.035263474253656585" \
- --pop-rate "false=0.9647365257463434" \
- --center --scale > $@
-
- datasets/frwiki.human_labeled_revisions.5k_2016.json:
- ./utility fetch_labels \
- https://labels.wmflabs.org/campaigns/frwiki/38/ > $@
-
- datasets/frwiki.labeled_revisions.20k_2016.json: \
- datasets/frwiki.human_labeled_revisions.5k_2016.json \
- datasets/frwiki.autolabeled_revisions.20k_2016.json
- ./utility merge_labels $^ > $@
-
datasets/frwiki.labeled_revisions.w_cache.20k_2016.json: \
datasets/frwiki.labeled_revisions.20k_2016.json
cat $< | \
revscoring extract \
- editquality.feature_lists.frwiki.reverted \
editquality.feature_lists.frwiki.damaging \
editquality.feature_lists.frwiki.goodfaith \
--host https://fr.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
tuning_reports/frwiki.damaging.md: \
datasets/frwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.frwiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.028751753155680224" \
--pop-rate "false=0.9712482468443198" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/frwiki.damaging.gradient_boosting.model: \
datasets/frwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.frwiki.damaging \
damaging \
--version=$(damaging_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=300' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.028751753155680224" \
--pop-rate "false=0.9712482468443198" \
--center --scale > $@
tuning_reports/frwiki.goodfaith.md: \
datasets/frwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.frwiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9786115007012622" \
- --pop-rate "false=0.021388499298737727" \
? -
+ --pop-rate "false=0.021388499298737762" \
? +
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/frwiki.goodfaith.gradient_boosting.model: \
datasets/frwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.frwiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=5' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=500' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9786115007012622" \
- --pop-rate "false=0.021388499298737727" \
? -
+ --pop-rate "false=0.021388499298737762" \
? +
--center --scale > $@
frwiki_models: \
- models/frwiki.damaging.gradient_boosting.model \
? -
+ models/frwiki.damaging.gradient_boosting.model \
- models/frwiki.goodfaith.gradient_boosting.model
? -
+ models/frwiki.goodfaith.gradient_boosting.model
frwiki_tuning_reports: \
- tuning_reports/frwiki.damaging.md \
? -
+ tuning_reports/frwiki.damaging.md \
- tuning_reports/frwiki.goodfaith.md
? - -
+ tuning_reports/frwiki.goodfaith.md
############################# Finnish Wikipedia ################################
+ # From https://quarry.wmflabs.org/query/19212
+ datasets/fiwiki.sampled_revisions.20k_2017.json:
+ wget -qO- https://quarry.wmflabs.org/run/181764/output/0/json-lines?download=true > $@
+
+ datasets/fiwiki.autolabeled_revisions.20k_2017.json: \
+ datasets/fiwiki.sampled_revisions.20k_2017.json
+ cat $< | \
+ ./utility autolabel --host=https://fi.wikipedia.org \
+ --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,autoreview,abusefilter,bureaucrat \
+ --trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
+ --verbose > $@
datasets/fiwiki.sampled_revisions.20k_2016.json:
wget -qO- https://quarry.wmflabs.org/run/161254/output/0/json-lines?download=true > $@
-
- # From https://quarry.wmflabs.org/query/19212
- datasets/fiwiki.sampled_revisions.20k_2017.json:
- wget -qO- https://quarry.wmflabs.org/run/181764/output/0/json-lines?download=true > $@
datasets/fiwiki.autolabeled_revisions.20k_2016.json: \
datasets/fiwiki.sampled_revisions.20k_2016.json
cat $< | \
./utility autolabel --host=https://fi.wikipedia.org \
--trusted-groups=sysop,oversight,bot,rollbacker,checkuser,autoreview,abusefilter,bureaucrat \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
- --verbose > $@
-
- datasets/fiwiki.autolabeled_revisions.20k_2017.json: \
- datasets/fiwiki.sampled_revisions.20k_2017.json
- cat $< | \
- ./utility autolabel --host=https://fi.wikipedia.org \
- --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,autoreview,abusefilter,bureaucrat \
- --trusted-edits=1000 \
--verbose > $@
datasets/fiwiki.human_labeled_revisions.5k_2016.json:
./utility fetch_labels \
https://labels.wmflabs.org/campaigns/fiwiki/55/ > $@
datasets/fiwiki.labeled_revisions.20k_2016.json: \
+ datasets/fiwiki.autolabeled_revisions.20k_2016.json \
- datasets/fiwiki.human_labeled_revisions.5k_2016.json \
? --
+ datasets/fiwiki.human_labeled_revisions.5k_2016.json
+ ./utility merge_labels $^ > $@
+
+ datasets/fiwiki.autolabeled_revisions.w_cache.20k_2016.json: \
datasets/fiwiki.autolabeled_revisions.20k_2016.json
- ./utility merge_labels $^ > $@
+ cat $< | \
+ revscoring extract \
+ editquality.feature_lists.fiwiki.reverted \
+ --host https://fi.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json: \
datasets/fiwiki.labeled_revisions.20k_2016.json
cat $< | \
revscoring extract \
- editquality.feature_lists.fiwiki.reverted \
editquality.feature_lists.fiwiki.damaging \
editquality.feature_lists.fiwiki.goodfaith \
--host https://fi.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
-
- tuning_reports/fiwiki.reverted.md: \
- datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json
- cat $< | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.fiwiki.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.053624130858886496" \
- --pop-rate "false=0.9463758691411135" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/fiwiki.reverted.gradient_boosting.model: \
- datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.fiwiki.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).1 \
- -p 'max_depth=5' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
- -p 'n_estimators=700' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.053624130858886496" \
- --pop-rate "false=0.9463758691411135" \
- --center --scale > $@
tuning_reports/fiwiki.damaging.md: \
datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.fiwiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.051323095392926815" \
--pop-rate "false=0.9486769046070732" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/fiwiki.damaging.gradient_boosting.model: \
datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.fiwiki.damaging \
damaging \
--version=$(damaging_major_minor).1 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=5' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.051323095392926815" \
--pop-rate "false=0.9486769046070732" \
--center --scale > $@
tuning_reports/fiwiki.goodfaith.md: \
datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.fiwiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9658846480916412" \
- --pop-rate "false=0.03411535190835876" \
? ^
+ --pop-rate "false=0.03411535190835879" \
? ^
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/fiwiki.goodfaith.gradient_boosting.model: \
datasets/fiwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.fiwiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).1 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=5' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9658846480916412" \
- --pop-rate "false=0.03411535190835876" \
? ^
+ --pop-rate "false=0.03411535190835879" \
? ^
--center --scale > $@
fiwiki_models: \
- models/fiwiki.damaging.gradient_boosting.model \
? -
+ models/fiwiki.damaging.gradient_boosting.model \
- models/fiwiki.goodfaith.gradient_boosting.model
? -
+ models/fiwiki.goodfaith.gradient_boosting.model
fiwiki_tuning_reports: \
- tuning_reports/fiwiki.damaging.md \
? -
+ tuning_reports/fiwiki.damaging.md \
- tuning_reports/fiwiki.goodfaith.md
? - -
+ tuning_reports/fiwiki.goodfaith.md
- ############################### Dutch Wikipedia ###############################
? --
+ ############################# Dutch Wikipedia ################################
? +
-
datasets/nlwiki.sampled_revisions.20k_2016.json:
wget -qO- http://quarry.wmflabs.org/run/42225/output/0/json-lines?download=true > $@
datasets/nlwiki.autolabeled_revisions.20k_2016.json: \
datasets/nlwiki.sampled_revisions.20k_2016.json
cat $< | \
./utility autolabel --host=https://nl.wikipedia.org \
--trusted-groups=abusefilter,arbcom,bureaucrat,checkuser,rollbacker,sysop,bot \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
-
- tuning_reports/nlwiki.reverted.md: \
- datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json
- cat $< | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.nlwiki.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.06322309818777516" \
- --pop-rate "false=0.9367769018122248" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/nlwiki.reverted.gradient_boosting.model: \
- datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.nlwiki.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).0 \
- -p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
- -p 'n_estimators=700' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.06322309818777516" \
- --pop-rate "false=0.9367769018122248" \
- --center --scale > $@
datasets/nlwiki.human_labeled_revisions.5k_2016.json:
./utility fetch_labels \
https://labels.wmflabs.org/campaigns/nlwiki/14/ > $@
datasets/nlwiki.labeled_revisions.20k_2016.json: \
+ datasets/nlwiki.autolabeled_revisions.20k_2016.json \
- datasets/nlwiki.human_labeled_revisions.5k_2016.json \
? --
+ datasets/nlwiki.human_labeled_revisions.5k_2016.json
+ ./utility merge_labels $^ > $@
+
+ datasets/nlwiki.autolabeled_revisions.w_cache.20k_2016.json: \
datasets/nlwiki.autolabeled_revisions.20k_2016.json
- ./utility merge_labels $^ > $@
+ cat $< | \
+ revscoring extract \
+ editquality.feature_lists.nlwiki.reverted \
+ --host https://nl.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json: \
datasets/nlwiki.labeled_revisions.20k_2016.json
cat $< | \
revscoring extract \
- editquality.feature_lists.nlwiki.reverted \
editquality.feature_lists.nlwiki.damaging \
editquality.feature_lists.nlwiki.goodfaith \
--host https://nl.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
tuning_reports/nlwiki.damaging.md: \
datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.nlwiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.05068086413432989" \
--pop-rate "false=0.9493191358656701" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/nlwiki.damaging.gradient_boosting.model: \
datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.nlwiki.damaging \
damaging \
--version=$(damaging_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=5' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.05068086413432989" \
--pop-rate "false=0.9493191358656701" \
--center --scale > $@
tuning_reports/nlwiki.goodfaith.md: \
datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.nlwiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9646257806900789" \
- --pop-rate "false=0.035374219309921164" \
? ^^
+ --pop-rate "false=0.03537421930992113" \
? ^
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/nlwiki.goodfaith.gradient_boosting.model: \
datasets/nlwiki.labeled_revisions.w_cache.20k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.nlwiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=5' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9646257806900789" \
- --pop-rate "false=0.035374219309921164" \
? ^^
+ --pop-rate "false=0.03537421930992113" \
? ^
--center --scale > $@
nlwiki_models: \
- models/nlwiki.damaging.gradient_boosting.model \
? -
+ models/nlwiki.damaging.gradient_boosting.model \
- models/nlwiki.goodfaith.gradient_boosting.model
? -
+ models/nlwiki.goodfaith.gradient_boosting.model
nlwiki_tuning_reports: \
- tuning_reports/nlwiki.damaging.md \
? -
+ tuning_reports/nlwiki.damaging.md \
- tuning_reports/nlwiki.goodfaith.md
? - -
+ tuning_reports/nlwiki.goodfaith.md
- ############################# Portugueses Wikipedia ############################
+ ############################# Portugueses Wikipedia ################################
? ++++
datasets/ptwiki.human_labeled_revisions.20k_2015.json:
./utility fetch_labels \
https://labels.wmflabs.org/campaigns/ptwiki/7/ > $@
- datasets/ptwiki.labeled_revisions.20k_2015.json: \
- datasets/ptwiki.human_labeled_revisions.20k_2015.json
- cat $< | \
- ./utility autolabel --host=https://pt.wikipedia.org \
- --trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat,flow-bot \
- --trusted-edits=1000 \
- --verbose > $@
-
- datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json: \
+ datasets/ptwiki.autolabeled_revisions.w_cache.20k_2015.json: \
? ++++
- datasets/ptwiki.labeled_revisions.20k_2015.json
+ datasets/ptwiki.autolabeled_revisions.20k_2015.json
? ++++
cat $< | \
revscoring extract \
editquality.feature_lists.ptwiki.reverted \
--host https://pt.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
-
- tuning_reports/ptwiki.reverted.md: \
- datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.ptwiki.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.10170004540180598" \
- --pop-rate "false=0.898299954598194" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/ptwiki.reverted.gradient_boosting.model: \
- datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json
- cat $< | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.ptwiki.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).0 \
- -p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
- -p 'n_estimators=700' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.10170004540180598" \
- --pop-rate "false=0.898299954598194" \
- --center --scale > $@
tuning_reports/ptwiki.damaging.md: \
datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.ptwiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.06896029864299047" \
--pop-rate "false=0.9310397013570095" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/ptwiki.damaging.gradient_boosting.model: \
datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.ptwiki.damaging \
damaging \
--version=$(damaging_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.06896029864299047" \
--pop-rate "false=0.9310397013570095" \
--center --scale > $@
tuning_reports/ptwiki.goodfaith.md: \
datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.ptwiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9397669373959542" \
- --pop-rate "false=0.0602330626040458" \
+ --pop-rate "false=0.06023306260404582" \
? +
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/ptwiki.goodfaith.gradient_boosting.model: \
datasets/ptwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.ptwiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9397669373959542" \
- --pop-rate "false=0.0602330626040458" \
+ --pop-rate "false=0.06023306260404582" \
? +
--center --scale > $@
ptwiki_models: \
- models/ptwiki.damaging.gradient_boosting.model \
? -
+ models/ptwiki.damaging.gradient_boosting.model \
- models/ptwiki.goodfaith.gradient_boosting.model
? -
+ models/ptwiki.goodfaith.gradient_boosting.model
ptwiki_tuning_reports: \
- tuning_reports/ptwiki.damaging.md \
? -
+ tuning_reports/ptwiki.damaging.md \
- tuning_reports/ptwiki.goodfaith.md
? - -
+ tuning_reports/ptwiki.goodfaith.md
############################# Persian Wikipedia ################################
- datasets/fawiki.human_labeled_revisions.20k_2015.json:
? ^^ ^^^^^^^
+ datasets/fawiki.sampled_revisions.20k_2015.json:
? ^^ ^
+ wget -qO- http://quarry.wmflabs.org/run/59580/output/0/json-lines?download=true > $@
- ./utility fetch_labels \
- https://labels.wmflabs.org/campaigns/fawiki/6/ > $@
- datasets/fawiki.human_labeled_revisions.5k_2016.json:
- ./utility fetch_labels \
- https://labels.wmflabs.org/campaigns/fawiki/21/ > $@
-
- datasets/fawiki.labeled_revisions.20k_2015.json: \
+ datasets/fawiki.autolabeled_revisions.20k_2015.json: \
? ++++
- datasets/fawiki.human_labeled_revisions.20k_2015.json
? ^^ ^^^^^^^
+ datasets/fawiki.sampled_revisions.20k_2015.json
? ^^ ^
cat $< | \
./utility autolabel --host=https://fa.wikipedia.org \
--trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat,flow-bot \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
- --verbose > $@
-
- datasets/fawiki.labeled_revisions.w_cache.20k_2015.json: \
- datasets/fawiki.labeled_revisions.20k_2015.json
- cat $< | \
- revscoring extract \
- editquality.feature_lists.fawiki.reverted \
- editquality.feature_lists.fawiki.damaging \
- editquality.feature_lists.fawiki.goodfaith \
- --host https://fa.wikipedia.org \
- --extractor $(max_extractors) \
--verbose > $@
datasets/fawiki.sampled_revisions.2.20k_2015.json:
wget -qO- http://quarry.wmflabs.org/run/59580/output/0/json-lines?download=true > $@
datasets/fawiki.autolabeled_revisions.2.20k_2015.json: \
datasets/fawiki.sampled_revisions.2.20k_2015.json
cat $< | \
./utility autolabel --host=https://fa.wikipedia.org \
--trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat,flow-bot \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
+ datasets/fawiki.human_labeled_revisions.5k_2016.json:
+ ./utility fetch_labels \
+ https://labels.wmflabs.org/campaigns/fawiki/21/ > $@
+
+ datasets/fawiki.human_labeled_revisions.5k_2015.json:
+ ./utility fetch_labels \
+ https://labels.wmflabs.org/campaigns/fawiki/6/ > $@
+
datasets/fawiki.labeled_revisions.20k_2016.json: \
- datasets/fawiki.human_labeled_revisions.5k_2016.json \
- datasets/fawiki.autolabeled_revisions.2.20k_2015.json
+ datasets/fawiki.autolabeled_revisions.2.20k_2015.json \
? ++
+ datasets/fawiki.human_labeled_revisions.5k_2016.json
./utility merge_labels $^ > $@
+
+ datasets/fawiki.autolabeled_revisions.w_cache.20k_2015.json: \
+ datasets/fawiki.autolabeled_revisions.20k_2015.json
+ cat $< | \
+ revscoring extract \
+ editquality.feature_lists.fawiki.reverted \
+ --host https://fa.wikipedia.org \
+ --extractor $(max_extractors) \
+ --verbose > $@
datasets/fawiki.labeled_revisions.w_cache.20k_2016.json: \
datasets/fawiki.labeled_revisions.20k_2016.json
cat $< | \
revscoring extract \
- editquality.feature_lists.fawiki.reverted \
editquality.feature_lists.fawiki.damaging \
editquality.feature_lists.fawiki.goodfaith \
--host https://fa.wikipedia.org \
+ --extractor $(max_extractors) \
--verbose > $@
- tuning_reports/fawiki.reverted.md: \
- datasets/fawiki.labeled_revisions.w_cache.20k_2015.json \
- datasets/fawiki.labeled_revisions.w_cache.20k_2016.json
- cat $^ | \
- revscoring tune \
- config/classifiers.params.yaml \
- editquality.feature_lists.fawiki.reverted \
- reverted_for_damage \
- roc_auc.labels.true \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.029729045327931122" \
- --pop-rate "false=0.9702709546720689" \
- --center --scale \
- --cv-timeout=60 \
- --debug > $@
-
- models/fawiki.reverted.gradient_boosting.model: \
- datasets/fawiki.labeled_revisions.w_cache.20k_2015.json \
- datasets/fawiki.labeled_revisions.w_cache.20k_2016.json
- cat $^ | \
- revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
- editquality.feature_lists.fawiki.reverted \
- reverted_for_damage \
- --version=$(reverted_major_minor).0 \
- -p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
- -p 'n_estimators=500' \
- --label-weight "true=$(reverted_weight)" \
- --pop-rate "true=0.029729045327931122" \
- --pop-rate "false=0.9702709546720689" \
- --center --scale > $@
-
tuning_reports/fawiki.damaging.md: \
- datasets/fawiki.labeled_revisions.w_cache.20k_2015.json \
? --
+ datasets/fawiki.labeled_revisions.w_cache.20k_2015.json
- datasets/fawiki.labeled_revisions.w_cache.20k_2016.json
- cat $^ | \
? ^
+ cat $< | \
? ^
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.fawiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.0297029702970297" \
- --pop-rate "false=0.9702717074776531" \
? ^^ ^ ^^^ -
+ --pop-rate "false=0.9702970297029703" \
? ^ ++ ^^^ ^
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/fawiki.damaging.gradient_boosting.model: \
- datasets/fawiki.labeled_revisions.w_cache.20k_2015.json \
? --
+ datasets/fawiki.labeled_revisions.w_cache.20k_2015.json
- datasets/fawiki.labeled_revisions.w_cache.20k_2016.json
- cat $^ | \
? ^
+ cat $< | \
? ^
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.fawiki.damaging \
damaging \
--version=$(damaging_major_minor).0 \
+ -p 'learning_rate=0.1' \
-p 'max_depth=3' \
- -p 'learning_rate=0.1' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=300' \
--label-weight "true=$(damaging_weight)" \
--pop-rate "true=0.0297029702970297" \
- --pop-rate "false=0.9702717074776531" \
? ^^ ^ ^^^ -
+ --pop-rate "false=0.9702970297029703" \
? ^ ++ ^^^ ^
--center --scale > $@
tuning_reports/fawiki.goodfaith.md: \
- datasets/fawiki.labeled_revisions.w_cache.20k_2015.json \
? --
+ datasets/fawiki.labeled_revisions.w_cache.20k_2015.json
- datasets/fawiki.labeled_revisions.w_cache.20k_2016.json
- cat $^ | \
? ^
+ cat $< | \
? ^
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.fawiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9834641681438339" \
- --pop-rate "false=0.016535831856166118" \
? ^^
+ --pop-rate "false=0.01653583185616614" \
? ^
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/fawiki.goodfaith.gradient_boosting.model: \
- datasets/fawiki.labeled_revisions.w_cache.20k_2015.json \
? --
+ datasets/fawiki.labeled_revisions.w_cache.20k_2015.json
- datasets/fawiki.labeled_revisions.w_cache.20k_2016.json
- cat $^ | \
? ^
+ cat $< | \
? ^
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.fawiki.goodfaith \
goodfaith \
--version=$(goodfaith_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=500' \
--label-weight "false=$(goodfaith_weight)" \
--pop-rate "true=0.9834641681438339" \
- --pop-rate "false=0.016535831856166118" \
? ^^
+ --pop-rate "false=0.01653583185616614" \
? ^
--center --scale > $@
fawiki_models: \
- models/fawiki.damaging.gradient_boosting.model \
? -
+ models/fawiki.damaging.gradient_boosting.model \
- models/fawiki.goodfaith.gradient_boosting.model
? -
+ models/fawiki.goodfaith.gradient_boosting.model
fawiki_tuning_reports: \
- tuning_reports/fawiki.damaging.md \
? -
+ tuning_reports/fawiki.damaging.md \
- tuning_reports/fawiki.goodfaith.md
? - -
+ tuning_reports/fawiki.goodfaith.md
- ############################### Croatian Wikipedia ###########################
? --
+ ############################# Croatian Wikipedia ################################
? +++++
-
# From https://quarry.wmflabs.org/query/21213
datasets/hrwiki.sampled_revisions.20k_2017.json:
wget -qO- https://quarry.wmflabs.org/run/200064/output/0/json-lines?download=true > $@
datasets/hrwiki.autolabeled_revisions.20k_2017.json: \
datasets/hrwiki.sampled_revisions.20k_2017.json
cat $< | \
./utility autolabel --host=https://hr.wikipedia.org \
--trusted-groups=autopatrolled,bot,bureaucrat,checkuser,reviewer,rollbacker,sysop \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
datasets/hrwiki.revisions_for_review.5k_2017.json: \
datasets/hrwiki.autolabeled_revisions.20k_2017.json
grep '"needs_review": true' $< | shuf > $@
datasets/hrwiki.autolabeled_revisions.w_cache.20k_2017.json: \
datasets/hrwiki.autolabeled_revisions.20k_2017.json
cat $< | \
revscoring extract \
editquality.feature_lists.hrwiki.reverted \
--host https://hr.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
tuning_reports/hrwiki.reverted.md: \
datasets/hrwiki.autolabeled_revisions.w_cache.20k_2017.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.hrwiki.reverted \
reverted_for_damage \
roc_auc.labels.true \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.07927353670258512" \
--pop-rate "false=0.9207264632974149" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/hrwiki.reverted.gradient_boosting.model: \
datasets/hrwiki.autolabeled_revisions.w_cache.20k_2017.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.hrwiki.reverted \
reverted_for_damage \
--version=$(reverted_major_minor).0 \
+ -p 'learning_rate=0.1' \
-p 'max_depth=3' \
- -p 'learning_rate=0.1' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=300' \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.07927353670258512" \
--pop-rate "false=0.9207264632974149" \
--center --scale > $@
hrwiki_models: \
models/hrwiki.reverted.gradient_boosting.model
hrwiki_tuning_reports: \
- tuning_reports/hrwiki.reverted.md
? -
+ tuning_reports/hrwiki.reverted.md
- ############################### Indonesian Wikipedia ##########################
? --
+ ############################# Indonesian Wikipedia ################################
? ++++++
-
datasets/idwiki.sampled_revisions.100k_2016.json:
wget -qO- http://quarry.wmflabs.org/run/135748/output/0/json-lines?download=true > $@
datasets/idwiki.autolabeled_revisions.100k_2016.json: \
datasets/idwiki.sampled_revisions.100k_2016.json
cat $< | \
./utility autolabel --host=https://id.wikipedia.org \
--trusted-groups=autoreview,bot,bureaucrat,checkuser,editor,flow-bot,oversight,reviewer,rollbacker,sysop \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
+
datasets/idwiki.autolabeled_revisions.w_cache.100k_2016.json: \
datasets/idwiki.autolabeled_revisions.100k_2016.json
cat $< | \
revscoring extract \
editquality.feature_lists.idwiki.reverted \
--host https://id.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
tuning_reports/idwiki.reverted.md: \
datasets/idwiki.autolabeled_revisions.w_cache.100k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.idwiki.reverted \
reverted_for_damage \
roc_auc.labels.true \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.02272613605673532" \
--pop-rate "false=0.9772738639432647" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/idwiki.reverted.gradient_boosting.model: \
datasets/idwiki.autolabeled_revisions.w_cache.100k_2016.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.idwiki.reverted \
reverted_for_damage \
--version=$(reverted_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=5' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.02272613605673532" \
--pop-rate "false=0.9772738639432647" \
--center --scale > $@
idwiki_models: \
- models/idwiki.reverted.gradient_boosting.model
? -
+ models/idwiki.reverted.gradient_boosting.model
idwiki_tuning_reports: \
- tuning_reports/idwiki.reverted.md
? - -
+ tuning_reports/idwiki.reverted.md
- ############################# Icelandic Wikipedia #############################
+ ############################# Icelandic Wikipedia ################################
? +++
-
- # From https://quarry.wmflabs.org/query/23305
datasets/iswiki.sampled_revisions.20k_2017.json:
wget -qO- https://quarry.wmflabs.org/run/218976/output/0/json-lines?download=true > $@
datasets/iswiki.autolabeled_revisions.20k_2017.json: \
- datasets/iswiki.sampled_revisions.20k_2017.json
? ^^^^^^^^
+ datasets/iswiki.sampled_revisions.20k_2017.json
? ^
cat $< | \
./utility autolabel --host=https://is.wikipedia.org \
- --trusted-groups=autopatrolled,bot,bureaucrat,checkuser,reviewer,rollbacker,sysop \
? ^^^^^^^^
+ --trusted-groups=autopatrolled,bot,bureaucrat,checkuser,reviewer,rollbacker,sysop \
? ^
- --trusted-edits=1000 \
? ^^^^^^^^
+ --trusted-edits=1000 \
? ^
+ --revert-radius=3 \
+ --revert-window=48 \
- --verbose > $@
? ^^^^^^^^
+ --verbose > $@
? ^
datasets/iswiki.revisions_for_review.5k_2017.json: \
- datasets/iswiki.autolabeled_revisions.20k_2017.json
? ^^^^^^^^
+ datasets/iswiki.autolabeled_revisions.20k_2017.json
? ^
grep '"needs_review": true' $< | shuf > $@
datasets/iswiki.autolabeled_revisions.w_cache.20k_2017.json: \
- datasets/iswiki.autolabeled_revisions.20k_2017.json
? ^^^^^^^^
+ datasets/iswiki.autolabeled_revisions.20k_2017.json
? ^
cat $< | \
revscoring extract \
- editquality.feature_lists.iswiki.reverted \
? ^^^^^^^^
+ editquality.feature_lists.iswiki.reverted \
? ^
- --host https://is.wikipedia.org \
? ^^^^^^^^
+ --host https://is.wikipedia.org \
? ^
- --extractor $(max_extractors) \
? ^^^^^^^^
+ --extractor $(max_extractors) \
? ^
- --verbose > $@
? ^^^^^^^^
+ --verbose > $@
? ^
tuning_reports/iswiki.reverted.md: \
- datasets/iswiki.autolabeled_revisions.w_cache.20k_2017.json
? ^^^^^^^^
+ datasets/iswiki.autolabeled_revisions.w_cache.20k_2017.json
? ^
cat $< | \
revscoring tune \
- config/classifiers.params.yaml \
? ^^^^^^^^
+ config/classifiers.params.yaml \
? ^
- editquality.feature_lists.iswiki.reverted \
? ^^^^^^^^
+ editquality.feature_lists.iswiki.reverted \
? ^
- reverted_for_damage \
? ^^^^^^^^
+ reverted_for_damage \
? ^
- roc_auc.labels.true \
? ^^^^^^^^
+ roc_auc.labels.true \
? ^
- --label-weight "true=$(reverted_weight)" \
? ^^^^^^^^
+ --label-weight "true=$(reverted_weight)" \
? ^
- --pop-rate "true=0.08115405770288514" \
? ^^^^^^^^
+ --pop-rate "true=0.08115405770288514" \
? ^
- --pop-rate "false=0.9188459422971149" \
? ^^^^^^^^
+ --pop-rate "false=0.9188459422971149" \
? ^
- --center --scale \
? ^^^^^^^^
+ --center --scale \
? ^
- --cv-timeout=60 \
? ^^^^^^^^ ^
+ --cv-timeout 60 \
? ^ ^
- --debug > $@
? ^^^^^^^^
+ --debug > $@
? ^
models/iswiki.reverted.gradient_boosting.model: \
- datasets/iswiki.autolabeled_revisions.w_cache.20k_2017.json
? ^^^^^^^^
+ datasets/iswiki.autolabeled_revisions.w_cache.20k_2017.json
? ^
cat $< | \
revscoring cv_train \
- revscoring.scoring.models.GradientBoosting \
? ^^^^^^^^
+ revscoring.scoring.models.GradientBoosting \
? ^
- editquality.feature_lists.iswiki.reverted \
? ^^^^^^^^
+ editquality.feature_lists.iswiki.reverted \
? ^
- reverted_for_damage \
? ^^^^^^^^
+ reverted_for_damage \
? ^
- --version=$(reverted_major_minor).0 \
? ^^^^^^^^
+ --version=$(reverted_major_minor).0 \
? ^
- -p 'max_depth=7' \
- -p 'learning_rate=0.1' \
? ^^^^^^^^
+ -p 'learning_rate=0.1' \
? ^
+ -p 'max_depth=7' \
- -p 'max_features="log2"' \
? ^^^^^^^^ - -
+ -p 'max_features=log2' \
? ^
- -p 'n_estimators=300' \
? ^^^^^^^^
+ -p 'n_estimators=300' \
? ^
- --label-weight "true=$(reverted_weight)" \
? ^^^^^^^^
+ --label-weight "true=$(reverted_weight)" \
? ^
- --pop-rate "true=0.08115405770288514" \
? ^^^^^^^^
+ --pop-rate "true=0.08115405770288514" \
? ^
- --pop-rate "false=0.9188459422971149" \
? ^^^^^^^^
+ --pop-rate "false=0.9188459422971149" \
? ^
- --center --scale > $@
? ^^^^^^^^
+ --center --scale > $@
? ^
iswiki_models: \
models/iswiki.reverted.gradient_boosting.model
iswiki_tuning_reports: \
- tuning_reports/iswiki.reverted.md
? -
+ tuning_reports/iswiki.reverted.md
- ############################# Italian Wikipedia ###############################
+ ############################# Italian Wikipedia ################################
? +
-
datasets/itwiki.sampled_revisions.20k_2015.json:
wget -qO- http://quarry.wmflabs.org/run/42224/output/0/json-lines?download=true > $@
datasets/itwiki.autolabeled_revisions.20k_2015.json: \
datasets/itwiki.sampled_revisions.20k_2015.json
cat $< | \
./utility autolabel --host=https://it.wikipedia.org \
--trusted-groups=sysop,oversight,bot,rollbacker,checkuser,abusefilter,bureaucrat \
--trusted-edits=1000 \
+ --revert-radius=3 \
+ --revert-window=48 \
--verbose > $@
+
datasets/itwiki.autolabeled_revisions.w_cache.20k_2015.json: \
datasets/itwiki.autolabeled_revisions.20k_2015.json
cat $< | \
revscoring extract \
editquality.feature_lists.itwiki.reverted \
--host https://it.wikipedia.org \
--extractor $(max_extractors) \
--verbose > $@
tuning_reports/itwiki.reverted.md: \
datasets/itwiki.autolabeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.itwiki.reverted \
reverted_for_damage \
roc_auc.labels.true \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.04628882613957241" \
--pop-rate "false=0.9537111738604276" \
--center --scale \
- --cv-timeout=60 \
? ^
+ --cv-timeout 60 \
? ^
--debug > $@
models/itwiki.reverted.gradient_boosting.model: \
datasets/itwiki.autolabeled_revisions.w_cache.20k_2015.json
cat $< | \
revscoring cv_train \
revscoring.scoring.models.GradientBoosting \
editquality.feature_lists.itwiki.reverted \
reverted_for_damage \
--version=$(reverted_major_minor).0 \
+ -p 'learning_rate=0.01' \
-p 'max_depth=7' \
- -p 'learning_rate=0.01' \
- -p 'max_features="log2"' \
? - -
+ -p 'max_features=log2' \
-p 'n_estimators=700' \
--label-weight "true=$(reverted_weight)" \
--pop-rate "true=0.04628882613957241" \
--pop-rate "false=0.9537111738604276" \
--center --scale > $@
itwiki_models: \
- models/itwiki.reverted.gradient_boosting.model
? -
+ models/itwiki.reverted.gradient_boosting.model
itwiki_tuning_reports: \
- tuning_reports/itwiki.reverted.md
? - -
+ tuning_reports/itwiki.reverted.md
@adamwight
Copy link

  • datasets/ukwiki.revisions_for_review.5k_2015.json: \

This leftover line is harmless, but it should be removed. That specific config is silly, by the way.

@Ladsgroup
Copy link
Author

Done, thanks for noticing it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment