+ download from https://github.com/armancohan/long-summarization
$wget https://storage.googleapis.com/allennlp-public-data/cnndm-combined-data-2020.07.13.tar.gz
$tar -xzf cnndm-combined-data-2020.07.13.tar.gz
$mv cnndm-combined-data-2020.07.13 cnn_dm
+ download from https://rajpurkar.github.io/SQuAD-explorer/
+ download from https://www.kaggle.com/amananandrai/ag-news-classification-dataset
+ download from https://www.kaggle.com/nltkdata/reuters
+ unzip files
+ run the script to prepare JSON files for training and testing
$wget https://gist.githubusercontent.com/xinzhel/1bdd7b3f94539f83ce0d7beed320020a/raw/f7fd42bd643be75d48ba325629b8e86f11fca68c/reuters-json.py
$python reuters-json.py
$wget https://data.deepai.org/gigaword.zip
$unzip gigaword.zip
$mv sumdata gigaword
+ download from https://www.kaggle.com/crowdflower/twitter-user-gender-classification
$kaggle datasets download -d danofer/dbpedia-classes
$mkdir dbpedia_csv
$mv dbpedia-classes.zip dbpedia_csv/
$cd dbpedia_csv/
$unzip dbpedia-classes.zip
$mv DBPEDIA_test.csv test.csv
$mv DBPEDIA_train.csv train.csv