Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save blacklight/9e2c5f8f1c3046c924f4d07325d8ebfb to your computer and use it in GitHub Desktop.
Save blacklight/9e2c5f8f1c3046c924f4d07325d8ebfb to your computer and use it in GitHub Desktop.
Script to pre-analyze the dump of Russian troll tweets published at https://github.com/fivethirtyeight/russian-troll-tweets/: it counts how many tweets come from each region and extracts the Italian tweets into a separate file.
import csv
import json
import sys
from collections import defaultdict
def main():
    """Tally troll tweets per region and extract the Italian ones.

    Reads every CSV file named on the command line (``sys.argv[1:]``). The
    first row of each file is taken as its header, which must contain the
    ``region`` and ``language`` columns. Two files are written to the
    current working directory:

    * ``tweets_by_region.csv``: a ``Region, Tweets count`` header followed
      by one row per region seen in the input;
    * ``italian_tweets.csv``: the header of the last file read, followed by
      every input row whose region is ``Italy`` or whose language is
      ``Italian``.

    Raises:
        SystemExit: with status 1, after printing a usage message, when no
            input file is given.
        KeyError: if a data row has no ``region`` or ``language`` value
            (column missing from the header, or the row is too short).
    """
    tweets_by_region = defaultdict(int)
    # NOTE: tallied alongside the regions, but not exported to any file yet.
    tweets_by_language = defaultdict(int)
    italian_tweets = []
    header = []

    if len(sys.argv) < 2:
        print("Usage: {} csv_file_1 csv_file_2 ...".format(sys.argv[0]))
        sys.exit(1)

    for csv_file in sys.argv[1:]:
        print('Analyzing {}'.format(csv_file))

        # newline='' is what the csv module expects so that quoted fields
        # containing line breaks are parsed correctly; the explicit encoding
        # keeps the result independent of the machine's locale.
        with open(csv_file, 'r', newline='', encoding='utf-8') as fp:
            reader = csv.reader(fp, delimiter=',')
            for (i, row) in enumerate(reader):
                if i == 0:
                    header = row
                    continue

                # zip() tolerates rows that are longer than the header.
                tweet = dict(zip(header, row))

                if (tweet['region'] == 'Italy'
                        or tweet['language'] == 'Italian'):
                    italian_tweets.append(row)

                tweets_by_language[tweet['language']] += 1
                tweets_by_region[tweet['region']] += 1

                # Report progress once every 10000 rows, not on every row.
                if i % 10000 == 0:
                    print('Analyzed {} rows in {}'.format(i, csv_file))

    with open('tweets_by_region.csv', 'w', newline='',
              encoding='utf-8') as fp:
        writer = csv.writer(fp, delimiter=',')
        writer.writerow(['Region', 'Tweets count'])
        for (region, count) in tweets_by_region.items():
            writer.writerow([region, count])

    with open('italian_tweets.csv', 'w', newline='',
              encoding='utf-8') as fp:
        writer = csv.writer(fp, delimiter=',')
        writer.writerow(header)
        writer.writerows(italian_tweets)
# Run the analysis only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == '__main__':
    main()
# vim:sw=4:ts=4:et:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment