Skip to content

Instantly share code, notes, and snippets.

@enric1994
Last active September 5, 2019 11:00
Show Gist options
  • Save enric1994/4a401af108702f7fe3b4b6d4849cb240 to your computer and use it in GitHub Desktop.
Mentions Network
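# Usage: python3 network.py input.csv output.csv
# Reads a comma-separated input CSV and writes a semicolon-separated CSV of
# (username, mention) pairs.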
import csv
import re
import sys
# Both paths are required positional arguments; exit with the usage line
# instead of an IndexError traceback when either one is missing.
if len(sys.argv) < 3:
    sys.exit("Usage: python3 network.py input.csv output.csv")
input_file = sys.argv[1]   # CSV file to read
output_file = sys.argv[2]  # CSV file to write
# An "@" counts as a mention only at the start of the text or right after a
# character other than a letter, digit, "-", "_" or "." (so the "@" inside
# "user@example.com" is ignored).  The name after it must start with a letter
# and be at least two characters long; later characters may also be digits
# or "_".
_MENTION_RE = re.compile(
    r"(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9_]+)"
)


def detect_mention(text):
    """Return every @mention found in *text*, in order of appearance.

    Args:
        text: String to scan.

    Returns:
        A list of the matched mentions, each including its leading "@"
        (e.g. ``["@alice", "@bob_99"]``); empty when nothing matches.
    """
    # group() with no argument is the whole match, so the "@" is kept.
    return [match.group() for match in _MENTION_RE.finditer(text)]
# Input columns consumed by the script (0-based): the value written out as
# "username" and the text that is scanned for mentions.
_USERNAME_COLUMN = 11
_TEXT_COLUMN = 2


def _write_mention_edges(input_path, output_path):
    """Append one ``username;mention`` row to *output_path* per mention.

    Every row of the comma-delimited *input_path* contributes one output row
    for each mention that ``detect_mention`` finds in its text column.

    Args:
        input_path: Path of the CSV file to read.
        output_path: Path of the semicolon-delimited CSV file to append to.
    """
    min_columns = max(_USERNAME_COLUMN, _TEXT_COLUMN) + 1
    # newline='' is what the csv module asks for: it lets the reader handle
    # newlines inside quoted fields and stops the writer from emitting
    # "\r\r\n" line endings on Windows.
    with open(input_path, newline='') as csvinput, \
            open(output_path, 'a', newline='') as csvoutput:
        reader = csv.reader(csvinput, delimiter=',')
        writer = csv.writer(csvoutput, delimiter=';',
                            quotechar='"', quoting=csv.QUOTE_MINIMAL)
        # The file is opened for appending, so only a new or empty file gets
        # the header; a re-run must not insert header rows between data rows.
        if csvoutput.tell() == 0:
            writer.writerow(['username', 'mention'])
        for row in reader:
            # Blank lines come back as [] and truncated rows lack the columns
            # read below; skip them instead of raising IndexError.
            if len(row) < min_columns:
                continue
            username = row[_USERNAME_COLUMN]
            for mention in detect_mention(row[_TEXT_COLUMN]):
                writer.writerow([username, mention])


if __name__ == "__main__":
    _write_mention_edges(input_file, output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment