Skip to content

Instantly share code, notes, and snippets.

@do-me
Created May 25, 2019 11:30
Show Gist options
  • Save do-me/d07753a2f7449d86bb6f78d780d49481 to your computer and use it in GitHub Desktop.
Save do-me/d07753a2f7449d86bb6f78d780d49481 to your computer and use it in GitHub Desktop.
# Tweet to Twitter Birdcloud (Wordcloud)
import pandas as pd
import re
import os
# Switch to the folder holding the tweet dumps so the relative JSON file name
# below resolves.
os.chdir("C:/Users/Dome/Desktop/nu/Tweets/")
# Base name of the dump to analyse; it is reused later for the output PNG.
party = "linksfraktion"
tweets_file = party + ".json"
df = pd.read_json(tweets_file)
from nltk.tokenize import TweetTokenizer
from stop_words import get_stop_words
# German stop-word list and the tokenizer used to split tweet text.
sw = get_stop_words('de')
tt = TweetTokenizer()
# Extra filler words and word fragments to exclude from the word cloud.
# Every entry must be lower-case: tokens are lower-cased before they are
# looked up in `sw`, so a capitalised entry (previously "Kapitel") could
# never match anything.
sw.extend((
    "macht", "schen", "schon", "kapitel", "setzt", "weitere", "lehnt", "viel",
    "stellen", "gehen", "chen", "geht", "gilt", "lehnen", "viele", "gibt",
    "darf", "halten", "dürfen", "neben", "gehört", "vielen", "jedoch",
    "braucht", "nehmen", "rung", "seit", "sollten", "deren", "etwa",
    "beim", "außerdem", "stehen", "sitzen", "mehr", "sollen", "müssen",
    "sowie", "deshalb", "daher", "dafür", "dabei", "brauchen", "zudem",
    "setzen", "besser", "neue", "neu", "neuen", "immer", "gute",
))
df['tokens'] = df['text'].apply(tt.tokenize)

# Snapshot the stop words in a set so each membership test is a hash lookup
# instead of a scan over the whole list.
_stop_set = set(sw)


def _keep_token(token):
    """Return True if *token* should stay in the cleaned tweet.

    A token is kept when its lower-cased form is not a stop word and it is
    either purely alphabetic or starts with "@" or "#" (mention / hashtag).
    """
    if token.lower() in _stop_set:
        return False
    return token.isalpha() or token.startswith(("@", "#"))


df['clean'] = df['tokens'].apply(
    lambda tokens: [token for token in tokens if _keep_token(token)])
###### clean df! ######
# Word stems that mark a tweet as being about migration / asylum.
search_values = ["migr", "flücht", "auslä", "asyl", "flucht", "immigr", "refugee",
                 "geflüchte", "ausland", "zuwander", "zugewandert"]
# The stems are lower-case, but German nouns are capitalised ("Migration",
# "Asyl", "Flüchtlinge"), so the search has to ignore case or those tweets are
# silently dropped.  re.escape keeps the stems literal should one ever contain
# a regex metacharacter.
migration_pattern = '|'.join(re.escape(stem) for stem in search_values)
# na=False counts rows without text as non-matching; otherwise the mask would
# contain NaN, which boolean indexing rejects.
mig = df[df.text.str.contains(migration_pattern, case=False, na=False)]
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
# mig.clean holds one token list per tweet; merge them into one flat list.
flattened = []
for tweet_tokens in mig.clean:
    flattened.extend(tweet_tokens)
# The word cloud generator expects a single space-separated string.
words = ' '.join(flattened)
# Twitter bird image used as the cloud's shape mask; the picture must have a
# white background.
mask_path = "C:/Users/Dome/Desktop/nu/Tweets/Wordclouds/Twitter_bird.png"
mask = np.array(Image.open(mask_path))
# Colour generator built from the same image array.
image_colors = ImageColorGenerator(mask)
def birdcloud(words, mask, output_path=None):
    """Render *words* as a word cloud shaped and coloured by *mask*.

    The figure is saved as a PNG and then displayed.

    Args:
        words: Space-separated text the cloud is generated from.
        mask: Image array handed to ``WordCloud`` as the shape mask; the
            word colours are derived from the same array.
        output_path: Destination of the PNG.  When omitted, the file is
            written to the Wordclouds folder and named after the
            module-level ``party`` value.
    """
    if output_path is None:
        output_path = ("C:/Users/Dome/Desktop/nu/Tweets/Wordclouds/" +
                       party + ".png")
    # Build the colour function from the mask actually passed in, so that a
    # call with a different mask is not recoloured from an unrelated
    # module-level image.
    color_func = ImageColorGenerator(mask)
    # NOTE(review): the wordcloud docs say width/height are ignored once a
    # mask is supplied; they are kept here only to mirror the original call.
    word_cloud = WordCloud(width=512, height=512, max_words=200,
                           background_color='white', stopwords=STOPWORDS,
                           mask=mask).generate(words)
    plt.figure(figsize=(10, 8), facecolor='white', edgecolor='blue')
    plt.imshow(word_cloud.recolor(color_func=color_func),
               interpolation="bilinear")
    plt.axis('off')
    plt.tight_layout(pad=0)
    # Save before show(): the figure has to be written while it is still the
    # current one.
    plt.savefig(output_path, format="png")
    plt.show()
# Build, save and display the bird-shaped word cloud for the selected tweets.
birdcloud(words=words, mask=mask)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment