Skip to content

Instantly share code, notes, and snippets.

View do-me's full-sized avatar

Dominik Weckmüller do-me

View GitHub Profile
# PDF Scanner, Shortener and Marker
import PyPDF2
from PyPDF2 import PdfFileWriter,PdfFileReader
import os
# use path directly or deal with windows \ or /:
# pa=r"C:\Users\Dome\Desktop\nu\Wahlprogramme 2017\afd.pdf".replace("\\", "/")
# Part 1: PDF Scanner and Shortener
# Pdf2wordcloud
# 1) pdf to text object
import os
import PyPDF2
from PyPDF2 import PdfFileWriter
party="grüne" # and others: Linke, Grüne, SPD, FDP, CDU/CSU, AfD
pa= "C:/Users/Dome/Desktop/nu/Wahlprogramme 2017/"
# Tweets to sentiment of tokenized row items (average)
# ATTENTION: BUGS WITH \x OR OTHER SIMILAR CHARACTERS (\n IS REPLACED ALREADY)
# NOTE: the pasted "1." .. "10." line-number prefixes were removed; they made
# every statement below a syntax error.

import pandas as pd
import re
import os

# Work relative to the folder that holds the per-party tweet dumps.
os.chdir("C:/Users/Dome/Desktop/nu/Tweets/")

# The party name selects which "<party>.json" file is loaded.
party = "fdp"
df = pd.read_json(party + ".json")
# Tweet to Twitter Birdcloud (Wordcloud)
import pandas as pd
import re
import os
os.chdir("C:/Users/Dome/Desktop/nu/Tweets/")
party= "linksfraktion"
df=pd.read_json(party+".json")
# Scatterplots with seaborn
import pandas as pd
import os
# Working with high-precision CSV data: display floats with 10 digits after
# the decimal point. The fully qualified key is used because the bare
# 'precision' pattern matches more than one option on newer pandas releases
# and raises OptionError.
pd.set_option('display.precision', 10)
os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/")
# avoid confusion by converting number separators (comma to point)
ma = pd.read_csv("MASTERENG.csv",float_precision='round_trip') # high precision
# heatmap with seaborn
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white")
import os
# Display floats with 10 digits after the decimal point. The fully qualified
# key avoids the ambiguous bare 'precision' pattern, which raises OptionError
# on newer pandas releases.
pd.set_option('display.precision', 10)
os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/")
# multi choropleth
import pandas as pd
import os
# Display floats with 10 digits after the decimal point. The fully qualified
# key avoids the ambiguous bare 'precision' pattern, which raises OptionError
# on newer pandas releases.
pd.set_option('display.precision', 10)
os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/")
ma = pd.read_csv("MASTERENG.csv",float_precision='round_trip')
import folium
import webbrowser
<!DOCTYPE html>
<html>
<head>
<script>
function show(shown, hidden) {
document.getElementById(shown).style.display='block';
document.getElementById(hidden).style.display='none';
return false;
}
</script>
# importing the necessary packages
import speech_recognition as sr
from selenium import webdriver
# initialise speech_recognition
r = sr.Recognizer()
mic = sr.Microphone()
# listen!
with mic as source:
@do-me
do-me / Topic Modeling with Scikit Learn.py
Created December 7, 2020 14:59
Topic Modeling with Scikit Learn with Python 3 and Scikit-learn 0.23
# adapted code for Python 3 and latest Scikit-learn version 0.23
# based on https://medium.com/mlreview/topic-modeling-with-scikit-learn-e80d33668730
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
import numpy as np
def display_topics(H, W, feature_names, documents, no_top_words, no_top_documents):
for topic_idx, topic in enumerate(H):
print("Topic {}".format(topic_idx))