Dominik Weckmüller do-me

## Pdf_scan_shortened_marked.py
# PDF Scanner, Shortener and Marker

import PyPDF2
from PyPDF2 import PdfFileWriter,PdfFileReader
import os

# use path directly or deal with windows \ or /:
# pa=r"C:\Users\Dome\Desktop\nu\Wahlprogramme 2017\afd.pdf".replace("\\", "/")

# Part 1: PDF Scanner and Shortener

## Pdf2wordcloud.py
# Pdf2wordcloud

# 1) pdf to text object
import os
import PyPDF2
from PyPDF2 import PdfFileWriter

# Folder path (note the trailing slash); presumably file names are
# appended to it further down in the script.
pa = "C:/Users/Dome/Desktop/nu/Wahlprogramme 2017/"

# Party handled in this run. Other values listed by the author:
# Linke, Grüne, SPD, FDP, CDU/CSU, AfD
party = "grüne"

## Tweets2sentimentmean.py
# Tweets to sentiment of tokenized row items (average)
# ATTENTION: BUGS WITH \x OR OTHER SIMILAR CHARACTERS (\n IS REPLACED ALREADY)

import pandas as pd
import re
import os

# Change into the tweets folder so the relative JSON file name below resolves.
os.chdir("C:/Users/Dome/Desktop/nu/Tweets/")

# The input file is named after the selected party: "<party>.json".
party = "fdp"
df = pd.read_json(party + ".json")

## Birdcloud.py
# Tweet to Twitter Birdcloud (Wordcloud)

import pandas as pd
import re
import os
# Name used to build the input file name "<party>.json".
party = "linksfraktion"

# Switch to the tweets folder first so the relative file name resolves there.
os.chdir("C:/Users/Dome/Desktop/nu/Tweets/")
json_name = party + ".json"
df = pd.read_json(json_name)


## SeabornScatterplots.py
# Scatterplots with seaborn

import pandas as pd
import os
# Working with csv data with high precision: show 10 digits after the
# decimal point when pandas prints floats (display setting only).
# The full option key is used because the bare pattern 'precision' also
# matches 'styler.format.precision' in newer pandas releases, where it is
# rejected as ambiguous (OptionError).
pd.set_option('display.precision', 10)

os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/")
# avoid confusion by converting number separators (comma to point)
# float_precision='round_trip' selects pandas' round-trip float converter.
ma = pd.read_csv("MASTERENG.csv", float_precision='round_trip')  # high precision

## SeabornHeatmap.py
# heatmap with seaborn

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white")
import os
# Print floats with 10 digits after the decimal point. The full key
# 'display.precision' is required: the bare pattern 'precision' is ambiguous
# in newer pandas (it also matches 'styler.format.precision') and raises
# OptionError there.
pd.set_option('display.precision', 10)
# Relative file names used afterwards resolve against this folder.
os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/")

## Multichoropleth.py
# multi choropleth

import pandas as pd
import os
# Print floats with 10 digits after the decimal point. The full key
# 'display.precision' is required: the bare pattern 'precision' is ambiguous
# in newer pandas (it also matches 'styler.format.precision') and raises
# OptionError there.
pd.set_option('display.precision', 10)
# Relative file names below resolve against this folder.
os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/")
# float_precision='round_trip' selects pandas' round-trip float converter.
ma = pd.read_csv("MASTERENG.csv", float_precision='round_trip')
import folium
import webbrowser


## SimpleWebpage.html
<!DOCTYPE html>
<html>
<head>
<script>
// Makes the element with id `shown` visible (display: block) and hides the
// element with id `hidden` (display: none). Always returns false —
// presumably so an inline event handler can cancel the default action.
function show(shown, hidden) {
  var revealedStyle = document.getElementById(shown).style;
  revealedStyle.display = 'block';

  var concealedStyle = document.getElementById(hidden).style;
  concealedStyle.display = 'none';

  return false;
}
</script>

## play-the-radio.py
# importing the necessary packages
import speech_recognition as sr
from selenium import webdriver

# initialise speech_recognition
r = sr.Recognizer()
mic = sr.Microphone()

# listen!
with mic as source:

## Topic Modeling with Scikit Learn.py
# adapted code for Python 3 and latest Scikit-learn version 0.23
# based on https://medium.com/mlreview/topic-modeling-with-scikit-learn-e80d33668730

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
import numpy as np

def display_topics(H, W, feature_names, documents, no_top_words, no_top_documents):
    for topic_idx, topic in enumerate(H):
        print("Topic {}".format(topic_idx))
	# PDF Scanner, Shortener and Marker

	import PyPDF2
	from PyPDF2 import PdfFileWriter,PdfFileReader
	import os

	# use path directly or deal with windows \ or /:
	# pa=r"C:\Users\Dome\Desktop\nu\Wahlprogramme 2017\afd.pdf".replace("\\", "/")

	# Part 1: PDF Scanner and Shortener
	# Pdf2wordcloud

	# 1) pdf to text object
	import os
	import PyPDF2
	from PyPDF2 import PdfFileWriter

	party="grüne" # and others: Linke, Grüne, SPD, FDP, CDU/CSU, AfD

	pa= "C:/Users/Dome/Desktop/nu/Wahlprogramme 2017/"
	1. # Tweets to sentiment of tokenized row items (average)
	2. # ATTENTION: BUGS WITH \x OR OTHER SIMILAR CHARACTERS (\n IS REPLACED ALREADY)
	3.
	4. import pandas as pd
	5. import re
	6. import os
	7. os.chdir("C:/Users/Dome/Desktop/nu/Tweets/")
	8.
	9. party= "fdp"
	10. df=pd.read_json(party+".json")
	# Tweet to Twitter Birdcloud (Wordcloud)

	import pandas as pd
	import re
	import os
	os.chdir("C:/Users/Dome/Desktop/nu/Tweets/")

	party= "linksfraktion"
	df=pd.read_json(party+".json")
	# Scatterplots with seaborn

	import pandas as pd
	import os
	pd.set_option('precision', 10) # working with csv data with high precision,
	# meaning more digits after the decimal point

	os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/")
	# avoid confusion by converting number separators (comma to point)
	ma = pd.read_csv("MASTERENG.csv",float_precision='round_trip') # high precision
	# heatmap with seaborn

	import numpy as np
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	sns.set(style="white")
	import os
	pd.set_option('precision', 10)
	os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/")
	# multi choropleth

	import pandas as pd
	import os
	pd.set_option('precision', 10)
	os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/")
	ma = pd.read_csv("MASTERENG.csv",float_precision='round_trip')
	import folium
	import webbrowser
	<!DOCTYPE html>
	<html>
	<head>
	<script>
	function show(shown, hidden) {
	document.getElementById(shown).style.display='block';
	document.getElementById(hidden).style.display='none';
	return false;
	}
	</script>
	# importing the necessary packages
	import speech_recognition as sr
	from selenium import webdriver

	# initialise speech_recognition
	r = sr.Recognizer()
	mic = sr.Microphone()

	# listen!
	with mic as source:
	# adapted code for Python 3 and latest Scikit-learn version 0.23
	# based on https://medium.com/mlreview/topic-modeling-with-scikit-learn-e80d33668730

	from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
	from sklearn.decomposition import NMF, LatentDirichletAllocation
	import numpy as np

	def display_topics(H, W, feature_names, documents, no_top_words, no_top_documents):
	for topic_idx, topic in enumerate(H):
	print("Topic {}".format(topic_idx))