Skip to content

Instantly share code, notes, and snippets.

🎯
Focusing

Damian Schwyrz Damian89

🎯
Focusing
Block or report user

Report or block Damian89

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View simple-tfidf-count-lsi-example.py
#!/usr/bin/env python
# coding: utf8
from sklearn import feature_extraction, decomposition
stoplist = []
docs = [
"Maschinelles lernen ist eine Disziplien die irgendwas mit Künstlicher Intelligenz zu tun hat",
"Künstliche Intelligenz ist ein interessantes Themengebiet",
View example_tfidf.py
# Example: fit a 2-topic Latent Dirichlet Allocation model on TF-IDF features.
print("TF-IDF + LDA")
tfidf_vect = feature_extraction.text.TfidfVectorizer(stop_words=stoplist)
# Learn the vocabulary and build the document-term matrix in one pass.
tfidf = tfidf_vect.fit_transform(docs)
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back only on older releases.
if hasattr(tfidf_vect, "get_feature_names_out"):
    features = list(tfidf_vect.get_feature_names_out())
else:
    features = tfidf_vect.get_feature_names()
# NOTE(review): the variable is called "lsi" although the model is LDA; the
# name is kept because code outside this excerpt may still refer to it.
tfidf_lsi = decomposition.LatentDirichletAllocation(n_components=2)
tfidf_lsi.fit(tfidf)
View example_tfidf.py
# Example: factorize the TF-IDF matrix into 2 components with NMF.
print()
print("TF-IDF + NMF")
tfidf_vect = feature_extraction.text.TfidfVectorizer(stop_words=stoplist)
# Learn the vocabulary and build the document-term matrix in one pass.
tfidf = tfidf_vect.fit_transform(docs)
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back only on older releases.
if hasattr(tfidf_vect, "get_feature_names_out"):
    features = list(tfidf_vect.get_feature_names_out())
else:
    features = tfidf_vect.get_feature_names()
# NOTE(review): the variable is called "lsi" although the model is NMF; the
# name is kept because code outside this excerpt may still refer to it.
tfidf_lsi = decomposition.NMF(n_components=2)
tfidf_lsi.fit(tfidf)
View example_tfidf.py
# Example: extract 2 independent components from TF-IDF features with FastICA.
print()
print("TF-IDF + FastICA")
tfidf_vect = feature_extraction.text.TfidfVectorizer(stop_words=stoplist)
# Learn the vocabulary and build the document-term matrix in one pass.
tfidf = tfidf_vect.fit_transform(docs)
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back only on older releases.
if hasattr(tfidf_vect, "get_feature_names_out"):
    features = list(tfidf_vect.get_feature_names_out())
else:
    features = tfidf_vect.get_feature_names()
# NOTE(review): the variable is called "lsi" although the model is FastICA;
# the name is kept because code outside this excerpt may still refer to it.
tfidf_lsi = decomposition.FastICA(n_components=2)
# The sparse TF-IDF matrix is converted to a dense array before fitting.
tfidf_lsi.fit(tfidf.toarray())
View example_tfidf.py
# Example: project TF-IDF features onto 2 components with IncrementalPCA.
print()
print("TF-IDF + IncrementalPCA")
tfidf_vect = feature_extraction.text.TfidfVectorizer(stop_words=stoplist)
# Learn the vocabulary and build the document-term matrix in one pass.
tfidf = tfidf_vect.fit_transform(docs)
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back only on older releases.
if hasattr(tfidf_vect, "get_feature_names_out"):
    features = list(tfidf_vect.get_feature_names_out())
else:
    features = tfidf_vect.get_feature_names()
# NOTE(review): the variable is called "lsi" although the model is
# IncrementalPCA; the name is kept because code outside this excerpt may
# still refer to it.
tfidf_lsi = decomposition.IncrementalPCA(n_components=2)
# The sparse TF-IDF matrix is converted to a dense array before fitting.
tfidf_lsi.fit(tfidf.toarray())
View example_tfidf.py
import warnings
from sklearn import ensemble
warnings.filterwarnings("ignore")
features = [
[1, 1, 0],
[1, 1, 0],
[1, 1, 0],
View gridsearchcv.py
import pandas
# Read the training data CSV file into a DataFrame
training_data = pandas.read_csv("data/numerai_training_data.csv")
# Print the first 5 rows together with the header
print(training_data.head())
# How many rows and columns does the file contain?
print(training_data.shape)
View sklearn-one-method-git.py
# Two-step preprocessing pipeline: step 'ss' applies StandardScaler, then
# step 'ex' applies PolynomialFeatures with degree=3 to the scaled output.
preprocessor = pipeline.Pipeline(
[
('ss', preprocessing.StandardScaler()),
('ex', preprocessing.PolynomialFeatures(degree=3)),
]
)
View gist:cfecbd2ce97f049cf9e20402774599cf
<!doctype html>
<!--
~ Author: Damian Schwyrz <mail@damianschwyrz.de>
~ URL: https://www.damianschwyrz.de
~ Copyright (c) 2017.
-->
<html lang="">
<head>
<meta charset="utf-8">
You can’t perform that action at this time.