View simple-tfidf-count-lsi-example.py
#!/usr/bin/env python | |
# coding: utf8 | |
from sklearn import feature_extraction, decomposition | |
stoplist = [] | |
docs = [ | |
"Maschinelles lernen ist eine Disziplien die irgendwas mit Künstlicher Intelligenz zu tun hat", | |
"Künstliche Intelligenz ist ein interessantes Themengebiet", |
View example_tfidf.py
# TF-IDF features fed into Latent Dirichlet Allocation with 2 components.
print("TF-IDF + LDA")
tfidf_vect = feature_extraction.text.TfidfVectorizer(stop_words=stoplist)
# Learn the vocabulary and build the document-term matrix in a single pass.
tfidf = tfidf_vect.fit_transform(docs)
# get_feature_names() was removed in scikit-learn 1.2; prefer its
# replacement and fall back only on releases that predate it.
if hasattr(tfidf_vect, "get_feature_names_out"):
    features = list(tfidf_vect.get_feature_names_out())
else:
    features = tfidf_vect.get_feature_names()
# NOTE: despite the "lsi" in its name, this variable holds the LDA model;
# the name is kept because code outside this snippet may refer to it.
tfidf_lsi = decomposition.LatentDirichletAllocation(n_components=2)
tfidf_lsi.fit(tfidf)
View example_tfidf.py
# Blank line to separate this section's output from the previous one.
print()
# TF-IDF features fed into non-negative matrix factorization (2 components).
print("TF-IDF + NMF")
tfidf_vect = feature_extraction.text.TfidfVectorizer(stop_words=stoplist)
# Learn the vocabulary and build the document-term matrix in a single pass.
tfidf = tfidf_vect.fit_transform(docs)
# get_feature_names() was removed in scikit-learn 1.2; prefer its
# replacement and fall back only on releases that predate it.
if hasattr(tfidf_vect, "get_feature_names_out"):
    features = list(tfidf_vect.get_feature_names_out())
else:
    features = tfidf_vect.get_feature_names()
# NOTE: despite the "lsi" in its name, this variable holds the NMF model;
# the name is kept because code outside this snippet may refer to it.
tfidf_lsi = decomposition.NMF(n_components=2)
tfidf_lsi.fit(tfidf)
View example_tfidf.py
# Blank line to separate this section's output from the previous one.
print()
# TF-IDF features fed into FastICA (2 components).
print("TF-IDF + FastICA")
tfidf_vect = feature_extraction.text.TfidfVectorizer(stop_words=stoplist)
# Learn the vocabulary and build the document-term matrix in a single pass.
tfidf = tfidf_vect.fit_transform(docs)
# get_feature_names() was removed in scikit-learn 1.2; prefer its
# replacement and fall back only on releases that predate it.
if hasattr(tfidf_vect, "get_feature_names_out"):
    features = list(tfidf_vect.get_feature_names_out())
else:
    features = tfidf_vect.get_feature_names()
# NOTE: despite the "lsi" in its name, this variable holds the FastICA model;
# the name is kept because code outside this snippet may refer to it.
tfidf_lsi = decomposition.FastICA(n_components=2)
# The vectorizer output is converted to a dense array before fitting.
tfidf_lsi.fit(tfidf.toarray())
View example_tfidf.py
# Blank line to separate this section's output from the previous one.
print()
# TF-IDF features fed into IncrementalPCA (2 components).
print("TF-IDF + IncrementalPCA")
tfidf_vect = feature_extraction.text.TfidfVectorizer(stop_words=stoplist)
# Learn the vocabulary and build the document-term matrix in a single pass.
tfidf = tfidf_vect.fit_transform(docs)
# get_feature_names() was removed in scikit-learn 1.2; prefer its
# replacement and fall back only on releases that predate it.
if hasattr(tfidf_vect, "get_feature_names_out"):
    features = list(tfidf_vect.get_feature_names_out())
else:
    features = tfidf_vect.get_feature_names()
# NOTE: despite the "lsi" in its name, this variable holds the
# IncrementalPCA model; the name is kept because code outside this snippet
# may refer to it.
tfidf_lsi = decomposition.IncrementalPCA(n_components=2)
# The vectorizer output is converted to a dense array before fitting.
tfidf_lsi.fit(tfidf.toarray())
View example_tfidf.py
import warnings | |
from sklearn import ensemble | |
warnings.filterwarnings("ignore") | |
features = [ | |
[1, 1, 0], | |
[1, 1, 0], | |
[1, 1, 0], |
View sklearn-numerai-solution.py
#!/usr/bin/env python | |
# coding: utf8 | |
""" Example for numer.ai competition """ | |
import math | |
import os | |
import sys | |
import numpy |
View gridsearchcv.py
import pandas | |
# Read the training data file.
training_data = pandas.read_csv("data/numerai_training_data.csv")

# Print the first 5 rows together with the header.
leading_rows = training_data.head()
print(leading_rows)

# How many rows and columns does the file contain?
dimensions = training_data.shape
print(dimensions)
View sklearn-one-method-git.py
# Preprocessing chain: a StandardScaler step ('ss') followed by a
# degree-3 PolynomialFeatures expansion step ('ex').
preprocessing_steps = [
    ('ss', preprocessing.StandardScaler()),
    ('ex', preprocessing.PolynomialFeatures(degree=3)),
]
preprocessor = pipeline.Pipeline(steps=preprocessing_steps)
View app.js
/* | |
* Author: Damian Schwyrz <mail@damianschwyrz.de> | |
* URL: https://www.damianschwyrz.de | |
* Copyright (c) 2017. | |
*/ | |
/** | |
* Main table component | |
*/ | |
Vue.component('table-keywords', { |
OlderNewer