A "Best of the Best Practices" (BOBP) guide to developing in Python.
- "Build tools for others that you want to be built for you." - Kenneth Reitz
- "Simplicity is alway better than functionality." - Pieter Hintjens
from sklearn.feature_extraction.text import CountVectorizer
import json
import pandas
import numpy

# CSV of raw training texts; one sample per row, no header row.
corpus_path = 'data/training/training-data.csv'

# prepare training data for bow (corpus)
X_training = []
dataframe = pandas.read_csv(corpus_path, header=None)
from sklearn.feature_extraction.text import CountVectorizer
import json

# Fixed vocabulary (term -> index) so every encoding uses the same columns.
vocab_path = 'vocabulary.json'
# BUG FIX: json.load(open(...)) leaked the file handle; use a context manager.
with open(vocab_path) as vocab_file:
    vocabulary = json.load(vocab_file)

vectorizer = CountVectorizer(vocabulary=vocabulary)


def tobow(string):
    """Encode *string* as a bag-of-words count vector.

    Returns a 2-D numpy array of shape (1, len(vocabulary)) with the
    token counts for the single input document.
    """
    return vectorizer.transform([string]).toarray()
# define baseline model
def baseline_model():
    """Build and compile the baseline feed-forward classifier.

    Relies on module-level globals for the layer sizes:
    `bow` (bag-of-words feature matrix; input width = len(bow[0])) and
    `dummy_y` (one-hot label matrix; output width = len(dummy_y[0])).
    # NOTE(review): both globals are defined elsewhere in the script — confirm.

    Returns:
        The compiled keras Sequential model.
    """
    # create model
    model = Sequential()
    model.add(Dense(5000, input_shape=(len(bow[0]),), activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(len(dummy_y[0]), activation='softmax'))
    # Compile model
    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # BUG FIX: the model was built but never returned, so
    # KerasClassifier(build_fn=baseline_model) would receive None.
    return model
training_data = "data/training/training-data.csv" | |
training_label = "data/training/training-label.csv" | |
X_dataframe = pandas.read_csv(training_data, header=None) | |
X = X_dataframe.values | |
Y_dataframe = pandas.read_csv(training_label, header=None) | |
Y = Y_dataframe.values | |
dummy_x = [] |
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder

from tobow import tobow

# Fix the RNG seed so weight initialization and shuffling are reproducible.
numpy.random.seed(7)
import pandas
import numpy
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
# BUG FIX: Dense is defined in keras.layers, not keras.models — the original
# `from keras.models import Sequential, Dense` raises ImportError.
from keras.layers import Dense
from keras.models import model_from_json

from tobow import tobow
def evaluate(model_path, weights_path, test_data_path, test_label_path, encoder_path): | |
# load test data and label | |
X_test = pandas.read_csv(test_data_path, header=None) |
# BUG FIX: `from pandas import datetime` was deprecated and removed
# (pandas 2.0); import the same class from the standard library instead.
from datetime import datetime

from pandas import read_csv
from matplotlib import pyplot
# NOTE(review): statsmodels.tsa.arima_model.ARIMA was removed in
# statsmodels 0.13 (replaced by statsmodels.tsa.arima.model.ARIMA) — confirm
# the pinned statsmodels version before changing this import.
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# CSV of historical cooking-oil ("minyak goreng") prices.
file_path = "minyak-goreng-prices.csv"

# Parse the date column into datetime objects
# BUG FIX: `from pandas import datetime` was deprecated and removed
# (pandas 2.0); the standard-library class is the same object.
from datetime import datetime

from pandas import read_csv
from matplotlib import pyplot
import numpy

# CSV of historical cooking-oil ("minyak goreng") prices.
file_path = "minyak-goreng-prices.csv"
def parser(x):
    """Parse a 'DD-Mon-YY' date string (e.g. '01-Jan-21') into a datetime.

    The two-digit year is assumed to fall in the 2000s and is expanded by
    prefixing '20' before parsing with the '%d-%b-%Y' format.
    """
    parts = x.split('-')
    # Rebuild as 'DD-Mon-20YY' so strptime sees a four-digit year.
    return datetime.strptime(parts[0] + "-" + parts[1] + "-20" + parts[2], '%d-%b-%Y')
# BUG FIX: `from pandas import datetime` was deprecated and removed
# (pandas 2.0); the standard-library class is the same object.
from datetime import datetime

from pandas import read_csv
from matplotlib import pyplot
import numpy

# CSV of historical cooking-oil ("minyak goreng") prices.
file_path = "minyak-goreng-prices.csv"
def parser(x):
    """Parse a 'DD-Mon-YY' date string (e.g. '01-Jan-21') into a datetime.

    The two-digit year is assumed to fall in the 2000s and is expanded by
    prefixing '20' before parsing with the '%d-%b-%Y' format.
    """
    parts = x.split('-')
    # Rebuild as 'DD-Mon-20YY' so strptime sees a four-digit year.
    return datetime.strptime(parts[0] + "-" + parts[1] + "-20" + parts[2], '%d-%b-%Y')