Skip to content

Instantly share code, notes, and snippets.

@bkaankuguoglu
bkaankuguoglu / pdf_utils.py
Last active June 4, 2018 08:19
This program contains some utility functions for pdf files, e.g. conversion from pdf to jpg, or vice versa.
#=======================================================================#
# pdf_utils.py #
#=======================================================================#
# usage: pdf_utils.py [-h] [-i INPUT_DIR] [-o OUTPUT_DIR] [-m METHOD]
#
# This program contains some utility functions for pdf files.
#
# optional arguments:
# -h, --help show this help message and exit
# -i INPUT_DIR, --input_dir INPUT_DIR
@bkaankuguoglu
bkaankuguoglu / extract_data.py
Created May 30, 2018 09:02
This program extracts regular expressions within the given frame on a set of documents.
#=======================================================================#
# extract_data.py #
#=======================================================================#
# usage: extract_data.py [-h] [-i INPUT_DIR] [-o OUTPUT_DIR]
#
# This program extracts provision numbers from a set of documents.
#
# optional arguments:
# -h, --help show this help message and exit
# -i INPUT_DIR, --input_dir INPUT_DIR
@bkaankuguoglu
bkaankuguoglu / file_collector.py
Last active August 31, 2021 02:10
File Collector - Collects image files from the server via SOAP requests.
#========================================================================#
# file_collector.py #
#========================================================================#
#========================================================================#
# usage: file_collector.py [-h] [-f FILE] [-o OUTPUT_FOLDER] #
#========================================================================#
# optional arguments:
# -h, --help show this help message and exit
# -f FILE, --file FILE the xlsx file that contains all the files to be
# collected
@bkaankuguoglu
bkaankuguoglu / plot_dataset
Last active March 15, 2021 21:25
A function that plots a dataset as a Plotly line chart.
import plotly.graph_objs as go
from plotly.offline import iplot
def plot_dataset(df, title):
data = []
value = go.Scatter(
x=df.index,
y=df.value,
mode="lines",
name="values",
import pandas as pd
# Load the hourly consumption CSV and index it by its 'Datetime' column.
df = pd.read_csv('<YOUR_FILE_DIR>/PJME_hourly.csv')
df = df.set_index(['Datetime'])
df.index = pd.to_datetime(df.index)

# Sort only when the timestamps are not already in ascending order.
# `Index.is_monotonic` was deprecated in pandas 1.5 and removed in 2.0;
# `is_monotonic_increasing` is the equivalent, supported spelling.
if not df.index.is_monotonic_increasing:
    df = df.sort_index()

# Downstream code refers to the measurement column as 'value'.
df = df.rename(columns={'PJME_MW': 'value'})
def generate_time_lags(df, n_lags):
    """Return a copy of ``df`` extended with lagged copies of its ``value`` column.

    For every ``n`` in ``1..n_lags`` a column ``lag{n}`` is added that holds
    ``df["value"]`` shifted down by ``n`` rows. The first ``n_lags`` rows are
    then dropped, since their lag columns contain the NaN values introduced
    by shifting.

    Args:
        df: DataFrame containing a ``value`` column. It is not modified.
        n_lags: Number of lag columns to create; must be non-negative.

    Returns:
        A new DataFrame with the original columns followed by
        ``lag1`` .. ``lag{n_lags}``, without the first ``n_lags`` rows.

    Raises:
        ValueError: If ``n_lags`` is negative (a negative value would
            otherwise silently select rows from the end of the frame).
    """
    if n_lags < 0:
        raise ValueError(f"n_lags must be non-negative, got {n_lags}")
    if n_lags == 0:
        return df.copy()

    source = df["value"]
    # Build all lag columns in one frame instead of inserting them one by
    # one: repeated single-column inserts fragment the DataFrame and make
    # pandas emit a PerformanceWarning for large lag counts.
    lag_frame = pd.DataFrame(
        {f"lag{n}": source.shift(n) for n in range(1, n_lags + 1)},
        index=df.index,
    )
    # Pre-existing columns with a lag name are replaced, not duplicated.
    stale = [name for name in lag_frame.columns if name in df.columns]
    df_n = pd.concat([df.drop(columns=stale), lag_frame], axis=1)
    return df_n.iloc[n_lags:]
# Number of lag columns to generate from the 'value' series.
input_dim = 100

# Lag-augmented copy of the dataset; `df` itself is left untouched.
df_generated = generate_time_lags(df, n_lags=input_dim)
from sklearn.model_selection import train_test_split
def feature_label_split(df, target_col):
    """Split ``df`` into a feature frame and a one-column target frame.

    Args:
        df: Source DataFrame; it is not modified.
        target_col: Name of the column to use as the target.

    Returns:
        A ``(X, y)`` tuple where ``X`` is ``df`` without ``target_col`` and
        ``y`` is a DataFrame holding only ``target_col``.
    """
    # Double brackets keep the target as a DataFrame rather than a Series.
    target = df[[target_col]]
    features = df.drop([target_col], axis=1)
    return features, target
def train_val_test_split(df, target_col, test_ratio):
val_ratio = test_ratio / (1 - test_ratio)
X, y = feature_label_split(df, target_col)
from sklearn.preprocessing import MinMaxScaler
# Features and targets get their own scalers. Re-fitting one shared scaler on
# the targets would overwrite the parameters learned from the features, so
# new feature data could no longer be scaled consistently.
feature_scaler = MinMaxScaler()
X_train_arr = feature_scaler.fit_transform(X_train)
# Validation and test splits reuse the statistics fitted on the training split.
X_val_arr = feature_scaler.transform(X_val)
X_test_arr = feature_scaler.transform(X_test)

# `scaler` ends up fitted on the training targets, exactly as it did before,
# so any later code that uses it to undo target scaling keeps working.
scaler = MinMaxScaler()
y_train_arr = scaler.fit_transform(y_train)
y_val_arr = scaler.transform(y_val)
y_test_arr = scaler.transform(y_test)
from torch.utils.data import TensorDataset, DataLoader
# Not used in the lines visible here; presumably the DataLoader batch size.
# NOTE(review): confirm against the code that builds the loaders.
batch_size = 64

# Convert each scaled split to a torch tensor, features paired with targets.
train_features, train_targets = torch.Tensor(X_train_arr), torch.Tensor(y_train_arr)
val_features, val_targets = torch.Tensor(X_val_arr), torch.Tensor(y_val_arr)
test_features, test_targets = torch.Tensor(X_test_arr), torch.Tensor(y_test_arr)
class RNNModel(nn.Module):
def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
super(RNNModel, self).__init__()
# Defining the number of layers and the nodes in each layer
self.hidden_dim = hidden_dim
self.layer_dim = layer_dim
# RNN layers
self.rnn = nn.RNN(