Ceren cereniyim

## logarithm_transformation.py
def apply_log1p_transformation(dataframe, column):
    """Add a log1p-transformed copy of ``column`` to ``dataframe``.

    The transformed values are stored in place under the name
    ``"log_" + column`` and that newly stored column is returned.

    Args:
        dataframe: Table holding ``column``; it is modified in place.
        column: Name of the source column, given as a string.

    Returns:
        The ``"log_" + column`` entry that was just written.
    """
    # Compute first, then build the target name, then store.
    log_values = np.log1p(dataframe[column])
    target_name = "log_" + column
    dataframe[target_name] = log_values
    return dataframe[target_name]

## make_list_of_K.py
def make_list_of_K(K, dataframe):
  '''inputs: K as integer and dataframe
  apply k-means clustering to dataframe
  and make a list of inertia values against 1 to K
  return the inertia values list
  '''
    cluster_values = list(range(1, K+1))
    inertia_values=[]

    for c in cluster_values:

## aggregate_by_ordered_quantity.py
def encode_column(column):
    """Encode a single numeric value as a binary flag.

    Returns 1 when the value is greater than zero and 0 when it is zero
    or negative. A value for which neither comparison holds (for example
    NaN) falls through and yields ``None``.
    """
    if column > 0:
        return 1
    return 0 if column <= 0 else None


def aggregate_by_ordered_quantity(dataframe, column_list):
    '''this function:
    1. aggregates a given dataframe by column list,

## plotly_logic_explanation.py
import numpy as np
np.random.seed(42)  # fixed seed so the random sample data below is reproducible
import plotly.offline as pyo
import plotly.graph_objs as go

# create data: 100 random integers per axis, each drawn from 1..100
# (randint's upper bound, 101, is exclusive)
x = np.random.randint(1, 101, 100)
y = np.random.randint(1, 101, 100)

# data object to be used in figure object

## jupyterlab_debugging.py
# source https://towardsdatascience.com/debugging-jupyter-notebooks-will-boost-your-productivity-a33387f4fa62
import contextlib
# Import pixiedust with stdout redirected to None, so anything it prints
# while being imported is discarded instead of shown in the notebook.
with contextlib.redirect_stdout(None):
   import pixiedust
# New Cell
def find_max (values):
   max = 0
   import pdb; pdb.set_trace()
   for val in values:
      if val > max:

## wine_rating_predictor_imports.py
# data manipulation libraries
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 60)

# data visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.core.pylabtools import figsize

## missing_value_function.py
def missing_values_table(df):
    """Summarise the missing values in ``df``.

    Builds a two-column table holding the count of null entries and that
    count expressed as a percentage of ``len(df)``.

    NOTE(review): assumes ``df`` is a pandas DataFrame, in which case the
    counts are per column -- confirm against callers. This excerpt ends
    once the table is assembled; any renaming, sorting, or ``return`` of
    the table is not visible here.
    """
    # Total missing values
    mis_val = df.isnull().sum()

    # Percentage of missing values, relative to the number of rows
    mis_val_percent = 100 * df.isnull().sum() / len(df)

    # Make a table with the results: counts and percentages side by side
    mis_val_table = pd.concat([mis_val, mis_val_percent],
                              axis=1)

## clean_data.py
def CleanData(df, drop_columns, target_name):
  # This function drops unused features (the columns in drop_columns)
  # and duplicate rows,
  # and is meant to drop rows with an empty target (points),
  # then return the cleaned df.
  # NOTE(review): only the column drop and de-duplication are visible in
  # this excerpt; the target_name filtering and the return are not shown.

  # remove the columns that are not used
  interim_df = df.drop(columns=drop_columns)

  # remove duplicate rows; ignore_index=True renumbers the result from 0
  interim_df_2 = (interim_df
                  .drop_duplicates(ignore_index=True))

## plot_histogram.py
def plot_histogram(df, column, b=None):
  # function to print histogram
  # with mean and median
  # using distplot

  # set the histogram, mean and median
  g = sns.distplot(df[column], kde=False, bins=b)

  plt.axvline(x=df[column].mean(),
              linewidth=3,

## plot_distribution.py
def plot_distribution(df, target, column_values, column_name):
  # function to plot the distribution of a continuous variable (target)
  # for categorical data: one KDE curve per value in column_values
  # NOTE(review): the excerpt may end before axis labels/legend are set.

  for value in column_values:
      # rows of df whose column_name equals the current category value
      subset = df[df[column_name] == value]
      # draw this category's density curve, labelled with its value
      g = sns.kdeplot(subset[target],
                      label=value,
                      linewidth=3)
	def apply_log1p_transformation(dataframe, column):
	    """Add a log1p-transformed copy of ``column`` to ``dataframe``.

	    The transformed values are stored in place under the name
	    ``"log_" + column`` and that newly stored column is returned.

	    Args:
	        dataframe: Table holding ``column``; it is modified in place.
	        column: Name of the source column, given as a string.

	    Returns:
	        The ``"log_" + column`` entry that was just written.
	    """
	    # The body had lost its indentation; it is restored here so the
	    # definition parses as a function again.
	    log_values = np.log1p(dataframe[column])
	    target_name = "log_" + column
	    dataframe[target_name] = log_values
	    return dataframe[target_name]
	def make_list_of_K(K, dataframe):
	'''inputs: K as integer and dataframe
	apply k-means clustering to dataframe
	and make a list of inertia values against 1 to K
	return the inertia values list
	'''
	cluster_values = list(range(1, K+1))
	inertia_values=[]

	for c in cluster_values:
	def encode_column(column):
	    """Encode a single numeric value as a binary flag.

	    Returns 1 when the value is greater than zero and 0 when it is
	    zero or negative. A value for which neither comparison holds (for
	    example NaN) falls through and yields ``None``.
	    """
	    # The body had lost its indentation; it is restored here so each
	    # return sits under its own condition.
	    if column > 0:
	        return 1
	    if column <= 0:
	        return 0


	def aggregate_by_ordered_quantity(dataframe, column_list):
	'''this function:
	1. aggregates a given dataframe by column list,
	import numpy as np
	np.random.seed(42)
	import plotly.offline as pyo
	import plotly.graph_objs as go

	# create data
	x = np.random.randint(1, 101, 100)
	y = np.random.randint(1, 101, 100)

	# data object to be used in figure object
	# source https://towardsdatascience.com/debugging-jupyter-notebooks-will-boost-your-productivity-a33387f4fa62
	import contextlib
	with contextlib.redirect_stdout(None):
	import pixiedust
	# New Cell
	def find_max (values):
	max = 0
	import pdb; pdb.set_trace()
	for val in values:
	if val > max:
	# data manipulation libraries
	import pandas as pd
	import numpy as np
	pd.set_option('display.max_columns', 60)

	# data visualization libraries
	import matplotlib.pyplot as plt
	import seaborn as sns
	from IPython.core.pylabtools import figsize
	def missing_values_table(df):
	# Total missing values
	mis_val = df.isnull().sum()

	# Percentage of missing values
	mis_val_percent = 100 * df.isnull().sum() / len(df)

	# Make a table with the results
	mis_val_table = pd.concat([mis_val, mis_val_percent],
	axis=1)
	def CleanData(df, drop_columns, target_name):
	# this function drops not used features
	# and duplicate rows
	# and empty rows of target(poinst)
	# returns cleaned df
	interim_df = df.drop(columns=drop_columns)

	interim_df_2 = (interim_df
	.drop_duplicates(ignore_index=True))
	def plot_histogram(df, column, b=None):
	# funtion to print histogram
	# with mean and median
	# using distplot

	# set the histogram, mean and median
	g = sns.distplot(df[column], kde=False, bins=b)

	plt.axvline(x=df[column].mean(),
	linewidth=3,
	def plot_distribution(df, target, column_values, column_name):
	# funtion to print distribution of a continuous variable
	# for categorical data

	for value in column_values:
	subset = df[df[column_name] == value]
	g = sns.kdeplot(subset[target],
	label=value,
	linewidth=3)