Ceren cereniyim

## extract_year_from_title.py
def extract_year_from_title(title):
  # function to find the year in the given list
  # if not found assigns zero as year
  # ASSUMPTION: There is no NA values
  # in the title feature

  int_list = []
  now = datetime.datetime.now()

  for item in title:

## search_keywards_in_feature.py
def extract_features_from_description(df,
                                      column_name,
                                      new_feature_name,
                                      extract_words):
    # function to extract features from the column_name
    # searches column_name feature for a given list
    # ASSUMPTION: There is no NA values
    # in the description feature
    check_regex = (r'\b(?:{})\b'
                   .format('|'

## plot_distribution.py
def plot_distribution(df, target, column_values, column_name):
  # function to draw the distribution (one KDE curve per category)
  # of a continuous variable for categorical data
  #
  # df: table that is filtered with a boolean mask on column_name
  #   (presumably a pandas DataFrame -- confirm against the caller)
  # target: name of the column whose density is drawn
  # column_values: iterable of category values; one curve is drawn per value
  # column_name: name of the column compared against each category value
  #
  # NOTE(review): only the first lines of this function are visible in
  # this excerpt; anything after the loop is not shown here

  for value in column_values:
      # keep only the rows whose column_name equals the current value
      subset = df[df[column_name] == value]
      # draw the density curve for this category, labelled with the value
      g = sns.kdeplot(subset[target],
                      label=value,
                      linewidth=3)

## plot_histogram.py
def plot_histogram(df, column, b=None):
  # function to print histogram
  # with mean and median
  # using distplot

  # set the histogram, mean and median
  g = sns.distplot(df[column], kde=False, bins=b)

  plt.axvline(x=df[column].mean(),
              linewidth=3,

## clean_data.py
def CleanData(df, drop_columns, target_name):
  # this function drops unused features,
  # duplicate rows,
  # and rows with an empty target (points)
  # returns cleaned df
  #
  # df: input table (presumably a pandas DataFrame -- confirm)
  # drop_columns: column labels removed from df
  # target_name: not used in the visible lines
  #
  # NOTE(review): only the column drop and the de-duplication are visible
  # in this excerpt; the target filtering and the return described above
  # are not shown here

  # remove the unused feature columns (df itself is not modified)
  interim_df = df.drop(columns=drop_columns)

  # remove duplicate rows; ignore_index=True renumbers the result 0..n-1
  interim_df_2 = (interim_df
                  .drop_duplicates(ignore_index=True))

## missing_value_function.py
def missing_values_table(df):
    # Summarise missing values of df as counts and percentages.
    # df: presumably a pandas DataFrame, giving one entry per
    #   column -- confirm against the caller
    # NOTE(review): only the first lines of this function are visible in
    # this excerpt; no return statement is shown here

    # Total missing values
    mis_val = df.isnull().sum()

    # Percentage of missing values (relative to the number of rows)
    mis_val_percent = 100 * df.isnull().sum() / len(df)

    # Make a table with the results
    # (axis=1 places counts and percentages side by side as two columns)
    mis_val_table = pd.concat([mis_val, mis_val_percent],
                              axis=1)

## wine_rating_predictor_imports.py
# data manipulation libraries
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 60)

# data visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.core.pylabtools import figsize

## jupyterlab_debugging.py
# source https://towardsdatascience.com/debugging-jupyter-notebooks-will-boost-your-productivity-a33387f4fa62
import contextlib
with contextlib.redirect_stdout(None):
   import pixiedust
# New Cell
def find_max (values):
   max = 0
   import pdb; pdb.set_trace()
   for val in values:
      if val > max:

## plotly_logic_explanation.py
import numpy as np
np.random.seed(42)
import plotly.offline as pyo
import plotly.graph_objs as go

# create data: two arrays of 100 random integers each, drawn from
# 1..100 inclusive (randint's upper bound, 101, is exclusive)
x = np.random.randint(1, 101, 100)
y = np.random.randint(1, 101, 100)

# data object to be used in figure object

## aggregate_by_ordered_quantity.py
def encode_column(column):
    """Encode a numeric value as a binary flag.

    The argument is used directly in ``if`` tests, so it has to be a
    value whose comparisons yield a single truth value.

    Returns:
        1 when ``column`` is greater than zero, 0 when it is less than or
        equal to zero, and ``None`` when neither comparison holds
        (for example a float NaN).
    """
    if column > 0:
        return 1
    # Neither branch matching (e.g. NaN) yields None.
    return 0 if column <= 0 else None


def aggregate_by_ordered_quantity(dataframe, column_list):
    '''this function:
    1. aggregates a given dataframe by column list,
	def extract_year_from_title(title):
	# function to find the year in the given list
	# if not found assigns zero as year
	# ASSUMPTION: There is no NA values
	# in the title feature

	int_list = []
	now = datetime.datetime.now()

	for item in title:
	def extract_features_from_description(df,
	column_name,
	new_feature_name,
	extract_words):
	# function to extract features from the column_name
	# searches column_name feature for a given list
	# ASSUMPTION: There is no NA values
	# in the description feature
	check_regex = (r'\b(?:{})\b'
	.format('\|'
	def plot_distribution(df, target, column_values, column_name):
	# function to print distribution of a continuous variable
	# for categorical data

	for value in column_values:
	subset = df[df[column_name] == value]
	g = sns.kdeplot(subset[target],
	label=value,
	linewidth=3)
	def plot_histogram(df, column, b=None):
	# function to print histogram
	# with mean and median
	# using distplot

	# set the histogram, mean and median
	g = sns.distplot(df[column], kde=False, bins=b)

	plt.axvline(x=df[column].mean(),
	linewidth=3,
	def CleanData(df, drop_columns, target_name):
	# this function drops not used features
	# and duplicate rows
	# and empty rows of target (points)
	# returns cleaned df
	interim_df = df.drop(columns=drop_columns)

	interim_df_2 = (interim_df
	.drop_duplicates(ignore_index=True))
	def missing_values_table(df):
	# Total missing values
	mis_val = df.isnull().sum()

	# Percentage of missing values
	mis_val_percent = 100 * df.isnull().sum() / len(df)

	# Make a table with the results
	mis_val_table = pd.concat([mis_val, mis_val_percent],
	axis=1)
	# data manipulation libraries
	import pandas as pd
	import numpy as np
	pd.set_option('display.max_columns', 60)

	# data visualization libraries
	import matplotlib.pyplot as plt
	import seaborn as sns
	from IPython.core.pylabtools import figsize
	# source https://towardsdatascience.com/debugging-jupyter-notebooks-will-boost-your-productivity-a33387f4fa62
	import contextlib
	with contextlib.redirect_stdout(None):
	import pixiedust
	# New Cell
	def find_max (values):
	max = 0
	import pdb; pdb.set_trace()
	for val in values:
	if val > max:
	import numpy as np
	np.random.seed(42)
	import plotly.offline as pyo
	import plotly.graph_objs as go

	# create data
	x = np.random.randint(1, 101, 100)
	y = np.random.randint(1, 101, 100)

	# data object to be used in figure object
	def encode_column(column):
	# Encode a numeric value as a binary flag: 1 when it is greater than
	# zero, 0 when it is less than or equal to zero.
	# NOTE(review): this copy has lost its block indentation (every line
	# sits at the same tab level), so it is not runnable as written.
	if column > 0:
	return 1
	if column <= 0:
	return 0


	def aggregate_by_ordered_quantity(dataframe, column_list):
	'''this function:
	1. aggregates a given dataframe by column list,