Last active Aug 23, 2019
Python script to detect the percentage of English composition in a text file (makes use of the NLTK words corpus)
import string
import urllib.request
from nltk.corpus import words
# Characters to strip, held in a set so each membership test is O(1).
punctuation = set(string.punctuation)


def remove_punc(str):
    """Return the input with every ASCII punctuation character removed.

    Args:
        str: Text to clean. The name shadows the builtin ``str``; it is kept
            unchanged so existing keyword callers keep working.

    Returns:
        A new string made of the input's characters, in their original
        order, that are not members of ``string.punctuation``.
    """
    return ''.join(c for c in str if c not in punctuation)
total_count = 0
Python code for fractional differencing of pandas time series
Python code for fractional differencing of pandas time series
illustrating the concepts of the article "Preserving Memory in Stationary Time Series"
by Simon Kuttruf
While this code is dedicated to the public domain for use without permission, the author disclaims any liability in connection with the use of this code.
import numpy as np
import pandas as pd
import org.apache.spark.{SparkConf, SparkContext}
import{VectorAssembler, VectorIndexer}
import{RandomForestRegressionModel, RandomForestRegressor}
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.types.{BooleanType, FloatType, IntegerType, LongType, StringType, StructField, StructType}
import org.apache.spark.sql.{SaveMode, SparkSession}
object BatchPredict {
def showPlot(points, filename): # pier mod
fig, ax = plt.subplots()
# this locator puts ticks at regular intervals
loc = ticker.MultipleLocator(base=0.2)
Useful SQL for table handling
-- Drop the existing backup table, if there is one
DROP TABLE IF EXISTS [WKBRZ].[dbo].[PredictionNetworkBookingPax_Backup];

-- Snapshot the live table into a freshly created backup table
SELECT *
INTO [WKBRZ].[dbo].[PredictionNetworkBookingPax_Backup]
FROM [WKBRZ].[dbo].[PredictionNetworkBookingPax];

-- Copy the backed-up rows back into the live table
INSERT INTO [WKBRZ].[dbo].[PredictionNetworkBookingPax]
SELECT *
FROM [WKBRZ].[dbo].[PredictionNetworkBookingPax_Backup];
-- Delete all prediction table contents
Avoiding the pandas SettingWithCopyWarning
#A value is trying to be set on a copy of a slice from a DataFrame.
#Try using .loc[row_indexer,col_indexer] = value instead
# assign was introduced in pandas 0.16 to deal with this false positive
# Instead of
df_weekly_season.loc[:, 'market'] = market
# or
df_weekly_season['market'] = market
Steps to install Python 3.6 on Cloudera VM
> yum install centos-release-scl
> yum info rh-python36
> scl enable rh-python36 bash
> python --version
Run PySpark2
> pyspark2
Python 3.6.3 (default, Apr 26 2018, 13:16:02)
Converting datetime to day of week
import calendar
# Weekday name looked up in calendar.day_name for each value of the 'ds' column.
df_weekly_season['DOW_ENG'] = weekly_season['ds'].apply(lambda x : calendar.day_name[pd.to_datetime(x).weekday()])
# Two alternative numeric encodings of the weekday. Both write to 'DOW', so the
# second assignment overwrites the first -- keep only the one you need.
df_weekly_season['DOW'] = weekly_season['ds'].apply(lambda x : int(pd.to_datetime(x).strftime('%w'))) # SUN == 0
df_weekly_season['DOW'] = weekly_season['ds'].apply(lambda x : int(pd.to_datetime(x).weekday())) # MON == 0
Pretty seaborn bar chart, differentiate data by color
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import as style
# One 20x7 axes for the bar chart.
f, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(20, 7), sharex=True)
# Build a two-column frame from the dict's (key, value) pairs and order it by
# the rate column, largest first.
# NOTE(review): presumably monthly_growth_dict maps market -> growth rate; confirm.
df_mth_plot = (
    pd.DataFrame(
        list(monthly_growth_dict.items()),
        columns=['Market', 'Average Monthly Growth Rate %'],
    )
    .sort_values('Average Monthly Growth Rate %', ascending=False)
)
import pyodbc
import pandas as pd
conn = pyodbc.connect(
r'DRIVER={ODBC Driver 17 for SQL Server};'
