Skip to content

Instantly share code, notes, and snippets.

View jatinchauhann's full-sized avatar
💸
Working

Jatin Chauhan jatinchauhann

💸
Working
View GitHub Profile
@jatinchauhann
jatinchauhann / df_tables_hdfs_stasts.css
Created August 30, 2019 10:07
df_tables_hdfs_stasts.css for HDFS Stats
/* Elements with class "mystyle": 11pt Arial text inside a single
   collapsed 1px solid silver border. */
.mystyle {
    border: 1px solid silver;
    border-collapse: collapse;
    font-family: Arial;
    font-size: 11pt;
}
.mystyle td, th {
padding: 5px;
@jatinchauhann
jatinchauhann / driver_function_hdfs_stats.py
Created August 30, 2019 10:05
driver_function_hdfs_stats.py for HDFS Stats Tool
if __name__ == '__main__':
file_name = args.jsonconfig
css_file = args.cssfile
try:
spark = SparkSession.builder.enableHiveSupport().appName("HDFS Stats Generator").getOrCreate()
print(spark.version)
log=spark.sparkContext._jvm.org.apache.log4j.LogManager.getLogger(__name__)
def send_mail(franchise_name, send_from, send_to, subject, email_content, files,
server_host, server_port):
msg = MIMEMultipart()
msg['From'] = send_from
msg['To'] = send_to
msg['Date'] = formatdate(localtime=True)
msg['Subject'] = subject
msg.preamble = """
Please open these reports using Google Chrome!"""
@jatinchauhann
jatinchauhann / generate_mail_hdfs_stats.py
Created August 30, 2019 10:02
generate_mail_hdfs_stats.py for HDFS Stats Tool
def generate_mail(db_json, db_names_hdfs, df_all_stats, df_db_name_dict_seg_stats, admin):
html_dump = ""
for db in db_names_hdfs:
# Function fingerprint: franchise_name, send_from, send_to, subject, email_content, files, server_host, server_port
if admin == False:
html_dump = get_html(df_all_stats, df_db_name_dict_seg_stats[db], db_json[db]["name"])
send_mail(db_json[db]["name"], db_json["admin"]["email"], db_json[db]["email"], db_json["mail_format"]["subject"], db_json["mail_format"]["body"], html_dump, db_json["admin"]["server_host"], db_json["admin"]["server_port"])
if admin == True:
html_dump = html_dump + get_html(df_all_stats, df_db_name_dict_seg_stats[db], db_json[db]["name"])
if admin == True:
@jatinchauhann
jatinchauhann / get_html_footer_hdfs_stats.py
Created August 30, 2019 09:54
get_html_footer_hdfs_stats.py for HDFS Stats Tool
@jatinchauhann
jatinchauhann / get_html_header_hdfs_stats.py
Created August 30, 2019 09:52
get_html_header_hdfs_stats.py for HDFS Stats Tool
def get_html_header(dataframe_all_stats, dataframe, franchise):
input_props = {
"db_name" : str(franchise).upper(),
"db_size" : str(get_total_size(dataframe)),
"db_all_size" : str(get_total_size(dataframe_all_stats)),
"hdfs_location_all" : get_common_location(dataframe_all_stats),
"hdfs_location" : get_common_location(dataframe),
"top_10_tables_style" : get_top_table(dataframe, 10),
"zerokb_tables_style" : get_zerokb_table(dataframe),
"style" : style
@jatinchauhann
jatinchauhann / get_html_header_hdfs_stats.py
Created August 30, 2019 09:52
get_html_header_hdfs_stats.py for HDFS Stats Tool
def get_html_header(dataframe_all_stats, dataframe, franchise):
input_props = {
"db_name" : str(franchise).upper(),
"db_size" : str(get_total_size(dataframe)),
"db_all_size" : str(get_total_size(dataframe_all_stats)),
"hdfs_location_all" : get_common_location(dataframe_all_stats),
"hdfs_location" : get_common_location(dataframe),
"top_10_tables_style" : get_top_table(dataframe, 10),
"zerokb_tables_style" : get_zerokb_table(dataframe),
"style" : style
@jatinchauhann
jatinchauhann / db_names.json
Created August 30, 2019 07:10
JSON Configuration File used in the HDFS Stats Tool available at my repository
{
"admin": {
"email": "jatin.chauhan@abc.com",
"all_stats_to_email": "jatin.chauhan@abc.com,kuldeepsingh.chauhan@abc.com",
"server_host": "your_smtp_host_name",
"server_port": "your_smtp_host_port_number"
},
"db_name_1": {
"name": "DB Name 1",
"email": "jatin.chauhan@abc.com"
@jatinchauhann
jatinchauhann / sample-machine-learning-classifier.py
Last active April 23, 2018 17:21
Writing Our First Classifier - Machine Learning Recipes #5 (Adapted from this video : https://www.youtube.com/watch?v=AoeEHqVSNOw&index=5&list=PLOU2XLYxmsIIuiBfYad6rFYQU_jL2ryal)
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.spatial import distance
def euc(a, b):
    """Return the Euclidean distance between the points ``a`` and ``b``.

    Thin wrapper around ``scipy.spatial.distance.euclidean``; both arguments
    are passed through unchanged.
    """
    return distance.euclidean(a, b)
#Creating a sample classifier class
class ScrappyKNN():
@jatinchauhann
jatinchauhann / sample-mongo-commands.json
Created July 4, 2017 09:12
MongoDB useful commands for beginners. Adapted from a YouTube video - https://www.youtube.com/watch?v=pWbMrx5rVBE
//after installing mongodb
//cd to the 'bin' directory of the mongodb folder
//run the following commands in the cmd (run as administrator -for Windows)
mongod --directoryperdb --dbpath C:\mongodb\data\db --logpath C:\mongodb\log\mongo.log --logappend --rest --install
//to start the mongodb service
net start MongoDB
//to enter mongodb service (this is necessary to start the mongo service)
mongo