Skip to content

Instantly share code, notes, and snippets.

View satishkt's full-sized avatar

Satish Terala satishkt

  • Databricks
  • Boston, MA
View GitHub Profile
@satishkt
satishkt / gist:34335457086547824bf85646fabd271c
Created May 30, 2020 15:24
Install-Uninstall-Version-Change-JDK
# Install multiple AdoptOpenJDK versions via Homebrew.
brew update
brew tap adoptopenjdk/openjdk
brew search jdk
# `brew cask install` was removed from Homebrew; the supported form is `brew install --cask`
brew install --cask adoptopenjdk11
brew install --cask adoptopenjdk12
brew install --cask adoptopenjdk13
# Check where Java is installed
/usr/libexec/java_home -V
Change Versions
@satishkt
satishkt / Pretty Print Named Tuples.py
Created May 3, 2019 19:39
A tuple prints as an object address when printed; this helper prints the attributes instead.
def namedtuple_to_str(t, field_widths=15):
    """Render a namedtuple as 'TypeName( field=value ... )' with padded columns.

    field_widths may be a single int (applied to every field) or a list of
    per-field widths.
    """
    if isinstance(field_widths, int):
        widths = [field_widths] * len(t._fields)
    else:
        widths = field_widths
    pairs = ['{}={}'.format(name, getattr(t, name)) for name in t._fields]
    padded = ('{{:{}}}'.format(w).format(p) for w, p in zip(widths, pairs))
    return '{}( {} )'.format(type(t).__name__, ' '.join(padded))
#Code snippets for Pandas
import pandas as pd
'''
Reading Files, Selecting Columns, and Summarizing
'''
# reading in a file from local computer or directly from a URL
# various file formats that can be read in or written out
'''
Format Type Data Description Reader Writer
text CSV read_csv to_csv
@satishkt
satishkt / notebook_imports.py
Created May 7, 2018 03:54
Notebook Code imports
# Notebook imports plus a quick word-cloud rendering snippet.
__author__ = 'satish'

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly as ply

from sklearn.feature_extraction.text import CountVectorizer, HashingVectorizer
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB, MultinomialNB, GaussianNB
from sklearn.linear_model import LogisticRegression, RidgeClassifier, Perceptron, PassiveAggressiveClassifier
from wordcloud import WordCloud

# NOTE(review): `text` must be defined earlier in the notebook (a corpus string) —
# it is not defined in this snippet; confirm before running.
wordcloud = WordCloud().generate(text)

# Display the generated image the matplotlib way:
plt.imshow(wordcloud)
plt.axis("off")

# take relative word frequencies into account, lower max_font_size
wordcloud = WordCloud(max_font_size=40, relative_scaling=.5).generate(text)
plt.figure()
"""
Reads Google search history JSON files from the given directory.
Expected JSON format :
{"event":[
{"query":{"id":[{"timestamp_usec":"1135905619017279"}],"query_text":"XYZ"}},
{"query":{"id":[{"timestamp_usec":"1135903586447380"}],"query_text":"ABC"}},
]}
The folder containing the JSON files is stored in a config.ini file with the section
cuisine ingredients
cuisine_group
African 0.637300 0.637300
EastAsian 3.231764 3.231764
EasternEuropean 0.689805 0.689805
LatinAmerican 5.255916 5.255916
MiddleEastern 1.167780 1.167780
NorthAmerican 75.179693 75.179693
NorthernEuropean 0.452628 0.452628
SouthAsian 1.124328 1.124328
@satishkt
satishkt / food_data_1.py
Last active September 6, 2015 04:58
gist_to_load_clean_data_file
def clean_data(filenames):
    """Load tab-delimited cuisine files into DataFrames.

    Each input line has the cuisine name first, then its ingredients,
    all separated by tabs. Returns one DataFrame per file with two
    columns: 'cuisine' and 'ingredients' (ingredients comma-joined).
    """
    frames = []
    for path in filenames:
        df = pd.read_csv(path, names=['col'], header=None)
        # Split each raw line once, then derive both columns from the parts.
        parts = df['col'].map(lambda line: line.split('\t'))
        df['cuisine'] = parts.map(lambda p: p[0])
        df['ingredients'] = parts.map(lambda p: ','.join(p[1:]))
        df = df.drop('col', axis=1)
        frames.append(df)
    return frames