Skip to content

Instantly share code, notes, and snippets.

# Bar chart titled "Cities": one bar per entry of `cities`, labelled by its index.
# NOTE(review): `ctc` and `cities` are defined outside this snippet; `ctc` is
# presumably the cutecharts chart module -- confirm against the full notebook.
bar_palette = ['#FFF1C5', '#F7B7A3', '#EA5F89', '#9B3192', '#57167E']
chart = ctc.Bar("Cities")
city_labels = list(cities.index)
chart.set_options(
    labels=city_labels,
    x_label='City',
    y_label='Count',
    colors=bar_palette,
)
# The single series is named 'Count' and takes its data from the 'values' column.
city_counts = list(cities['values'])
chart.add_series('Count', city_counts)
# Pie chart of `cities`, sliced by its index labels and sized by the 'values' column.
# NOTE(review): `ctc` and `cities` are defined outside this snippet -- confirm.
pie_palette = [
    '#FFF1C5', '#F7B7A3', '#EA5F89', '#9B3192',
    '#57167E', '#47B39C', '#00529B',
]
chart = ctc.Pie("Top 5 cities by the number of respondents")
slice_labels = list(cities.index)
# inner_radius=0.5 is passed through unchanged; presumably it hollows the
# centre of the pie (donut style) -- confirm against the chart library docs.
chart.set_options(labels=slice_labels, inner_radius=0.5, colors=pie_palette)
slice_values = list(cities['values'])
chart.add_series(slice_values)
# Call the load_javascript function when rendering a chart for the first time.
# Pie chart of `gender`, sliced by its index labels and sized by the 'values' column.
# NOTE(review): `ctc` and `gender` are defined outside this snippet -- confirm.
gender_palette = ['#FFF1C1', '#F7B7A3', '#EA5F89']
chart = ctc.Pie("Gender of Respondents")
gender_labels = list(gender.index)
# inner_radius=0 is passed through unchanged (presumably a full, non-hollow pie).
chart.set_options(labels=gender_labels, inner_radius=0, colors=gender_palette)
gender_values = list(gender['values'])
chart.add_series(gender_values)
# Call the load_javascript function when rendering a chart for the first time.
# Small hand-written sample: 12 rows, one list per column.
genders = [
    'Female', 'Male', 'Male', 'Male', 'Male', 'Female',
    'Male', 'Male', 'Male', 'Female', 'Male', 'Female',
]
ages = [41, 49, 37, 33, 27, 32, 59, 30, 38, 36, 35, 29]
education_fields = [
    'Life Sciences', 'Engineering', 'Life Sciences', 'Life Sciences',
    'Medical', 'Life Sciences', 'Life Sciences', 'Life Sciences',
    'Engineering', 'Medical', 'Life Sciences', 'Life Sciences',
]
monthly_incomes = [
    5993, 5130, 2090, 2909, 3468, 3068,
    2670, 2693, 9526, 5237, 2426, 4193,
]

# Column order follows the insertion order of the mapping.
df = pd.DataFrame(
    {
        'Gender': genders,
        'Age': ages,
        'EducationField': education_fields,
        'MonthlyIncome': monthly_incomes,
    }
)
# One sub-frame per species, selected by exact match on the 'species' column.
# NOTE(review): `df` must be a frame with a 'species' column, loaded outside
# this snippet -- confirm which dataset is in scope here.
df_Adelie, df_Gentoo, df_Chinstrap = (
    df[df['species'] == species_name]
    for species_name in ('Adelie', 'Gentoo', 'Chinstrap')
)

# Pair each subset with a colour, position by position.
datasets = [df_Adelie, df_Gentoo, df_Chinstrap]
color = ['skyblue', 'red', 'orange']
# zip() is a one-shot iterator: it can be looped over only once.
zip_datasets_color = zip(datasets, color)
for d,c in zip_datasets_color:
g = sns.lmplot(x = 'culmen_length_mm',
y = 'culmen_depth_mm',
# Scatter plot with a fitted regression line: culmen length vs. culmen depth.
# NOTE(review): `df`, `sns`, `stats` and `plt` come from outside this snippet.
sns.lmplot(x='culmen_length_mm', y='culmen_depth_mm', data=df)

# Pearson correlation coefficient of the two columns; element 0 of the result
# is the coefficient itself.
culmen_lengths = df['culmen_length_mm']
culmen_depths = df['culmen_depth_mm']
r = stats.pearsonr(culmen_lengths, culmen_depths)[0]

# Superimpose the coefficient on the current axes. The position is given in
# axes-fraction coordinates (transAxes): near the left edge, at the top.
ax = plt.gca()
r_label = 'r={:.3f}'.format(r)
ax.text(.03, 1, r_label, transform=ax.transAxes)

# Display the plot.
plt.show()
%matplotlib inline
from sklearn.metrics import roc_curve, precision_recall_curve, auc
import matplotlib.pyplot as plt
import numpy as np
# Compute the ROC curve of `scores` against `labels` and the area under it.
# NOTE(review): the body below has lost its indentation and no return statement
# is visible -- this snippet looks truncated; confirm against the full source.
def get_auc(labels, scores):
# roc_curve yields false-positive rates, true-positive rates and thresholds.
fpr, tpr, thresholds = roc_curve(labels, scores)
# Area under the curve traced by the (fpr, tpr) points.
auc_score = auc(fpr, tpr)
# Load diabetes.csv (path relative to the working directory) into a DataFrame.
df = pd.read_csv("diabetes.csv")
# Preview the first rows; the table that follows is the captured output.
df.head()
Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0
2 8 183 64 0 0 23.3 0.672 32 1
3 1 89 66 23 94 28.1 0.167 21 0
4 0 137 40 35 168 43.1 2.288 33 1
# Mean of the 'ndvi' column per (year, week-of-year) of 'datetime'.
# NOTE(review): `df`, `F` and `rf_agg_mean` are defined outside this snippet --
# presumably a Spark DataFrame, pyspark.sql.functions and a RasterFrames
# aggregate; confirm against the full notebook.
year_col = F.year('datetime').alias('year')
week_col = F.weekofyear('datetime').alias('week')
mean_ndvi_col = rf_agg_mean('ndvi').alias('mean_ndvi')
time_series = df.groupBy(year_col, week_col).agg(mean_ndvi_col)

# Bring the aggregated result into pandas for plotting with matplotlib.
ts_pd = time_series.toPandas()

# Order chronologically, modifying the frame in place.
ts_pd.sort_values(['year', 'week'], inplace=True)


def _year_week_label(row):
    # Compact label of year and zero-padded week number: yyyy_ww.
    return '{0:g}_{1:02g}'.format(row.year, row.week)


ts_pd['year_week'] = ts_pd.apply(_year_week_label, axis=1)
# Select the tile columns plus the extent and CRS derived from the 'red' tile,
# then keep only rows whose 'red' tile has fewer than 800 no-data cells, i.e.
# tiles that have lots of valid data.
# NOTE(review): `df` and the rf_* functions are defined outside this snippet;
# presumably rf_with_no_data('red', 0) marks 0 as the no-data value and
# rf_no_data_cells counts such cells -- confirm against the RasterFrames docs.
tile_columns = [
    'red',
    'nir',
    'datetime',
    'id',
    rf_extent('red').alias('extent'),
    rf_crs('red').alias('crs'),
]
no_data_cell_count = rf_no_data_cells(rf_with_no_data('red', 0))
# The result is not assigned; as a bare expression it is only displayed when
# it is the last statement of a notebook cell.
df.select(*tile_columns).filter(no_data_cell_count < 800)