Skip to content

Instantly share code, notes, and snippets.

View ks--ks's full-sized avatar
🎯
Focusing

Olga Berezovsky ks--ks

🎯
Focusing
View GitHub Profile
# Pull the two columns of the grouped result out as plain Python lists.
removed_by = grouped_df.removed_by.tolist()
number_of_removed_posts = grouped_df.number_of_removed_posts.tolist()

# One bar per `removed_by` value, bar height = number of removed posts.
plt.figure(figsize=(12, 8))
plt.bar(x=removed_by, height=number_of_removed_posts)
plt.ylabel("Number of deleted reddits")
plt.show()
# SQL over the `df` table: for every non-null `removed_by` value, count the
# distinct post ids, and sort by that count (column 2) from largest to smallest.
q1 = """SELECT removed_by, count(distinct id)as number_of_removed_posts
FROM df
where removed_by is not null
group by removed_by
order by 2 desc """
# locals() is handed to pandasql as the lookup environment -- presumably so the
# `df` table name in the query resolves to the DataFrame variable of the same
# name; confirm against the pandasql documentation.
grouped_df = ps.sqldf(q1, locals())
grouped_df  # bare expression; presumably here so a notebook displays the result
# Quick overview of the DataFrame and of its `score` column.
# NOTE(review): apart from df.info(), these are bare expressions -- their
# results are only visible if each one ends a notebook cell; confirm the
# intended cell boundaries.
df.info()
df.describe() #or df.count()
df.score.describe()  # summary statistics for the `score` column only
df.score.median()  # median of the `score` column
# Read the r/dataisbeautiful posts CSV from the Kaggle input directory into `df`.
df = pd.read_csv('/kaggle/input/dataisbeautiful/r_dataisbeautiful_posts.csv')
df.head(5)  # preview the first five rows
# Print the names of the available matplotlib style sheets, then select one.
print(plt.style.available)
#plt.style.use('bmh') #alternative: the 'bmh' ("Bayesian Methods for Hackers") style sheet
plt.style.use('ggplot') #R ggplot style
@ks--ks
ks--ks / Pandas EDA.py
Created May 17, 2020 04:41
Blog article code
import numpy as np #linear algebra
import pandas as pd #data processing
import seaborn as sns #statistical graph package
import matplotlib.pyplot as plt #plot package for visualisations
import pandasql as ps #sql package
@ks--ks
ks--ks / ntile.sql
Last active January 11, 2019 04:31
PowerUser_Step3
, NTILE(100) OVER (ORDER BY recruit_score DESC, share_score DESC, days_share_score DESC, channel_share_score DESC)
@ks--ks
ks--ks / InfluenceScore.sql
Created January 11, 2019 04:20
PowerUser_Step2
, influence AS (
SELECT user_id
, recruit_score
, share_score
, days_share_score
, channel_share_score
, NTILE(100) OVER (ORDER BY recruit_score DESC, share_score DESC, days_share_score DESC, channel_share_score DESC)
FROM (
SELECT a.user_id
@ks--ks
ks--ks / ActionShares.sql
Created January 11, 2019 04:17
PowerUser_Step1
WITH
-- recruits: per recruiter and item, the number of distinct users recruited.
-- Only rows whose created_at date falls in the window from 180 days before
-- 2017-01-01 through 2017-01-08 are counted (BETWEEN includes both ends).
recruits AS (
SELECT recruiter_id AS user_id
, item_name
, COUNT(DISTINCT recruited_id) AS recruits
FROM recruit_table
-- The start literal is cast to DATE before subtracting the day count: an
-- untyped string literal minus an integer is resolved as integer arithmetic
-- in PostgreSQL and fails instead of producing a date.
WHERE created_at::DATE BETWEEN '2017-01-01'::DATE - 180 AND '2017-01-08'::DATE
GROUP BY 1, 2
)