Skip to content

Instantly share code, notes, and snippets.

Avatar
🏠
Working from home

Khuyen Tran khuyentran1401

🏠
Working from home
View GitHub Profile
View bambi.py
import arviz as az
import bambi as bmb
# Cast the years-of-coding column to strings
# (presumably so the model treats it as a categorical predictor — TODO confirm)
sample1["YearsCoding"] = sample1["YearsCoding"].astype(str)
# Describe salary as a function of years of coding, then fit with a fixed seed
model1 = bmb.Model(
    "ConvertedSalary ~ YearsCoding",
    sample1,
)
result1 = model1.fit(random_seed=0)
View visualize.py
# Box plot of ConvertedSalary for each YearsCoding value in `years_sorted`
sns.boxplot(data=years_sorted, x="YearsCoding", y="ConvertedSalary")
# Display the figure
plt.show()
View convert.py
def str_to_interval(text: str):
    """Convert a year-range label into a ``pd.Interval``.

    Only the first whitespace-separated token of ``text`` is inspected:

    * ``"<low>-<high> ..."`` becomes ``pd.Interval(low, high)``.
    * ``"<low> ..."`` (no dash) becomes ``pd.Interval(low, inf)``.

    Values that are not strings (for example missing values) are returned
    unchanged, so the function can be applied to a column that contains
    non-string entries even though the parameter is annotated as ``str``.

    Raises:
        ValueError: if a bound in the first token is not an integer literal.
        IndexError: if ``text`` is an empty or whitespace-only string.
    """
    if not isinstance(text, str):
        # Pass non-string values through untouched.
        return text

    # The first token carries the numbers; the rest of the label is ignored.
    years = text.split()[0].split("-")
    if len(years) == 2:
        return pd.Interval(int(years[0]), int(years[1]))
    # No upper bound given: treat the range as open-ended.
    return pd.Interval(int(years[0]), float("inf"))
View plot.py
# Bin edges used to bucket salaries into ranges
bins = [
    0,
    25_000,
    50_000,
    75_000,
    100_000,
    125_000,
    150_000,
    300_000,
]
# Assign each non-missing salary to its range
normal_salary["Salary Range"] = pd.cut(
    normal_salary["ConvertedSalary"].dropna(),
    bins=bins,
)
# Count respondents within each salary range
salary_groupby = normal_salary.groupby("Salary Range").agg({"Respondent": "count"})
# Create a bar plot
View filter.py
# Keep rows whose salary is below 300,000; copy so later column assignments
# do not operate on a view of `sample`
normal_salary = sample.loc[sample["ConvertedSalary"] < 300_000].copy()
View filter.py
# Restrict the data to rows where Country is 'United States', Employment is
# 'Employed full-time' and Student is 'No'
sample = df.query(
"Country == 'United States' & Employment == 'Employed full-time' & Student == 'No'"
)
View get_data.py
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the survey results from a CSV file (path is relative to the working directory)
df = pd.read_csv("survey_results_public.csv")
View plot_interval.py
# Scatter the observed (mat, por) points
plt.scatter(mat, por, label="data")
# 2000 evenly spaced x values spanning the observed range of `mat`
x_range = np.linspace(start=min(mat), stop=max(mat), num=2000)
# Line built from the means of the posterior `Intercept` and `mat` values
y_pred = (
    fitted.posterior.Intercept.values.mean()
    + fitted.posterior.mat.values.mean() * x_range
)
# Plot mean prediction
plt.plot(x_range, y_pred, label="Mean regression line", color="black")
# Plot HDIs
View intercept.py
# Mean of the `Intercept` values stored in the fitted posterior
# (bare expression: the value is only displayed in an interactive session)
gauss_fitted.posterior.Intercept.values.mean()