Skip to content

Instantly share code, notes, and snippets.

Avatar
💭
Coding

AnupJoseph

💭
Coding
View GitHub Profile
View kaggle_size_income.jl
Gadfly.with_theme(:dark) do
set_default_plot_size(30cm ,15cm)
size_col,income_col = "What is the size of the company where you are employed?","What is your current yearly compensation (approximate \$USD)?"
size_df = select(dataset,[size_col,income_col])
filter!(size_col => x->!ismissing(x),size_df)
filter!(income_col => x->!ismissing(x),size_df)
size_df[!,:median_income] = build_median_income.(size_df[!,income_col])
View kaggle_age_income_graph.jl
Gadfly.with_theme(:dark) do
income_col,exp_col =
"What is your current yearly compensation (approximate \$USD)?","For how many years have you been writing code and/or programming?"
income_subset = select(dataset,[income_col,exp_col])
# Drop all rows with missing entries, as they are too hard to deal with here
filter!(exp_col => x->!ismissing(x),income_subset)
filter!(income_col => x->!ismissing(x),income_subset)
income_exp_counts = combine(
groupby(income_subset
View kaggle_age_income_preprocessors.jl
function build_median_income(x)
x =strip(x,['>','\$','<'])
if occursin("-",x)
low,high = split(x,"-")
low = parse(Int,replace(low,","=>""))
high = parse(Int,replace(high,","=>""))
return (low+high)/2
else
x = parse(Int,replace(x,","=>""))
View kaggle_experience_plot.jl
Gadfly.with_theme(:dark) do
programming_exp = "For how many years have you been writing code and/or programming?"
experience_subset = select(dataset,programming_exp)
# Filter out missing values and count the number of respondents in each group
filter!(programming_exp => x->!ismissing(x),experience_subset)
experience_counts = combine(
groupby(experience_subset,programming_exp),
nrow=>:num_counts
)
View kaggle_age_gender_plot.jl
Gadfly.with_theme(:dark) do
# Subset the data with only rows I need
gender,age = names(dataset)[3],names(dataset)[2]
age_with_gender_data = dataset[!,2:3]
# Group by age and gender, then count
age_and_gender_counts = combine(
groupby(
age_with_gender_data,[gender,age])
View kaggle_barplot.jl
Gadfly.with_theme(:dark) do
age_counts = countmap(dataset[!,2])
custom_order = ["18-21","22-24","25-29","30-34", "35-39","40-44","45-49","50-54" ,"55-59","60-69" , "70+"]
age_order = OrderedDict()
# Using the above custom order to make a custom dictionary
for item in custom_order
age_order[item] = age_counts[item]
View kaggle_countplot.jl
Gadfly.with_theme(:dark) do # Dark theme cause dark theme looks great with pluto
set_default_plot_size(25cm ,15cm)
# Think of this as the pandas value_counts function
value_counts = sort(countmap(dataset[!,4]),byvalue=true)
states = collect(keys(value_counts))
# Going to use a dotplot for the visual
# Using a log scale here as the graph's boring otherwise
View TIS.py
import time
from collections import namedtuple
import gi
import re
import numpy
from enum import Enum
gi.require_version("Gst", "1.0")
gi.require_version("Tcam", "1.0")
@AnupJoseph
AnupJoseph / dataset.csv
Last active Dec 20, 2021
Migrant deaths dataset from missing migrants.com
View dataset.csv
Incident Date Total Number of Dead and Missing Coordinates
Wed, 01/01/2020 - 12:00 1 45.59174847, 13.85420616
Wed, 01/01/2020 - 12:00 4 19.45699612, 5.76462348
Thu, 01/02/2020 - 12:00 1 31.76402, 14.03511
Thu, 01/02/2020 - 12:00 1 20.0522119, -99.3442487
Thu, 01/02/2020 - 12:00 1 32.35061, -112.33691
Fri, 01/03/2020 - 12:00 1 32.44845, -113.11031
Fri, 01/03/2020 - 12:00 15 36.59258427, 28.98970454
Fri, 01/03/2020 - 12:00 20 16.69165509, 98.45030991
Fri, 01/03/2020 - 12:00 1 22.78796, 5.52718
View App.svelte
<script>
import { json } from "d3";
import Marks from "./Marks.svelte";
let dataset = [];
json(
"https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/world.geojson"
).then((data) => {
dataset = data.features;