AnupJoseph

## kaggle_size_income.jl
Gadfly.with_theme(:dark) do
	set_default_plot_size(30cm ,15cm)
	size_col,income_col = "What is the size of the company where you are employed?","What is your current yearly compensation (approximate \$USD)?"
	size_df = select(dataset,[size_col,income_col])

	filter!(size_col => x->!ismissing(x),size_df)
	filter!(income_col => x->!ismissing(x),size_df)


	size_df[!,:median_income] = build_median_income.(size_df[!,income_col])

## kaggle_age_income_graph.jl
Gadfly.with_theme(:dark) do
	income_col,exp_col =
	"What is your current yearly compensation (approximate \$USD)?","For how many years have you been writing code and/or programming?"
	income_subset = select(dataset,[income_col,exp_col])

	# Drop all rows with missing entries, as they are too hard to deal with here
	filter!(exp_col => x->!ismissing(x),income_subset)
	filter!(income_col => x->!ismissing(x),income_subset)
	income_exp_counts = combine(
		groupby(income_subset

## kaggle_age_income_preprocessors.jl
function build_median_income(x)
	x =strip(x,['>','\$','<'])
	if occursin("-",x)
		low,high = split(x,"-")
		low = parse(Int,replace(low,","=>""))
		high = parse(Int,replace(high,","=>""))

		return (low+high)/2
	else
		x = parse(Int,replace(x,","=>""))

## kaggle_experience_plot.jl
Gadfly.with_theme(:dark) do
	programming_exp = "For how many years have you been writing code and/or programming?"
	experience_subset = select(dataset,programming_exp)

	# Filter out missing values and count the number of respondents in each group
	filter!(programming_exp => x->!ismissing(x),experience_subset)
	experience_counts = combine(
		groupby(experience_subset,programming_exp),
		nrow=>:num_counts
	)

## kaggle_age_gender_plot.jl
Gadfly.with_theme(:dark) do

	# Subset the data to only the columns I need
	gender,age = names(dataset)[3],names(dataset)[2]
	age_with_gender_data = dataset[!,2:3]

	# Group by gender and age, then count
	age_and_gender_counts = combine(
		groupby(
			age_with_gender_data,[gender,age])

## kaggle_barplot.jl
Gadfly.with_theme(:dark) do
	age_counts = countmap(dataset[!,2])


	custom_order = ["18-21","22-24","25-29","30-34", "35-39","40-44","45-49","50-54" ,"55-59","60-69" , "70+"]
	age_order = OrderedDict()

	# Using the above custom order to make a custom dictionary
	for item in custom_order
		age_order[item] = age_counts[item]

## kaggle_countplot.jl
Gadfly.with_theme(:dark) do   # Dark theme cause dark theme looks great with pluto
	set_default_plot_size(25cm ,15cm)

	# Think of this as the pandas value_counts function
	value_counts = sort(countmap(dataset[!,4]),byvalue=true)
	states = collect(keys(value_counts))


	# Going to use a dotplot for the visual
	# Using a log scale here as the graph's boring otherwise

## TIS.py
import time
from collections import namedtuple

import gi
import re
import numpy
from enum import Enum
gi.require_version("Gst", "1.0")
gi.require_version("Tcam", "1.0")

## dataset.csv

          
            Incident Date
            Total Number of Dead and Missing
            Coordinates

            
              Wed, 01/01/2020 - 12:00
              1
              45.59174847, 13.85420616

            
              Wed, 01/01/2020 - 12:00
              4
              19.45699612, 5.76462348

            
              Thu, 01/02/2020 - 12:00
              1
              31.76402, 14.03511

            
              Thu, 01/02/2020 - 12:00
              1
              20.0522119, -99.3442487

            
              Thu, 01/02/2020 - 12:00
              1
              32.35061, -112.33691

            
              Fri, 01/03/2020 - 12:00
              1
              32.44845, -113.11031

            
              Fri, 01/03/2020 - 12:00
              15
              36.59258427, 28.98970454

            
              Fri, 01/03/2020 - 12:00
              20
              16.69165509, 98.45030991

            
              Fri, 01/03/2020 - 12:00
              1
              22.78796, 5.52718

## App.svelte
<script>
  import { json } from "d3";

  import Marks from "./Marks.svelte";

  let dataset = [];
  json(
    "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/world.geojson"
  ).then((data) => {
    dataset = data.features;
	Gadfly.with_theme(:dark) do
	set_default_plot_size(30cm ,15cm)
	size_col,income_col = "What is the size of the company where you are employed?","What is your current yearly compensation (approximate \$USD)?"
	size_df = select(dataset,[size_col,income_col])

	filter!(size_col => x->!ismissing(x),size_df)
	filter!(income_col => x->!ismissing(x),size_df)


	size_df[!,:median_income] = build_median_income.(size_df[!,income_col])
	Gadfly.with_theme(:dark) do
	income_col,exp_col =
	"What is your current yearly compensation (approximate \$USD)?","For how many years have you been writing code and/or programming?"
	income_subset = select(dataset,[income_col,exp_col])

	# Drop all rows with missing entries, as they are too hard to deal with here
	filter!(exp_col => x->!ismissing(x),income_subset)
	filter!(income_col => x->!ismissing(x),income_subset)
	income_exp_counts = combine(
	groupby(income_subset
	function build_median_income(x)
	x =strip(x,['>','\$','<'])
	if occursin("-",x)
	low,high = split(x,"-")
	low = parse(Int,replace(low,","=>""))
	high = parse(Int,replace(high,","=>""))

	return (low+high)/2
	else
	x = parse(Int,replace(x,","=>""))
	Gadfly.with_theme(:dark) do
	programming_exp = "For how many years have you been writing code and/or programming?"
	experience_subset = select(dataset,programming_exp)

	# Filter out missing values and count the number of respondents in each group
	filter!(programming_exp => x->!ismissing(x),experience_subset)
	experience_counts = combine(
	groupby(experience_subset,programming_exp),
	nrow=>:num_counts
	)
	Gadfly.with_theme(:dark) do

	# Subset the data to only the columns I need
	gender,age = names(dataset)[3],names(dataset)[2]
	age_with_gender_data = dataset[!,2:3]

	# Group by gender and age, then count
	age_and_gender_counts = combine(
	groupby(
	age_with_gender_data,[gender,age])
	Gadfly.with_theme(:dark) do
	age_counts = countmap(dataset[!,2])


	custom_order = ["18-21","22-24","25-29","30-34", "35-39","40-44","45-49","50-54" ,"55-59","60-69" , "70+"]
	age_order = OrderedDict()

	# Using the above custom order to make a custom dictionary
	for item in custom_order
	age_order[item] = age_counts[item]
	Gadfly.with_theme(:dark) do # Dark theme cause dark theme looks great with pluto
	set_default_plot_size(25cm ,15cm)

	# Think of this as the pandas value_counts function
	value_counts = sort(countmap(dataset[!,4]),byvalue=true)
	states = collect(keys(value_counts))


	# Going to use a dotplot for the visual
	# Using a log scale here as the graph's boring otherwise
	import time
	from collections import namedtuple

	import gi
	import re
	import numpy
	from enum import Enum
	gi.require_version("Gst", "1.0")
	gi.require_version("Tcam", "1.0")
Incident Date	Total Number of Dead and Missing	Coordinates
Wed, 01/01/2020 - 12:00	1	45.59174847, 13.85420616
Wed, 01/01/2020 - 12:00	4	19.45699612, 5.76462348
Thu, 01/02/2020 - 12:00	1	31.76402, 14.03511
Thu, 01/02/2020 - 12:00	1	20.0522119, -99.3442487
Thu, 01/02/2020 - 12:00	1	32.35061, -112.33691
Fri, 01/03/2020 - 12:00	1	32.44845, -113.11031
Fri, 01/03/2020 - 12:00	15	36.59258427, 28.98970454
Fri, 01/03/2020 - 12:00	20	16.69165509, 98.45030991
Fri, 01/03/2020 - 12:00	1	22.78796, 5.52718
	<script>
	import { json } from "d3";

	import Marks from "./Marks.svelte";

	let dataset = [];
	json(
	"https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/world.geojson"
	).then((data) => {
	dataset = data.features;