AnupJoseph

## App.svelte
<script>
  import { json } from "d3";

  import Marks from "./Marks.svelte";

  let dataset = [];
  json(
    "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/world.geojson"
  ).then((data) => {
    dataset = data.features;

## Axis.svelte
<script>
  import { select, selectAll } from "d3-selection";
  import { axisBottom, axisLeft } from "d3-axis";
  import { timeFormat } from "d3-time-format";

  export let innerHeight;
  export let margin;
  export let position;
  export let scale;

## kaggle_size_income.jl
Gadfly.with_theme(:dark) do
	set_default_plot_size(30cm ,15cm)
	size_col,income_col = "What is the size of the company where you are employed?","What is your current yearly compensation (approximate \$USD)?"
	size_df = select(dataset,[size_col,income_col])

	filter!(size_col => x->!ismissing(x),size_df)
	filter!(income_col => x->!ismissing(x),size_df)


	size_df[!,:median_income] = build_median_income.(size_df[!,income_col])

## kaggle_age_income_graph.jl
Gadfly.with_theme(:dark) do
	income_col,exp_col =
	"What is your current yearly compensation (approximate \$USD)?","For how many years have you been writing code and/or programming?"
	income_subset = select(dataset,[income_col,exp_col])

	# Drop all rows with missing entries, as they are too hard to deal with here
	filter!(exp_col => x->!ismissing(x),income_subset)
	filter!(income_col => x->!ismissing(x),income_subset)
	income_exp_counts = combine(
		groupby(income_subset

## kaggle_age_income_preprocessors.jl
function build_median_income(x)
	x =strip(x,['>','\$','<'])
	if occursin("-",x)
		low,high = split(x,"-")
		low = parse(Int,replace(low,","=>""))
		high = parse(Int,replace(high,","=>""))

		return (low+high)/2
	else
		x = parse(Int,replace(x,","=>""))

## kaggle_experience_plot.jl
Gadfly.with_theme(:dark) do
	programming_exp = "For how many years have you been writing code and/or programming?"
	experience_subset = select(dataset,programming_exp)

	# Filter out missing values and count the number of respondents in each group
	filter!(programming_exp => x->!ismissing(x),experience_subset)
	experience_counts = combine(
		groupby(experience_subset,programming_exp),
		nrow=>:num_counts
	)

## kaggle_age_gender_plot.jl
Gadfly.with_theme(:dark) do

	# Subset the data to only the columns I need
	gender,age = names(dataset)[3],names(dataset)[2]
	age_with_gender_data = dataset[!,2:3]

	# Group by age and gender, and then count
	age_and_gender_counts = combine(
		groupby(
			age_with_gender_data,[gender,age])

## kaggle_barplot.jl
Gadfly.with_theme(:dark) do
	age_counts = countmap(dataset[!,2])


	custom_order = ["18-21","22-24","25-29","30-34", "35-39","40-44","45-49","50-54" ,"55-59","60-69" , "70+"]
	age_order = OrderedDict()

	# Using the above custom order to make a custom dictionary
	for item in custom_order
		age_order[item] = age_counts[item]

## kaggle_countplot.jl
Gadfly.with_theme(:dark) do   # Dark theme cause dark theme looks great with pluto
	set_default_plot_size(25cm ,15cm)

	# Think of this as the equivalent of pandas' value_counts function
	value_counts = sort(countmap(dataset[!,4]),byvalue=true)
	states = collect(keys(value_counts))


	# Going to use a dotplot for the visual
	# Using a log scale here as the graph's boring otherwise

## TIS.py
import time
from collections import namedtuple

import gi
import re
import numpy
from enum import Enum
gi.require_version("Gst", "1.0")
gi.require_version("Tcam", "1.0")
	<script>
	import { json } from "d3";

	import Marks from "./Marks.svelte";

	let dataset = [];
	json(
	"https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/world.geojson"
	).then((data) => {
	dataset = data.features;
	<script>
	import { select, selectAll } from "d3-selection";
	import { axisBottom, axisLeft } from "d3-axis";
	import { timeFormat } from "d3-time-format";

	export let innerHeight;
	export let margin;
	export let position;
	export let scale;
	Gadfly.with_theme(:dark) do
	set_default_plot_size(30cm ,15cm)
	size_col,income_col = "What is the size of the company where you are employed?","What is your current yearly compensation (approximate \$USD)?"
	size_df = select(dataset,[size_col,income_col])

	filter!(size_col => x->!ismissing(x),size_df)
	filter!(income_col => x->!ismissing(x),size_df)


	size_df[!,:median_income] = build_median_income.(size_df[!,income_col])
	Gadfly.with_theme(:dark) do
	income_col,exp_col =
	"What is your current yearly compensation (approximate \$USD)?","For how many years have you been writing code and/or programming?"
	income_subset = select(dataset,[income_col,exp_col])

	# Drop all rows with missing entries, as they are too hard to deal with here
	filter!(exp_col => x->!ismissing(x),income_subset)
	filter!(income_col => x->!ismissing(x),income_subset)
	income_exp_counts = combine(
	groupby(income_subset
	function build_median_income(x)
	x =strip(x,['>','\$','<'])
	if occursin("-",x)
	low,high = split(x,"-")
	low = parse(Int,replace(low,","=>""))
	high = parse(Int,replace(high,","=>""))

	return (low+high)/2
	else
	x = parse(Int,replace(x,","=>""))
	Gadfly.with_theme(:dark) do
	programming_exp = "For how many years have you been writing code and/or programming?"
	experience_subset = select(dataset,programming_exp)

	# Filter out missing values and count the number of respondents in each group
	filter!(programming_exp => x->!ismissing(x),experience_subset)
	experience_counts = combine(
	groupby(experience_subset,programming_exp),
	nrow=>:num_counts
	)
	Gadfly.with_theme(:dark) do

	# Subset the data to only the columns I need
	gender,age = names(dataset)[3],names(dataset)[2]
	age_with_gender_data = dataset[!,2:3]

	# Group by age and gender, and then count
	age_and_gender_counts = combine(
	groupby(
	age_with_gender_data,[gender,age])
	Gadfly.with_theme(:dark) do
	age_counts = countmap(dataset[!,2])


	custom_order = ["18-21","22-24","25-29","30-34", "35-39","40-44","45-49","50-54" ,"55-59","60-69" , "70+"]
	age_order = OrderedDict()

	# Using the above custom order to make a custom dictionary
	for item in custom_order
	age_order[item] = age_counts[item]
	Gadfly.with_theme(:dark) do # Dark theme cause dark theme looks great with pluto
	set_default_plot_size(25cm ,15cm)

	# Think of this as the equivalent of pandas' value_counts function
	value_counts = sort(countmap(dataset[!,4]),byvalue=true)
	states = collect(keys(value_counts))


	# Going to use a dotplot for the visual
	# Using a log scale here as the graph's boring otherwise
	import time
	from collections import namedtuple

	import gi
	import re
	import numpy
	from enum import Enum
	gi.require_version("Gst", "1.0")
	gi.require_version("Tcam", "1.0")