
@mdagost
mdagost / emr.py
Created September 13, 2012 22:48 — forked from josephwilk/emr.py
Wrapper around elastic-mapreduce to make it easier to use
#!/usr/bin/env python
import datetime
import os

EMR_COMMAND = os.path.expanduser('~/elastic-mapreduce/elastic-mapreduce')
EMR_LOGGING_DIR = "s3://songkick/emr-logs"
def create_pig_job_flow(pigscript, num_instances=1, extraArguments=[]):
    jobname = "Pig_Daily_" + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    print "Creating pig job flow", jobname, pigscript
    args = [EMR_COMMAND,
            "--create",
@mdagost
mdagost / The Technical Interview Cheat Sheet.md
Created September 25, 2015 15:20 — forked from tsiege/The Technical Interview Cheat Sheet.md
This is my technical interview cheat sheet. Feel free to fork it or do whatever you want with it. PLEASE let me know if there are any errors or if anything crucial is missing. I will add more links soon.

Studying for a Tech Interview Sucks, so Here's a Cheat Sheet to Help

This list is meant to be both a quick guide and a reference for further research into these topics. It's basically a summary of that comp sci course you never took or forgot about, so there's no way it can cover everything in depth. It also will be available as a gist on GitHub for everyone to edit and add to.

Data Structure Basics

### Array

#### Definition:

  • Stores data elements based on a sequential, most commonly 0-based, index (see the short example below).
  • Based on tuples from set theory.
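A tiny Python sketch of those two points (a fixed sequence of elements addressed by a 0-based index); illustrative only:
votes = [3, 9, 11, 6]          # an array (Python list) of four elements
print(votes[0])                # 0-based: the first element is at index 0 -> 3
print(votes[len(votes) - 1])   # the last element is at index len - 1 -> 6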
#!/usr/local/bin/Rscript
'usage: my_prog.R [-a -r -m <msg>]

options:
 -a         Add
 -r         Remote
 -m <msg>   Message' -> doc

library(docopt)
opts <- docopt(doc)  # parse command-line arguments against the usage string above
@chrishamant
chrishamant / s3_multipart_upload.py
Created January 3, 2012 19:29
Example of Parallelized Multipart upload using boto
#!/usr/bin/env python
"""Split large file into multiple pieces for upload to S3.
S3 only supports 5Gb files for uploading directly, so for larger CloudBioLinux
box images we need to use boto's multipart file support.
This parallelizes the task over available cores using multiprocessing.
Usage:
s3_multipart_upload.py <file_to_transfer> <bucket_name> [<s3_key_name>]
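The preview stops at the usage line. Below is a minimal sketch of the core multipart flow with classic boto, without the multiprocessing layer; it assumes AWS credentials are available in the environment, and the part size is illustrative.
import math
import os
import boto

def upload_multipart(path, bucket_name, key_name, part_size=50 * 1024 * 1024):
    # Sketch only: upload `path` to S3 in fixed-size parts via boto's multipart API.
    file_size = os.path.getsize(path)
    num_parts = int(math.ceil(file_size / float(part_size)))
    conn = boto.connect_s3()
    bucket = conn.lookup(bucket_name)
    mp = bucket.initiate_multipart_upload(key_name)
    with open(path, 'rb') as f:
        for i in range(num_parts):
            # Each part reads at most part_size bytes from the file's current offset.
            mp.upload_part_from_file(f, part_num=i + 1,
                                     size=min(part_size, file_size - i * part_size))
    mp.complete_upload()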
library(XML)
library(uuid)
library(stringr)
library(plyr)
library(reshape2)
library(ggplot2)
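# Per-state electoral college vote counts, from Chris Taylor's USElection repo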
f <- "https://raw.githubusercontent.com/chris-taylor/USElection/master/data/electoral-college-votes.csv"
electoral.college <- read.csv(f, header=FALSE)
names(electoral.college) <- c("state", "electoral_votes")
# See official docs at https://dash.plotly.com
# pip install dash pandas
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv')
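The preview ends after loading the data. A minimal sketch of how such an app might continue, following the pattern in the Dash docs; the layout, component IDs, and callback below are illustrative, not part of the original snippet.
app = Dash(__name__)

app.layout = html.Div([
    dcc.Graph(id='life-exp-graph'),
    dcc.Slider(df['year'].min(), df['year'].max(), step=None,
               value=df['year'].min(),
               marks={str(y): str(y) for y in df['year'].unique()},
               id='year-slider'),
])

@app.callback(Output('life-exp-graph', 'figure'), Input('year-slider', 'value'))
def update_figure(selected_year):
    # Re-plot GDP per capita vs. life expectancy for the selected year.
    filtered = df[df.year == selected_year]
    return px.scatter(filtered, x='gdpPercap', y='lifeExp', size='pop',
                      color='continent', hover_name='country', log_x=True)

if __name__ == '__main__':
    app.run(debug=True)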
@barnes7td
barnes7td / sublime_setup.md
Last active March 28, 2024 17:59
Sublime Terminal Setup

Set up Terminal for the Sublime Shortcut "subl":

Open terminal and type:

1. Create a directory at ~/bin:

mkdir ~/bin

2. Copy sublime executable to your ~/bin directory:
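The listing cuts off before the command for this step. A common approach (assuming Sublime Text 2 is installed in /Applications) is to symlink the bundled subl binary into ~/bin rather than copy it:

ln -s "/Applications/Sublime Text 2.app/Contents/SharedSupport/bin/subl" ~/bin/subl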

@jasonrudolph
jasonrudolph / 00-about-search-api-examples.md
Last active April 30, 2024 19:21
5 entertaining things you can find with the GitHub Search API
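The gist body isn't shown in this listing. As a flavor of the kind of query it describes, here is a small illustrative sketch against the repository-search endpoint (the query string is made up; unauthenticated calls work but are heavily rate-limited):
import requests

resp = requests.get(
    "https://api.github.com/search/repositories",
    params={"q": "cheat sheet in:name", "sort": "stars"},
    headers={"Accept": "application/vnd.github+json"},
)
for item in resp.json().get("items", [])[:5]:
    print(item["full_name"], item["stargazers_count"])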
@rgreenjr
rgreenjr / postgres_queries_and_commands.sql
Last active May 14, 2024 11:33
Useful PostgreSQL Queries and Commands
-- show running queries (pre 9.2)
SELECT procpid, age(clock_timestamp(), query_start), usename, current_query
FROM pg_stat_activity
WHERE current_query != '<IDLE>' AND current_query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start desc;
-- show running queries (9.2)
SELECT pid, age(clock_timestamp(), query_start), usename, query
FROM pg_stat_activity
WHERE query != '<IDLE>' AND query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start desc;