Skip to content

Instantly share code, notes, and snippets.

from data_requirements import DataRequirements
from data_source import DataSource
from sentiment_redshift import RedshiftRavenPackDataProvider, Equity
from pandas import to_datetime
import pandas as pd
from collections import namedtuple
from sqlalchemy import create_engine
import pytz
import os

# Connection URL for the Redshift cluster. When the REDSHIFT_URL environment
# variable is set it takes precedence, so credentials can be supplied at
# deploy time instead of being edited into this file.
# NOTE(review): the fallback below embeds a username and password in plain
# text. Rotate these credentials and remove the literal once every
# environment provides REDSHIFT_URL.
_DEFAULT_REDSHIFT_URL = 'redshift+psycopg2://masteruser:M4steruser@marketdata-test.cq0v4ljf3cuw.us-east-1.redshift.amazonaws.com:5439/dev'

# echo=True is preserved from the original call (SQLAlchemy statement logging).
redshift = create_engine(os.environ.get('REDSHIFT_URL', _DEFAULT_REDSHIFT_URL), echo=True)
#!/usr/bin/env groovy
//@Grab('com.tinkerpop.blueprints:blueprints-core:2.6.0')
//import com.tinkerpop.blueprints.impls.tg.TinkerGraph
@Grab('com.thinkaurelius.titan:titan-all:0.5.4')
import com.thinkaurelius.titan.core.TitanFactory
import com.tinkerpop.blueprints.Vertex
@Grab('com.xlson.groovycsv:groovycsv:1.0')
import static com.xlson.groovycsv.CsvParser.parseCsv
import com.tinkerpop.blueprints.util.io.graphson.GraphSONWriter
_c0 network provider
2911078 ab ab
17048734 an an
12223 lr lr
197901828 ep lr
30923177 ad bk
138172797 bk bk
552494506 vw bk
---
# Play: label and partition the listed disks on the "druid" hosts.
- hosts: druid
  sudo: yes
  tasks:
    # A task mapping can hold only one `command` key; when the key is repeated
    # the YAML loader keeps just the last value, so the label step would be
    # silently dropped. Each parted invocation therefore gets its own task.
    - name: Create GPT disk labels
      command: /sbin/parted --script /dev/{{ item }} mklabel gpt
      with_items:
        - sda
        - sdb

    # Runs after the labels exist: one primary partition spanning 0%-100%.
    - name: Create partitions
      command: /sbin/parted --script /dev/{{ item }} mkpart primary 0% 100%
      with_items:
        - sda
        - sdb
day hour edge_name _c3
06 0000 batchimport 995,929,912
06 0000 dal 742,695,036
06 0000 dc 868,402,251
06 0000 multiscreen 48,844,550
06 0000 sea 479,333,938
06 0100 batchimport 107,987,677
06 0100 dal 87,756,429
06 0100 dc 98,111,624
06 0100 multiscreen 5,733,388
{
"Acrobatics": "Acrobatics",
"Aerobatics": "Aerobatics",
"Air Racing": "Air Racing",
"Alpine Skiing": "Alpine Skiing",
"American Handball": "American Handball",
"Archery": "Archery",
"Arena Football": "Arena Football",
"Arm Wrestling": "Arm Wrestling",
"Artistic Cycling": "Artistic Cycling",
#!/usr/bin/env ruby
require 'json'
require 'json/add/core'
require 'open-uri'
require 'pp'
# Download the sports profile listing and parse the response body as JSON.
# URI#read (provided by open-uri) is used because Kernel#open no longer
# accepts URLs on Ruby 3.0 and later.
api_results = JSON.parse(URI.parse('http://www.sport195.com/api/service/-/profiles/sports?per_page=10000').read)

# Parse the local translations file. File.read opens, reads, and closes the
# file in a single call, so no file handle is left open.
english_translations = JSON.parse(File.read('./english_translations.json'))
#!/usr/bin/env ruby
require 'colorize'
# Collect every non-empty input line (from the files named on the command
# line, or stdin) with its trailing newline removed.
regions = ARGF.each_line.map(&:chomp).reject(&:empty?)
#!/usr/bin/env ruby
#
# HOW TO USE:
#
# $ gem install activerecord
# $ irb
# irb> require './oozie_db.rb'
# irb> coord = Coordinator.find("BIG-LONG-ID-C")
# irb> coord.coordinator_actions.map(&:missing_dependencies).select {|d| d != ""} # => Find all actions with missing dependencies
-- Job configuration: compress job output with the Snappy codec, using
-- BLOCK compression type.
SET mapreduce.output.fileoutputformat.compress true
SET mapreduce.output.fileoutputformat.compress.codec org.apache.hadoop.io.compress.SnappyCodec
SET mapreduce.output.fileoutputformat.compress.type BLOCK
-- Turn on Pig's pig.exec.mapPartAgg option (map-side partial aggregation,
-- per the Pig documentation).
SET pig.exec.mapPartAgg true
-- Submit the job to the "testing" queue.
SET mapreduce.job.queuename testing
-- Load the 'chris_295640.impression' table through HCatalog's Pig loader.
xfp_all = LOAD 'chris_295640.impression' USING org.apache.hcatalog.pig.HCatLoader();
-- Restrict to a single hour: 2013-06-19, hour 20. The compared values are
-- strings. NOTE(review): year/month/day/hour look like partition columns
-- given the alias name -- confirm against the table definition.
xfp_partition = FILTER xfp_all BY year=='2013' AND month=='06' AND day=='19' AND hour=='20';
-- Keep rows whose lineitemid is non-zero and whose time field is not the
-- literal 'Time'. NOTE(review): the 'Time' check presumably drops header
-- rows that were ingested as data -- confirm.
xfp = FILTER xfp_partition BY lineitemid != 0 AND time != 'Time';