Skip to content

Instantly share code, notes, and snippets.

import pandas as pd
import csv
import os
import sys
# Positional command-line arguments; indexing sys.argv raises IndexError
# when fewer than three are supplied.
# NOTE(review): how these values are consumed is not visible in this excerpt.
directory = sys.argv[1]
outcomes_name = sys.argv[2]
outfile_name = sys.argv[3]
# Name of a scratch file, relative to the current working directory.
# NOTE(review): this name would shadow the stdlib `tempfile` module if that
# module were ever imported in this script.
tempfile = "temp_mort"
# Opened in "w" mode, which truncates any existing file of that name.
# NOTE(review): no matching close() is visible here; confirm the handle is
# closed later in the script.
o=open(tempfile, "w")
SSL support for Kafka has been committed upstream. To get this to work, build from trunk
mkdir -p /opt/kafka/security
mkdir /opt/kafka/security/x509
mkdir /opt/kafka/security/jks
mkdir /opt/kafka/security/CAcerts
chown -R kafka:kafka /opt/kafka/security
export JAVA_HOME=/opt/jdk1.7.0_71/
ublic class CreditAvroRecord<T extends IndexedRecord> {
private static String timingsFieldName = "timingsRec";
private static String tagsFieldName = "tags";
private static final Schema keyValueSchema = createKeyValueSchema();
private static final Schema timingsSchema = createTimingsSchema();
private static final Schema tagsSchema = createTagsSchema();
public static void setTimingsFieldName(String name) {
timingsFieldName = name;
@jholoman
jholoman / p.py
Last active October 10, 2015 13:43
avro-json to solr
import json
import sys
solrFields = []
copyFields = []
morphlinePaths = []
def getFieldString(columnName, columnType):
    """Return a Solr schema ``<field>`` element for one column.

    Args:
        columnName: Value substituted into the ``name`` attribute.
        columnType: Value substituted into the ``type`` attribute.

    Returns:
        A single-line XML string. The ``indexed`` and ``stored`` attributes
        are always "true"; ``required`` and ``multiValued`` are always
        "false".

    Note:
        The arguments are interpolated verbatim (no XML escaping), so the
        caller must pass values that are safe inside a double-quoted
        attribute.
    """
    # Avoid naming the result `str`, which would shadow the builtin.
    template = '<field name="%s" type="%s" indexed="true" stored="true" required="false" multiValued="false"/>'
    return template % (columnName, columnType)
@jholoman
jholoman / restart_workers.py
Last active August 29, 2015 14:18
Restart Dead Spark Workers
#!/usr/bin/python
## **********************************************************************
## restart-workers.py
##
## Example of how to restart a dead worker node using the Spark UI
## and the Cloudera Manager API
##
## *******************************************************************************************
@jholoman
jholoman / login.sql
Created February 13, 2015 04:26
My Oracle Login.sql
set termout off
define _editor=vi
set serveroutput on size 100000 format wrapped
column object_name format a30
column segment_name format a30
column comments format a30
column file_name format a40
column name format a30
column file_name format a30
column what format a30 word_wrapped
SELECT Count(*)
FROM (SELECT C.gcrmsacronym,
C.isccp,
C.isosc,
C.isdealer,
C.issensitive,
C.isisdabigbang,
C.isisdasmallbang,
C.isprivateclientisda,
C.isprivateclientnonisda,
@jholoman
jholoman / rp.sh
Created February 4, 2015 16:14
repackage oozie share libs
#!/bin/sh
#
#
# This will search the parcels directory for
# jars contained in a packaged oozie share lib tarball
# for mismatches and copy them to a new archive
# we always use the target version
#
if [ $# -lt 2 ];
then
import scala.collection.{mutable, Map}
/**
* Created by jholoman on 12/8/14.
*/
class ParseMapTest {
val traditional = "k1:v1,k2:v2"
val newMap = "k1:a:word:v1,k2:another:word:v2"
@jholoman
jholoman / cm_rsync
Last active August 29, 2015 14:08
CM Level DB rsyncs
#rsync_cm_data.bash
#!/bin/bash
LOGFILE=cm_data_sync.log
ADMIN_NODE1="myhost.mycompany.com"
ADMIN_NODE2="myhost2.mycompany.com"
CM_LIB_DIR="/cdh/hddata02/cloudera-scm"
while true