Skip to content

Instantly share code, notes, and snippets.

View rustyrazorblade's full-sized avatar

Jon Haddad rustyrazorblade

View GitHub Profile
# Generate ./policy-rc.d: a tiny /bin/sh script that prints a denial message
# to stderr and exits with status 101. The heredoc delimiter is quoted so the
# script body is written out literally, with no shell expansion.
cat << 'EOF' > ./policy-rc.d
#!/bin/sh
echo "All runlevel operations denied by policy" >&2
exit 101
EOF
jhaddad@rustyrazorblade ~$ ccm node1 cqlsh
Connected to 3.10 at 127.0.0.1:9042.
[cqlsh 5.0.1 | Cassandra 3.10 | CQL spec 3.4.4 | Native protocol v4]
Use HELP for help.
cqlsh> create KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};
cqlsh> use test;
cqlsh:test> create table
...
cqlsh:test>
cqlsh:test> create table hello(
1|Toy Story (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)|0|0|0|1|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0
2|GoldenEye (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?GoldenEye%20(1995)|0|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|0
3|Four Rooms (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Four%20Rooms%20(1995)|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|0
4|Get Shorty (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Get%20Shorty%20(1995)|0|1|0|0|0|1|0|0|1|0|0|0|0|0|0|0|0|0|0
5|Copycat (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Copycat%20(1995)|0|0|0|0|0|0|1|0|1|0|0|0|0|0|0|0|1|0|0
6|Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)|01-Jan-1995||http://us.imdb.com/Title?Yao+a+yao+yao+dao+waipo+qiao+(1995)|0|0|0|0|0|0|0|0|1|0|0|0|0|0|0|0|0|0|0
7|Twelve Monkeys (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Twelve%20Monkeys%20(1995)|0|0|0|0|0|0|0|0|1|0|0|0|0|0|0|1|0|0|0
8|Babe (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Babe%20(1995)|0|0|0|0|1|1|0|0|1|0|0|0|0|0|0|0|0
from pyspark.sql import SQLContext
# `sc` and `hdfs_path` are not defined in this snippet; presumably `sc` is the
# SparkContext supplied by the pyspark shell -- confirm before running.
sql = SQLContext(sc)

# Read the lens.raw_data Cassandra table into a DataFrame.
cassandra_reader = sql.read.format("org.apache.spark.sql.cassandra")
df = cassandra_reader.options(keyspace="lens", table="raw_data").load()

# Write the DataFrame out as parquet at hdfs_path.
df.write.save(hdfs_path, format='parquet')
@rustyrazorblade
rustyrazorblade / titan.groovy
Last active November 18, 2015 16:50
titan example
// Open a Titan graph configured with the 'inmemory' storage backend.
graph = TitanFactory.build().set('storage.backend', 'inmemory').open()
// Traversal source used to query the graph.
g = graph.traversal()
// NOTE(review): `label` is not defined in this snippet; presumably the
// T.label token statically imported by the Gremlin console -- confirm.
// One "actor" vertex carrying a "name" property.
jcvd = graph.addVertex(label, "actor", "name", "jean claude van damme")
// Five "movie" vertices, each carrying "name" and "year" properties.
kick = graph.addVertex(label, "movie", "name", "Kickboxer", "year", 1989)
blood = graph.addVertex(label, "movie", "name", "Bloodsport", "year", 1988)
timecop = graph.addVertex(label, "movie", "name", "Timecop", "year", 1994)
inferno = graph.addVertex(label, "movie", "name", "Inferno","year", 1999)
sd = graph.addVertex(label, "movie", "name", "Sudden Death","year", 1995)
@rustyrazorblade
rustyrazorblade / gist:cb42d4aee0449233b463
Last active September 11, 2015 18:57
cassandra storage format notes
# https://github.com/apache/cassandra/blob/cassandra-3.0/src/java/org/apache/cassandra/io/sstable/format/big/BigTableWriter.java#L135
# UnfilteredRowIteratorSerializer
https://github.com/apache/cassandra/blob/cassandra-3.0/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java#L34-34
Partition Header: <key><flags><s_header>[<partition_deletion>][<static_row>][<row_estimate>]
Then the rows
https://github.com/apache/cassandra/blob/cassandra-3.0/src/java/org/apache/cassandra/db/rows/BufferCell.java#L200-200
from pyspark.sql import SQLContext
# `sc` is not defined in this snippet; presumably the SparkContext supplied by
# the pyspark shell -- confirm before running.
sql = SQLContext(sc)

# Load the movielens.movies table over JDBC.
mysql_movies = sql.read.jdbc(
    url="jdbc:mysql://127.0.0.1:3307/movielens?user=root",
    table="movielens.movies",
)

# Append the rows to the lens.movies Cassandra table.
cassandra_writer = mysql_movies.write.format("org.apache.spark.sql.cassandra")
cassandra_writer.options(table="movies", keyspace="lens").mode("append").save()
@rustyrazorblade
rustyrazorblade / gist:9a38a9499a7531eefd1e
Last active August 29, 2015 14:27
failing migration
####### SPARK JOB #############
from pyspark import SparkContext
from pyspark.sql import SQLContext
# Create the Spark entry points for this standalone job.
sc = SparkContext()
sql = SQLContext(sc)

# Load the movielens.movies table over JDBC.
mysql_movies = sql.read.jdbc("jdbc:mysql://127.0.0.1:3307/movielens?user=root", "movielens.movies")

# Show the first row as a sanity check. The call form of print produces the
# same output on Python 2 for a single argument and is valid on Python 3.
print(mysql_movies.head())

# Write the rows to the lens.movie Cassandra table. save() returns None, so
# its result is not bound to a name.
mysql_movies.write.format("org.apache.spark.sql.cassandra").options(table="movie", keyspace="lens").save()
<std macros>:5:8: 6:42 error: mismatched types:
expected `()`,
found `core::result::Result<_, _>`
(expected (),
found enum `core::result::Result`) [E0308]
<std macros>:5 return $ crate:: result:: Result:: Err (
<std macros>:6 $ crate:: convert:: From:: from ( err ) ) } } )
<std macros>:1:1: 6:48 note: in expansion of try!
tests/traversals.rs:82:40: 82:65 note: expansion site
<std macros>:5:8: 6:42 help: run `rustc --explain E0308` to see a detailed explanation
@rustyrazorblade
rustyrazorblade / gist:959ad9473df74cea887b
Last active August 29, 2015 14:24
pyspark 1.4 with dataframes first gist on the planet
from pyspark.sql import SQLContext
# `sc` is not defined in this snippet; presumably the SparkContext supplied by
# the pyspark shell -- confirm before running.
sql = SQLContext(sc)

# Read the labor.average_price_data Cassandra table into a DataFrame.
cassandra_reader = sql.read.format("org.apache.spark.sql.cassandra")
apd = cassandra_reader.options(keyspace="labor", table="average_price_data").load()