- http://blog.cloudera.com/blog/2015/07/how-to-install-apache-zeppelin-on-cdh/
- https://ypg-data.github.io/post/2016/02/running-zeppelin-on-cdh/
sudo apt-get install nodejs npm   # note: the Ubuntu package is "nodejs"; "node" is an unrelated package
/** | |
* | |
Find longest sequence of zeros in binary representation of an integer. | |
*/ | |
public class BinaryLongestZeroSequence { | |
/** | |
* worst-case time complexity is O(log(N)); | |
* number of bits = log(N) ==> worst case is O(N) | |
* @param N |
__author__ = 'user'

# NOTE(review): base64, time, KafkaConsumer and TopicPartition are unused in
# the visible snippet; kept (deduplicated) in case later code relies on them.
import base64
import json
import time

from kafka import KafkaConsumer, KafkaProducer, TopicPartition

# Topic the scraper publishes to, and the broker address.
KAFKA_TOPIC = "scraped-data"
KAFKA_HOST = "localhost:9092"

# Producer that JSON-serializes each message value to UTF-8 bytes.
producer = KafkaProducer(
    bootstrap_servers=KAFKA_HOST,
    value_serializer=lambda v: json.dumps(v).encode('utf-8'),
)
sudo apt-get install nodejs npm   # note: the Ubuntu package is "nodejs"; "node" is an unrelated package
Load the movies dump from a CSV file in HDFS:
// Raw MovieLens movies CSV: movieId,title,genres (genres pipe-separated).
val moviesDump = sc.textFile("hdfs://localhost:8020/user/datalake/movies/ml-latest/movies.csv")

case class Movie(movieId : Integer, title : String, genres : List[String])

// Parse each row into a Movie, skipping the CSV header row.
// The title is everything between the first and last field, re-joined with
// "," so titles that themselves contain commas survive the naive split
// (the original joined with "" and silently dropped those commas).
// NOTE(review): the original expression was truncated; the genres field is
// assumed to be the last column, pipe-separated — confirm against the data.
val movies = moviesDump.map(s => s.split(",")).filter(s => s(0)!="movieId")
  .map(
    s => Movie(s(0).toInt,
      s.slice(1, s.size-1).mkString(","),
      s(s.size-1).split("\\|").toList))
GET _cluster/state?pretty
GET _search
{
  "query": {
    "match_all": {}
  }
}
wikiagent.sources = spool
wikiagent.channels = memChannel
wikiagent.sinks = HDFS
# source config
wikiagent.sources.spool.type = spooldir
wikiagent.sources.spool.channels = memChannel
wikiagent.sources.spool.spoolDir = /home/ubuntu/datalake/processed
// In the cell below, determine what is the most frequent CHARACTER in the
// README, and how many times was it used? (Spark + Scala.)
// NOTE(review): the original called .equals("\n") and .equals("") on a Char,
// which always returns false (Char vs String) — those filters were no-ops.
// Compare against Char literals instead.
var charCounts2 = readme.flatMap(line => line.toList).
  filter(c => c != '\n' && c != ' ').
  map(character => (character, 1)).
  reduceByKey((a, b) => a + b).
  reduce((a, b) => if (a._2 > b._2) a else b)
// Example from https://courses.bigdatauniversity.com/courses/course-v1:BigDataUniversity+BD0212EN+2016/ exercises
// Load all trip CSV files, drop the header row, and parse each line.
val input1 = sc.textFile("data/trips/*")
val header1 = input1.first // to skip the header row
val trips = input1.
  filter(_ != header1).
  map(_.split(",")).
  map(utils.Trip.parse(_))