
@lurenx
lurenx / install-azkaban.md
Created May 14, 2016 08:17 — forked from greenqy/install-azkaban.md
install-azkaban.md
lurenx / HdfsReader.java
Created May 3, 2016 05:49
Use the Hadoop FileSystem API to read a file when the NameNode runs in HA mode (QJM)
package com.hadoop.hdfs.reader;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
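The preview stops at the imports. The point of the HA case is client configuration: the client addresses a logical nameservice rather than a single NameNode host, and a failover proxy provider discovers which NameNode is active. A minimal sketch of the relevant settings, as a Python dict for illustration (the nameservice name `mycluster`, the `nn1`/`nn2` ids, and the hostnames are hypothetical placeholders, not from the gist):

```python
# Client-side settings for reading through an HA (QJM) nameservice.
# "mycluster", "nn1"/"nn2", and the hostnames below are made-up placeholders.
ha_conf = {
    # Point at the logical nameservice, not at one NameNode host.
    "fs.defaultFS": "hdfs://mycluster",
    "dfs.nameservices": "mycluster",
    "dfs.ha.namenodes.mycluster": "nn1,nn2",
    "dfs.namenode.rpc-address.mycluster.nn1": "namenode1.example.com:8020",
    "dfs.namenode.rpc-address.mycluster.nn2": "namenode2.example.com:8020",
    # Lets the client work out which NameNode is currently active.
    "dfs.client.failover.proxy.provider.mycluster":
        "org.apache.hadoop.hdfs.server.namenode.ha."
        "ConfiguredFailoverProxyProvider",
}

# In the Java gist, each pair would be applied with conf.set(key, value)
# on the Configuration object before calling FileSystem.get(conf).
for key, value in ha_conf.items():
    print(f"{key} = {value}")
```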
lurenx / producer.py
Created March 24, 2016 10:20
kafka python producer
from pykafka import KafkaClient
import avro.schema
import io, random
from avro.io import DatumWriter
schema_path="user.avsc"
schema = avro.schema.parse(open(schema_path).read())
client = KafkaClient(hosts="127.0.0.1:9092")
topic = client.topics['avro']
lurenx / Consumer.py
Created March 24, 2016 10:19
kafka Consumer
from kafka import KafkaConsumer
import avro.schema
import avro.io
import io
# To consume messages
consumer = KafkaConsumer('my-topic',
                         group_id='my_group',
                         bootstrap_servers=['localhost:9092'])
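The preview ends before the consume loop; each message value would be Avro binary data that a DatumReader decodes against the schema. As an illustration of what that decoding does for the User schema (fields `name`, `favorite_number`, `favorite_color`, as in the avro schema gist below), here is a hand-rolled decoder using only the standard library; the sample bytes are a hypothetical encoded record, not something captured from Kafka:

```python
import io

def read_long(buf):
    """Read one Avro int/long: a zigzag-encoded variable-length integer."""
    b = buf.read(1)[0]
    n = b & 0x7F
    shift = 7
    while b & 0x80:                  # high bit set -> more bytes follow
        b = buf.read(1)[0]
        n |= (b & 0x7F) << shift
        shift += 7
    return (n >> 1) ^ -(n & 1)       # undo zigzag encoding

def read_string(buf):
    """Read one Avro string: a length prefix followed by UTF-8 bytes."""
    return buf.read(read_long(buf)).decode("utf-8")

def decode_user(data):
    """Decode one binary-encoded record of the User schema by hand."""
    buf = io.BytesIO(data)
    user = {"name": read_string(buf)}
    # Unions are encoded as the branch index (a long), then the value.
    idx = read_long(buf)             # 0 = int branch, 1 = null branch
    user["favorite_number"] = read_long(buf) if idx == 0 else None
    idx = read_long(buf)             # 0 = string branch, 1 = null branch
    user["favorite_color"] = read_string(buf) if idx == 0 else None
    return user

print(decode_user(b"\x0cAlyssa\x00\x80\x04\x02"))
# {'name': 'Alyssa', 'favorite_number': 256, 'favorite_color': None}
```

In the real consumer loop this job is done by `avro.io.DatumReader` reading each `message.value`; the sketch only shows what the wire format contains.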
lurenx / user.avsc
Created March 24, 2016 10:19
avro schema
{"namespace": "example.avro",
 "type": "record",
 "name": "User",
 "fields": [
     {"name": "name", "type": "string"},
     {"name": "favorite_number", "type": ["int", "null"]},
     {"name": "favorite_color", "type": ["string", "null"]}
 ]
}
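To make the schema concrete, here is what the Avro binary encoding of one User record looks like, built by hand with only the standard library. This mirrors what the `DatumWriter`/`BinaryEncoder` pair produces; the sample record itself is made up:

```python
def write_long(n):
    """Encode one Avro int/long: zigzag, then variable-length bytes."""
    n = (n << 1) ^ (n >> 63)          # zigzag: small magnitudes -> few bytes
    out = bytearray()
    while n > 0x7F:
        out.append((n & 0x7F) | 0x80) # high bit set -> more bytes follow
        n >>= 7
    out.append(n)
    return bytes(out)

def write_string(s):
    """Encode one Avro string: length prefix, then UTF-8 bytes."""
    data = s.encode("utf-8")
    return write_long(len(data)) + data

def encode_user(user):
    """Encode one record of the User schema by hand."""
    out = write_string(user["name"])
    # Unions are encoded as the branch index (a long), then the value.
    if user["favorite_number"] is None:
        out += write_long(1)                      # branch 1: null
    else:
        out += write_long(0) + write_long(user["favorite_number"])
    if user["favorite_color"] is None:
        out += write_long(1)                      # branch 1: null
    else:
        out += write_long(0) + write_string(user["favorite_color"])
    return out

encoded = encode_user({"name": "Alyssa", "favorite_number": 256,
                       "favorite_color": None})
print(encoded)  # b'\x0cAlyssa\x00\x80\x04\x02'
```

Note that the record's field names never appear on the wire; both sides rely on the schema to know the field order and types, which is why producer and consumer must agree on it.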
lurenx / kafka.md
Created March 24, 2016 10:08 — forked from ashrithr/kafka.md
kafka introduction

Introduction to Kafka

Kafka acts as a kind of write-ahead log (WAL) that records messages to a persistent store (disk) and allows subscribers to read and apply these changes to their own stores, each in a time-frame appropriate to its own system.

Terminology:

  • Producers send messages to brokers
  • Consumers read messages from brokers
  • Messages are sent to a topic
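The log-centric model in the bullets above can be sketched as a toy in-memory broker: a topic is an append-only list of messages, producers append to it, and each consumer tracks its own read offset and catches up at its own pace. The names here are illustrative and not any real Kafka client API:

```python
class ToyBroker:
    """Toy model of a broker: append-only per-topic logs (the 'WAL')."""

    def __init__(self):
        self.topics = {}  # topic name -> list of messages (the log)

    def produce(self, topic, message):
        """Append a message to a topic's log; return its offset."""
        log = self.topics.setdefault(topic, [])
        log.append(message)
        return len(log) - 1

    def consume(self, topic, offset):
        """Return (messages, next_offset) from the given offset onward.

        Consuming never removes anything: each consumer just remembers
        where it is in the log, which is how slow readers catch up later.
        """
        log = self.topics.get(topic, [])
        return log[offset:], len(log)

broker = ToyBroker()
broker.produce("avro", b"msg-1")
broker.produce("avro", b"msg-2")

# Two independent consumers: one from the start, one already at offset 1.
msgs_a, next_a = broker.consume("avro", 0)
msgs_b, next_b = broker.consume("avro", 1)
print(msgs_a)  # [b'msg-1', b'msg-2']
print(msgs_b)  # [b'msg-2']
```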
lurenx / avro_kafka.rb
Created March 24, 2016 09:58
ruby avro kafka
require 'poseidon'
require 'avro'
require 'json'
schema = Avro::Schema.parse(File.open("item.avsc", "rb").read)
dw = Avro::IO::DatumWriter.new(schema)
buffer = StringIO.new
encoder = Avro::IO::BinaryEncoder.new(buffer)
lurenx / items.avsc
Created March 24, 2016 09:47
avro schema
{
  "type": "record",
  "name": "Item",
  "namespace": "example.avro",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "description", "type": ["string", "null"]},
    {"name": "price", "type": ["double", "null"]}
  ]
}
lurenx / deserialise_avro.rb
Created March 24, 2016 09:33
deserialise avro
require 'rubygems'
require 'avro'
# Open items.avro file in read mode
file = File.open('items.avro', 'rb')
# Create an instance of DatumReader
reader = Avro::IO::DatumReader.new()
# Equivalent to DataFileReader instance creation in Java
lurenx / serialise_avro.rb
Created March 24, 2016 09:28
serialise_avro.rb
require 'rubygems'
require 'avro'
# Creates items.avro if it does not exist; otherwise opens it in write mode, truncating it
file = File.open('items.avro', 'wb')
# Opens item.avsc in read mode and parses the schema.
schema = Avro::Schema.parse(File.open("item.avsc", "rb").read)
# Creates DatumWriter instance with required schema.