#include <stdio.h>
#include <pthread.h>

long state = 0;
pthread_mutex_t state_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t state_cond = PTHREAD_COND_INITIALIZER;

/* Child thread: take the mutex and report the shared state. */
void* compute(void* arg) {
    pthread_mutex_lock(&state_mutex);
    printf("child: %ld\n", state);   /* %ld, not %lu: state is a signed long */
    pthread_mutex_unlock(&state_mutex);
    return NULL;
}
yum install inotify-tools
inotifywait -e create,delete,modify,move -mr /usr/local/appcito @/usr/local/appcito/logs/ @/usr/local/appcito/flume/
(The @-prefixed paths are excluded from the recursive watch.)
Today I read about OpenFlow usage at Google. The summary is that when failures are detected in the network, the affected nodes inform the central controller. The central controller computes new topologies and distributes them to all relevant nodes, so recovery is very fast.
I also started reading about Google's datacenter architecture. The basic ideas are fascinating. The architecture is throughput-oriented rather than latency-oriented, but split-and-parallelize is used to improve latency. It helps that the web search problem is inherently splittable and parallelizable. Failures are handled in software.
- Download axel :)
- Download and install mvn
- Download Storm from https://storm.incubator.apache.org/downloads.html
- cd apache-storm-0.9.2-incubating/examples/storm-starter
- mvn compile exec:java -Dstorm.topology=storm.starter.WordCountTopology
- Lots of output tuples of the form: Emitting: count default [nature, 49]
- mvn compile exec:java -Dstorm.topology=storm.starter.BracketTopology -Dexec.args="arg1"
- Hadoop MapReduce implementation <-> Storm implementation
package storm.starter;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
Just some initial notes from the Hive paper.
- Complex types are implemented by making all types implement the SerDe and ObjectInspector interfaces. The same mechanism also supports legacy data formats. This seems cool.
- insert, update, and delete are not allowed; the only write operation available is creating a new table (along with reading).
- Only equi-joins are supported.
- Hourly loading of data into the warehouse is mentioned along with daily loading, so hourly seems to be an important use case. We should not assume daily is the only common case.
- Half the queries are ad hoc and the other half drive dashboards and reports; the two halves run against separate Hive clusters (because ad hoc queries have unpredictable resource usage).
#include <stdio.h>
#include <pthread.h>

#define MAX 1024

int shared_buffer[MAX];   /* circular buffer shared by producer and consumer */
int producer_index;       /* next slot to write */
int consumer_index;       /* next slot to read */
int num_items;            /* items currently in the buffer */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#ifndef __GRAPH_H_INCLUDED__
#define __GRAPH_H_INCLUDED__

#define MAX_VERTICES 10

struct graph {
    int vertices[MAX_VERTICES];
    int edges[MAX_VERTICES][MAX_VERTICES];   /* adjacency matrix */
    int visited[MAX_VERTICES];               /* traversal bookkeeping */
};

#endif /* __GRAPH_H_INCLUDED__ */