Anand Nalya anandnalya

## server.py
#!/usr/bin/env python3
"""
License: MIT License
Copyright (c) 2023 Miel Donkers

Very simple HTTP server in python for logging requests
Usage::
    ./server.py [<port>]
"""
from http.server import BaseHTTPRequestHandler, HTTPServer

## install.sh
# Ask for the user password
# Script only works if sudo caches the password for a few minutes
sudo true

# Install kernel extras to enable Docker aufs support
# sudo apt-get -y install linux-image-extra-$(uname -r)

# Add Docker PPA and install latest version
# sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 36A1D7869245C8950F966E92D8576A8BA88D21E9
# sudo sh -c "echo deb https://get.docker.io/ubuntu docker main > /etc/apt/sources.list.d/docker.list"

## gist:8172796

      
              1 file
            
          
              403 forks
            
          
              23 comments
            
          
              1642 stars
            
          
                debasishg
                / gist:8172796
            
            
              Last active
              March 15, 2024 15:05
            
              
                A collection of links for streaming algorithms and data structures
              
          
    General Background and Overview


Probabilistic Data Structures for Web Analytics and Data Mining : A great overview of the space of probabilistic data structures and how they are used in approximation algorithm implementation.
Models and Issues in Data Stream Systems
Philippe Flajolet’s contribution to streaming algorithms : A presentation by Jérémie Lumbroso that visits some of the historical perspectives and how it all began with Flajolet
Approximate Frequency Counts over Data Streams by Gurmeet Singh Manku & Rajeev Motwani : One of the early papers on the subject.
[Methods for Finding Frequent Items in Data Streams](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.187.9800&amp;rep=rep1&amp;t


## zook_grow.md

      
              1 file
            
          
              20 forks
            
          
              15 comments
            
          
              89 stars
            
          
                miketheman
                / zook_grow.md
            
            
              Created
              July 22, 2013 21:36
            
              
                Adding nodes to a ZooKeeper ensemble
              
          
    Adding 2 nodes to an existing 3-node ZooKeeper ensemble without losing the Quorum

Since many deployments may start out with 3 nodes and so little is known about how to grow a cluster from 3 members to 5 members without losing the existing Quorum, here is an example of how this might be achieved.
In this example, all 5 nodes will be running on the same Vagrant host for the purpose of illustration, running on distinct configurations (ports and data directories) without the actual load of clients.
YMMV. Caveat usufructuarius.
Step 1: Have a healthy 3-node ensemble


## dim_calendar.hql

set hivevar:start_date=0000-01-01;
set hivevar:days=1000000;
set hivevar:table_name=[INSERT YOUR TABLE NAME HERE];

-- If you are running a version of HIVE prior to 1.2, comment out all uses of date_format() and uncomment the lines below for equivalent functionality

CREATE TABLE IF NOT EXISTS ${table_name} AS
WITH dates AS (
    SELECT date_add("${start_date}", a.pos) as date

## backup.sh
#!/bin/bash
# herein we back up our indexes! this script should run at like 6pm or something, after logstash
# rotates to a new ES index and there's no new data coming in to the old one. we grab metadata,
# compress the data files, create a restore script, and push it all up to S3.
TODAY=`date +"%Y.%m.%d"`
INDEXNAME="logstash-$TODAY" # this had better match the index name in ES
INDEXDIR="/usr/local/elasticsearch/data/logstash/nodes/0/indices/"
BACKUPCMD="/usr/local/backupTools/s3cmd --config=/usr/local/backupTools/s3cfg put"
BACKUPDIR="/mnt/es-backups/"
YEARMONTH=`date +"%Y-%m"`

## TRUNCATE and DROP are both minimally logged.sql
SET NOCOUNT ON;
USE [tempdb];

CREATE TABLE a_farting_farthing (
	  an_integer	INT	DEFAULT (1)
);

INSERT INTO a_farting_farthing
DEFAULT VALUES;

## backup.sh
# TO_FOLDER=/something
# FROM=/your-es-installation

DATE=`date +%Y-%m-%d_%H-%M`
TO=$TO_FOLDER/$DATE/
echo "rsync from $FROM to $TO"
# the first time, rsync can take a while - do not disable flushing
rsync -a $FROM $TO

# now disable flushing and do one manual flushing

## backup.sh
# Script to be placed in elasticsearch/bin
# Launch it from elasticsearch dir
# bin/backup indexname
# We suppose that data are under elasticsearch/data
# It will create a backup file under elasticsearch/backup

if [ -z "$1" ]; then
  INDEX_NAME="dummy"
else
  INDEX_NAME=$1

## log_backup.bash
#!/usr/bin/env bash

###############FUNCTIONS############

function prepare {
    #optimize the index
    echo -n "Optimizing index $INDEX_NAME..."
    curl -XPOST "$ADDRESS/$INDEX_NAME/_optimize" 2>/dev/null| grep 'failed":0' >/dev/null
    if [ $? -eq 0 ]; then
        echo "done"
	#!/usr/bin/env python3
	"""
	License: MIT License
	Copyright (c) 2023 Miel Donkers

	Very simple HTTP server in python for logging requests
	Usage::
	./server.py [<port>]
	"""
	from http.server import BaseHTTPRequestHandler, HTTPServer
	# Ask for the user password
	# Script only works if sudo caches the password for a few minutes
	sudo true

	# Install kernel extras to enable Docker aufs support
	# sudo apt-get -y install linux-image-extra-$(uname -r)

	# Add Docker PPA and install latest version
	# sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 36A1D7869245C8950F966E92D8576A8BA88D21E9
	# sudo sh -c "echo deb https://get.docker.io/ubuntu docker main > /etc/apt/sources.list.d/docker.list"

	set hivevar:start_date=0000-01-01;
	set hivevar:days=1000000;
	set hivevar:table_name=[INSERT YOUR TABLE NAME HERE];

	-- If you are running a version of HIVE prior to 1.2, comment out all uses of date_format() and uncomment the lines below for equivalent functionality

	CREATE TABLE IF NOT EXISTS ${table_name} AS
	WITH dates AS (
	SELECT date_add("${start_date}", a.pos) as date
	#!/bin/bash
	# herein we back up our indexes! this script should run at like 6pm or something, after logstash
	# rotates to a new ES index and there's no new data coming in to the old one. we grab metadata,
	# compress the data files, create a restore script, and push it all up to S3.
	TODAY=`date +"%Y.%m.%d"`
	INDEXNAME="logstash-$TODAY" # this had better match the index name in ES
	INDEXDIR="/usr/local/elasticsearch/data/logstash/nodes/0/indices/"
	BACKUPCMD="/usr/local/backupTools/s3cmd --config=/usr/local/backupTools/s3cfg put"
	BACKUPDIR="/mnt/es-backups/"
	YEARMONTH=`date +"%Y-%m"`
	SET NOCOUNT ON;
	USE [tempdb];

	CREATE TABLE a_farting_farthing (
	an_integer INT DEFAULT (1)
	);

	INSERT INTO a_farting_farthing
	DEFAULT VALUES;
	# TO_FOLDER=/something
	# FROM=/your-es-installation

	DATE=`date +%Y-%m-%d_%H-%M`
	TO=$TO_FOLDER/$DATE/
	echo "rsync from $FROM to $TO"
	# the first time, rsync can take a while - do not disable flushing
	rsync -a $FROM $TO

	# now disable flushing and do one manual flushing
	# Script to be placed in elasticsearch/bin
	# Launch it from elasticsearch dir
	# bin/backup indexname
	# We suppose that data are under elasticsearch/data
	# It will create a backup file under elasticsearch/backup

	if [ -z "$1" ]; then
	INDEX_NAME="dummy"
	else
	INDEX_NAME=$1
	#!/usr/bin/env bash

	###############FUNCTIONS############

	function prepare {
	#optimize the index
	echo -n "Optimizing index $INDEX_NAME..."
	curl -XPOST "$ADDRESS/$INDEX_NAME/_optimize" 2>/dev/null\| grep 'failed":0' >/dev/null
	if [ $? -eq 0 ]; then
	echo "done"