AshwinJay/bash-loop.sh

## bash-loop.sh
#! /bin/bash

# Flat credential list: every user name is immediately followed by its password.
a=(user1 pass1 user2 pass2)

# Walk the list two entries at a time so that a[i] is the user and a[i+1] the password.
i=0
while (( i < ${#a[@]} )); do
  echo "Loop 1: ${a[i]}: ${a[i+1]}"
  (( i += 2 ))
done

# Two parallel lists: a1[k] holds a user name and a2[k] the password at the same index.
a1=(user1 user2)
a2=(pass1 pass2)

# Visit every index of a1 and print the entries found at that index in both lists,
# together with the total number of entries in a1.
i=0
until (( i >= ${#a1[@]} )); do
  echo "Loop 2: ${a1[i]}: ${a2[i]} of ${#a1[@]}"
  (( i += 1 ))
done

# Number of servers that receive a set of exposed ports.
MAX_SERVERS=4
# Per-service starting ports; the loops below do not read these three.
START_DW_PORT=9800
START_DW_ADMIN_PORT=9801
START_ES_HTTP_PORT=9200
# First port of the exposed range handed out below.
START_EXPOSED_PORT=11000

# One array per service; index k holds the exposed port of server k.
EXPOSED_DW_PORT=()
EXPOSED_DW_ADMIN_PORT=()
EXPOSED_ES_HTTP_PORT=()
EXPOSED_ES_UNICAST_PORT=()

# Every server takes four consecutive ports from the exposed range, in the
# order DW, DW admin, ES HTTP, ES unicast; i is the offset of the server's first port.
for ((i = 0; i < MAX_SERVERS * 4; i += 4)); do
  EXPOSED_DW_PORT+=("$((START_EXPOSED_PORT + i))")
  EXPOSED_DW_ADMIN_PORT+=("$((START_EXPOSED_PORT + i + 1))")
  EXPOSED_ES_HTTP_PORT+=("$((START_EXPOSED_PORT + i + 2))")
  EXPOSED_ES_UNICAST_PORT+=("$((START_EXPOSED_PORT + i + 3))")
done

# Print one line per server listing its four exposed ports.
i=0
while (( i < MAX_SERVERS )); do
  echo "${EXPOSED_DW_PORT[i]} ${EXPOSED_DW_ADMIN_PORT[i]} ${EXPOSED_ES_HTTP_PORT[i]} ${EXPOSED_ES_UNICAST_PORT[i]}"
  (( i += 1 ))
done

## github-ops.sh
# Fetch the upstream remote and rebase the current branch onto upstream/master.
git fetch upstream
git rebase upstream/master

git fetch origin
# Rebase from my own fork
git rebase origin/map-merge
git push origin map-merge:map-merge

# map-merge-fresh is a new local branch with a clean history.
# --force makes the remote map-merge branch point at it even though the histories differ.
git push origin map-merge-fresh:map-merge --force

## lesson1.sh
# All these commands can be found on Mac/GNU Linux (Naturally) and Gow on Windows (https://github.com/bmatzelle/gow)

# Let's download a log file which has some Java stacktraces and exceptions.
# The file is saved in the current directory under the last component of the URL.
# With Gow on Windows use: wget --no-check-certificate
wget https://issues.apache.org/jira/secure/attachment/12660187/dtest_sstablesplit_test-node1.log.txt
# List the directory contents (-a includes hidden entries, -l uses the long format).
ls -al

# Count the number of lines in the file.
wc -l dtest_sstablesplit_test-node1.log.txt
# Sample output
# 809 dtest_sstablesplit_test-node1.log.txt

# Search dtest_sstablesplit_test-node1.log.txt file for "WARN" text.
# grep prints every line of the file that contains the pattern.
grep "WARN" dtest_sstablesplit_test-node1.log.txt
# Sample output
# WARN  [main] 2014-08-06 10:19:09,268 SystemKeyspace.java:696 - No host ID found, created a19e8911-e77c-4280-ae15-c7ed162e3a55 (Note: This should happen exactly once per node).
# WARN  [SharedPool-Worker-1] 2014-08-06 10:19:13,101 FBUtilities.java:359 - Trigger directory doesn't exist, please create it and try again.

# Search dtest_sstablesplit_test-node1.log.txt file for "ERROR" text.
grep "ERROR" dtest_sstablesplit_test-node1.log.txt
# Sample output
# ERROR [CompactionExecutor:2] 2014-08-06 10:20:04,790 CassandraDaemon.java:166 - Exception in thread Thread[CompactionExecutor:2,1,main]
# ERROR [CompactionExecutor:2] 2014-08-06 10:21:06,255 CassandraDaemon.java:166 - Exception in thread Thread[CompactionExecutor:2,1,main]

# Search dtest_sstablesplit_test-node1.log.txt file for lines that contain "WARN" and, among those,
# keep only the lines that also contain the string "worker" in any letter case (-i).
# The pipe (|) feeds the output of the first grep into the second one.
grep "WARN" dtest_sstablesplit_test-node1.log.txt | grep -i "worker"
# Sample output
# WARN  [SharedPool-Worker-1] 2014-08-06 10:19:13,101 FBUtilities.java:359 - Trigger directory doesn't exist, please create it and try again.

# Search the given file using the regex which starts with a comma, followed by 3 digits, a space and
# a run of non-space characters ending with ".java".
# grep -E uses POSIX extended regular expressions, which have no lazy ".*?" quantifier: ".*" always takes
# the longest match and could run on to the last ".java" of the line. "[^ ]*" keeps the match inside the
# single token that follows the timestamp.
# The matching text is colorized for easier readability.
grep -E ",[[:digit:]]{3} [^ ]*\.java" --color dtest_sstablesplit_test-node1.log.txt

# Same as above but only prints the text that matched.
# Perfect for slicing and dicing with the pipe operator. More next.
grep -E ",[[:digit:]]{3} [^ ]*\.java" -o dtest_sstablesplit_test-node1.log.txt

# Refines the command above where we cut the already matched text using ' ' (space) as the delimiter.
# Then we pick the second field which contains the actual file name.
grep -E ",[[:digit:]]{3} [^ ]*\.java" -o dtest_sstablesplit_test-node1.log.txt | cut -d " " -f 2

# Now for some real "pipe" power!
# Grep, extract the file name and then sort the names. Using the sorted names, we pick the unique names.
# Not only do we pick the unique names but also the count.
# On top of that we also sort the results in descending order with the highest counts at the beginning.
# "sort -rn" compares the leading counts as numbers instead of as text.
# Similar to SQL "select name, count(*) from.. group by name, order by count(*) desc".
grep -E ",[[:digit:]]{3} [^ ]*\.java" -o dtest_sstablesplit_test-node1.log.txt | cut -d " " -f 2 | sort | uniq -c | sort -rn
# Sample output
#  234 StatusLogger.java
#  136 Memtable.java
#  131 ColumnFamilyStore.java
#   51 CompactionTask.java
#   42 CommitLogReplayer.java
#   33 SSTableReader.java
#   31 CassandraDaemon.java
#   30 YamlConfigurationLoader.java
#   21 StorageService.java
#   12 CacheService.java
#   11 DatabaseDescriptor.java
#    8 ThriftServer.java
#    8 Server.java
#    7 MessagingService.java
#    5 GCInspector.java
#    5 DefsTables.java
#    5 CommitLog.java
#    3 MigrationManager.java
#    3 IndexSummaryManager.java
#    3 CLibrary.java
#    2 Gossiper.java
#    2 CompactionManager.java
#    1 SystemKeyspace.java
#    1 FBUtilities.java

# In case you were wondering if I prefixed each line with "# " manually to print it here, no! I used
# another extremely powerful tool called "sed" but that is for another time (Maybe your homework).
# The sed expression inserts "# " at the start (^) of every line.
grep -E ",[[:digit:]]{3} [^ ]*\.java" -o dtest_sstablesplit_test-node1.log.txt | cut -d " " -f 2 | sort | uniq -c | sort -rn | sed 's/^/# /'

# Let's download another slightly larger file that is already zipped.
# With Gow on Windows use: wget --no-check-certificate
wget https://issues.apache.org/jira/secure/attachment/12707043/system.log.1.zip
# Unzip the file.
# This command is not available in Gow.
unzip system.log.1.zip
# List the contents in reverse chronological order with file sizes in human readable format.
ls -altrh

# Other sample log files can be found at  https://issues.apache.org/jira/browse/CASSANDRA-8390?jql=issuetype%20%3D%20Bug%20AND%20%22Attachment%20count%22%20%3C%3D%20%2210%22%20AND%20%22Attachment%20count%22%20%3E%3D%20%221%22%20AND%20text%20~%20%22zip%20exception%20cassandra%22

# Examine the new file using the same method described above.
# "[^ ]*" is used instead of ".*?" because grep -E has no lazy quantifier; "sort -rn" orders the counts numerically.
# Use "gsort" instead of "sort" with Gow.
grep -E ",[[:digit:]]{3} [^ ]*\.java" -o system.log.2015-03-24_2144 | cut -d " " -f 2 | sort | uniq -c | sort -rn

# Prepare the summaries of these 2 files. Just the unique, sorted file names that appear in each line of the log file.
# "sort -u" sorts and drops duplicate lines in one step.
# Store the contents into "small.txt".
grep -E ",[[:digit:]]{3} [^ ]*\.java" -o dtest_sstablesplit_test-node1.log.txt | cut -d " " -f 2 | sort -u > small.txt
# Store the bigger file's contents into "big.txt".
grep -E ",[[:digit:]]{3} [^ ]*\.java" -o system.log.2015-03-24_2144 | cut -d " " -f 2 | sort -u > big.txt

ls -alt

# Compare the 2 sorted file contents, line by line.
# Output format is 3 columns that are tab separated
# Line appears in the first file only TAB Appears in second file only TAB Both files.
# Sadly "comm" is not available in Gow.
comm small.txt big.txt

# Replace the tab characters with comma and save it to a CSV file so that it can be opened in Excel.
comm small.txt big.txt | tr '\t' ',' > combo.csv

# A slightly less "exportable" alternative that works on Gow and of course Unix.
# -y prints the two files side by side.
diff -y small.txt big.txt

# Some other goodies.
# (When using Gow use "gfind" instead of "find")

# List all file and directory names recursively in the current directory.
find .

# List all file names recursively in the current directory.
find . -type f

# List all names recursively in the current directory that match the wildcard "*log*".
# There is a big difference between wildcard and regex.
# For some reason, this does not work on Gow: https://github.com/bmatzelle/gow/issues/177
find . -name "*log*"
# Sample output
# ./dtest_sstablesplit_test-node1.log.txt
# ./system.log.1.zip
# ./system.log.2015-03-24_2144

# This matches regular expressions (more powerful) where paths have "log" in them and do not end with the ".zip" extension.
# -regex is tested against the whole path. A bracket expression such as "[^.zip]" only rejects a final
# character that is ".", "z", "i" or "p", so the ".zip" ending is excluded with a second, negated (!) -regex.
# On Windows use: gfind . -regex ".*log.*" ! -regex ".*\.zip"
find . -regex ".*log.*" ! -regex ".*\.zip"
# Sample output
# ./dtest_sstablesplit_test-node1.log.txt
# ./system.log.2015-03-24_2144

# This demonstrates another very powerful tool called "xargs" which runs the command "grep --color Exception"
# with the names produced by the command before the pipe (|) appended as arguments.
# Which means that it runs the grep command on the files that were filtered by the find command.
# -type f keeps directories away from grep; -print0 together with "xargs -0" separates the names with a
# NUL byte so that names containing spaces or quotes are passed through intact.
find . -type f -regex ".*log.*" ! -regex ".*\.zip" -print0 | xargs -0 grep --color Exception

# Similar to the one above but makes grep simply count the matching lines in each file.
find . -type f -regex ".*log.*" ! -regex ".*\.zip" -print0 | xargs -0 grep -c Exception
# Sample output
# ./dtest_sstablesplit_test-node1.log.txt:4
# ./system.log.2015-03-24_2144:329

# An extension of the command above where only specific files found by "find" are scanned by grep and
# the matched texts are then sorted and grouped, each with its count.
# This is a great way to summarize the contents of a large number of log files without having to
# manually dredge through them.
find . -type f -regex ".*log.*" ! -regex ".*\.zip" -print0 | xargs -0 grep -E "Caused by: .*: " -o | sort | uniq -c
# Sample output
#  63 ./system.log.2015-03-24_2144:Caused by: java.io.IOException:
	#! /bin/bash

	# Flat credential list: every user name is immediately followed by its password.
	a=(user1 pass1 user2 pass2)

	# Walk the list two entries at a time so that a[i] is the user and a[i+1] the password.
	i=0
	while (( i < ${#a[@]} )); do
	  echo "Loop 1: ${a[i]}: ${a[i+1]}"
	  (( i += 2 ))
	done

	# Two parallel lists: a1[k] holds a user name and a2[k] the password at the same index.
	a1=(user1 user2)
	a2=(pass1 pass2)

	# Visit every index of a1 and print the entries found at that index in both lists,
	# together with the total number of entries in a1.
	i=0
	until (( i >= ${#a1[@]} )); do
	  echo "Loop 2: ${a1[i]}: ${a2[i]} of ${#a1[@]}"
	  (( i += 1 ))
	done

	# Number of servers that receive a set of exposed ports.
	MAX_SERVERS=4
	# Per-service starting ports; the loops below do not read these three.
	START_DW_PORT=9800
	START_DW_ADMIN_PORT=9801
	START_ES_HTTP_PORT=9200
	# First port of the exposed range handed out below.
	START_EXPOSED_PORT=11000

	# One array per service; index k holds the exposed port of server k.
	EXPOSED_DW_PORT=()
	EXPOSED_DW_ADMIN_PORT=()
	EXPOSED_ES_HTTP_PORT=()
	EXPOSED_ES_UNICAST_PORT=()

	# Every server takes four consecutive ports from the exposed range, in the
	# order DW, DW admin, ES HTTP, ES unicast; i is the offset of the server's first port.
	for ((i = 0; i < MAX_SERVERS * 4; i += 4)); do
	  EXPOSED_DW_PORT+=("$((START_EXPOSED_PORT + i))")
	  EXPOSED_DW_ADMIN_PORT+=("$((START_EXPOSED_PORT + i + 1))")
	  EXPOSED_ES_HTTP_PORT+=("$((START_EXPOSED_PORT + i + 2))")
	  EXPOSED_ES_UNICAST_PORT+=("$((START_EXPOSED_PORT + i + 3))")
	done

	# Print one line per server listing its four exposed ports.
	i=0
	while (( i < MAX_SERVERS )); do
	  echo "${EXPOSED_DW_PORT[i]} ${EXPOSED_DW_ADMIN_PORT[i]} ${EXPOSED_ES_HTTP_PORT[i]} ${EXPOSED_ES_UNICAST_PORT[i]}"
	  (( i += 1 ))
	done
	# Fetch the upstream remote and rebase the current branch onto upstream/master.
	git fetch upstream
	git rebase upstream/master

	git fetch origin
	# Rebase from my own fork
	git rebase origin/map-merge
	git push origin map-merge:map-merge

	# map-merge-fresh is a new local branch with a clean history.
	# --force makes the remote map-merge branch point at it even though the histories differ.
	git push origin map-merge-fresh:map-merge --force
	# All these commands can be found on Mac/GNU Linux (Naturally) and Gow on Windows (https://github.com/bmatzelle/gow)

	# Let's download a log file which has some Java stacktraces and exceptions.
	# The file is saved in the current directory under the last component of the URL.
	# With Gow on Windows use: wget --no-check-certificate
	wget https://issues.apache.org/jira/secure/attachment/12660187/dtest_sstablesplit_test-node1.log.txt
	# List the directory contents (-a includes hidden entries, -l uses the long format).
	ls -al

	# Count the number of lines in the file.
	wc -l dtest_sstablesplit_test-node1.log.txt
	# Sample output
	# 809 dtest_sstablesplit_test-node1.log.txt

	# Search dtest_sstablesplit_test-node1.log.txt file for "WARN" text.
	grep "WARN" dtest_sstablesplit_test-node1.log.txt
	# Sample output
	# WARN [main] 2014-08-06 10:19:09,268 SystemKeyspace.java:696 - No host ID found, created a19e8911-e77c-4280-ae15-c7ed162e3a55 (Note: This should happen exactly once per node).
	# WARN [SharedPool-Worker-1] 2014-08-06 10:19:13,101 FBUtilities.java:359 - Trigger directory doesn't exist, please create it and try again.

	# Search dtest_sstablesplit_test-node1.log.txt file for "ERROR" text.
	grep "ERROR" dtest_sstablesplit_test-node1.log.txt
	# Sample output
	# ERROR [CompactionExecutor:2] 2014-08-06 10:20:04,790 CassandraDaemon.java:166 - Exception in thread Thread[CompactionExecutor:2,1,main]
	# ERROR [CompactionExecutor:2] 2014-08-06 10:21:06,255 CassandraDaemon.java:166 - Exception in thread Thread[CompactionExecutor:2,1,main]

	# Search dtest_sstablesplit_test-node1.log.txt file for lines that contain "WARN" and, among those,
	# keep only the lines that also contain the string "worker" in any letter case (-i).
	# The pipe must be a plain, unescaped "|": written as "\|" it would be handed to grep as a file name.
	grep "WARN" dtest_sstablesplit_test-node1.log.txt | grep -i "worker"
	# Sample output
	# WARN [SharedPool-Worker-1] 2014-08-06 10:19:13,101 FBUtilities.java:359 - Trigger directory doesn't exist, please create it and try again.

	# Search the given file using the regex which starts with a comma, followed by 3 digits, a space and
	# a run of non-space characters ending with ".java".
	# grep -E uses POSIX extended regular expressions, which have no lazy ".*?" quantifier: ".*" always takes
	# the longest match and could run on to the last ".java" of the line. "[^ ]*" keeps the match inside the
	# single token that follows the timestamp.
	# The matching text is colorized for easier readability.
	grep -E ",[[:digit:]]{3} [^ ]*\.java" --color dtest_sstablesplit_test-node1.log.txt

	# Same as above but only prints the text that matched.
	# Perfect for slicing and dicing with the pipe operator. More next.
	grep -E ",[[:digit:]]{3} [^ ]*\.java" -o dtest_sstablesplit_test-node1.log.txt

	# Refines the command above where we cut the already matched text using ' ' (space) as the delimiter.
	# Then we pick the second field which contains the actual file name.
	grep -E ",[[:digit:]]{3} [^ ]*\.java" -o dtest_sstablesplit_test-node1.log.txt | cut -d " " -f 2

	# Now for some real "pipe" power!
	# Grep, extract the file name and then sort the names. Using the sorted names, we pick the unique names.
	# Not only do we pick the unique names but also the count.
	# On top of that we also sort the results in descending order with the highest counts at the beginning.
	# "sort -rn" compares the leading counts as numbers instead of as text.
	# Similar to SQL "select name, count(*) from.. group by name, order by count(*) desc".
	grep -E ",[[:digit:]]{3} [^ ]*\.java" -o dtest_sstablesplit_test-node1.log.txt | cut -d " " -f 2 | sort | uniq -c | sort -rn
	# Sample output
	# 234 StatusLogger.java
	# 136 Memtable.java
	# 131 ColumnFamilyStore.java
	# 51 CompactionTask.java
	# 42 CommitLogReplayer.java
	# 33 SSTableReader.java
	# 31 CassandraDaemon.java
	# 30 YamlConfigurationLoader.java
	# 21 StorageService.java
	# 12 CacheService.java
	# 11 DatabaseDescriptor.java
	# 8 ThriftServer.java
	# 8 Server.java
	# 7 MessagingService.java
	# 5 GCInspector.java
	# 5 DefsTables.java
	# 5 CommitLog.java
	# 3 MigrationManager.java
	# 3 IndexSummaryManager.java
	# 3 CLibrary.java
	# 2 Gossiper.java
	# 2 CompactionManager.java
	# 1 SystemKeyspace.java
	# 1 FBUtilities.java

	# In case you were wondering if I prefixed each line with "# " manually to print it here, no! I used
	# another extremely powerful tool called "sed" but that is for another time (Maybe your homework).
	# The sed expression inserts "# " at the start (^) of every line.
	grep -E ",[[:digit:]]{3} [^ ]*\.java" -o dtest_sstablesplit_test-node1.log.txt | cut -d " " -f 2 | sort | uniq -c | sort -rn | sed 's/^/# /'

	# Let's download another slightly larger file that is already zipped.
	# With Gow on Windows use: wget --no-check-certificate
	wget https://issues.apache.org/jira/secure/attachment/12707043/system.log.1.zip
	# Unzip the file.
	# This command is not available in Gow.
	unzip system.log.1.zip
	# List the contents in reverse chronological order with file sizes in human readable format.
	ls -altrh

	# Other sample log files can be found at https://issues.apache.org/jira/browse/CASSANDRA-8390?jql=issuetype%20%3D%20Bug%20AND%20%22Attachment%20count%22%20%3C%3D%20%2210%22%20AND%20%22Attachment%20count%22%20%3E%3D%20%221%22%20AND%20text%20~%20%22zip%20exception%20cassandra%22

	# Examine the new file using the same method described above.
	# "[^ ]*" is used instead of ".*?" because grep -E has no lazy quantifier; "sort -rn" orders the counts numerically.
	# Use "gsort" instead of "sort" with Gow.
	grep -E ",[[:digit:]]{3} [^ ]*\.java" -o system.log.2015-03-24_2144 | cut -d " " -f 2 | sort | uniq -c | sort -rn

	# Prepare the summaries of these 2 files. Just the unique, sorted file names that appear in each line of the log file.
	# "sort -u" sorts and drops duplicate lines in one step.
	# Store the contents into "small.txt".
	grep -E ",[[:digit:]]{3} [^ ]*\.java" -o dtest_sstablesplit_test-node1.log.txt | cut -d " " -f 2 | sort -u > small.txt
	# Store the bigger file's contents into "big.txt".
	grep -E ",[[:digit:]]{3} [^ ]*\.java" -o system.log.2015-03-24_2144 | cut -d " " -f 2 | sort -u > big.txt

	ls -alt

	# Compare the 2 sorted file contents, line by line.
	# Output format is 3 columns that are tab separated
	# Line appears in the first file only TAB Appears in second file only TAB Both files.
	# Sadly "comm" is not available in Gow.
	comm small.txt big.txt

	# Replace the tab characters with comma and save it to a CSV file so that it can be opened in Excel.
	comm small.txt big.txt | tr '\t' ',' > combo.csv

	# A slightly less "exportable" alternative that works on Gow and of course Unix.
	# -y prints the two files side by side.
	diff -y small.txt big.txt

	# Some other goodies.
	# (When using Gow use "gfind" instead of "find")

	# List all file and directory names recursively in the current directory.
	find .

	# List all file names recursively in the current directory.
	find . -type f

	# List all names recursively in the current directory that match the wildcard "*log*".
	# There is a big difference between wildcard and regex.
	# For some reason, this does not work on Gow: https://github.com/bmatzelle/gow/issues/177
	find . -name "*log*"
	# Sample output
	# ./dtest_sstablesplit_test-node1.log.txt
	# ./system.log.1.zip
	# ./system.log.2015-03-24_2144

	# This matches regular expressions (more powerful) where paths have "log" in them and do not end with the ".zip" extension.
	# -regex is tested against the whole path. A bracket expression such as "[^.zip]" only rejects a final
	# character that is ".", "z", "i" or "p", so the ".zip" ending is excluded with a second, negated (!) -regex.
	# On Windows use: gfind . -regex ".*log.*" ! -regex ".*\.zip"
	find . -regex ".*log.*" ! -regex ".*\.zip"
	# Sample output
	# ./dtest_sstablesplit_test-node1.log.txt
	# ./system.log.2015-03-24_2144

	# This demonstrates another very powerful tool called "xargs" which runs the command "grep --color Exception"
	# with the names produced by the command before the pipe (|) appended as arguments.
	# Which means that it runs the grep command on the files that were filtered by the find command.
	# -type f keeps directories away from grep; -print0 together with "xargs -0" separates the names with a
	# NUL byte so that names containing spaces or quotes are passed through intact.
	find . -type f -regex ".*log.*" ! -regex ".*\.zip" -print0 | xargs -0 grep --color Exception

	# Similar to the one above but makes grep simply count the matching lines in each file.
	find . -type f -regex ".*log.*" ! -regex ".*\.zip" -print0 | xargs -0 grep -c Exception
	# Sample output
	# ./dtest_sstablesplit_test-node1.log.txt:4
	# ./system.log.2015-03-24_2144:329

	# An extension of the command above where only specific files found by "find" are scanned by grep and
	# the matched texts are then sorted and grouped, each with its count.
	# This is a great way to summarize the contents of a large number of log files without having to
	# manually dredge through them.
	find . -type f -regex ".*log.*" ! -regex ".*\.zip" -print0 | xargs -0 grep -E "Caused by: .*: " -o | sort | uniq -c
	# Sample output
	# 63 ./system.log.2015-03-24_2144:Caused by: java.io.IOException: