Kirill Pavlov pavlov99

## test-javascript.sh
# Run every test file under ./src (files named *.spec.js) with one NodeJS invocation.
# reify (https://github.com/benjamn/reify) is preloaded so that the tests can use
# ES6 import/export syntax.
# `find` prints one "require('<path>');" statement per test file and `paste` joins
# them into a single line, which node then evaluates as an inline script.
# If the tests emit TAP, the output can be piped to
# ./node_modules/.bin/tap-mocha-reporter spec
node --require reify --eval "$(
  find ./src -type f -name '*\.spec\.js' -exec echo "require('{}');" \; \
    | paste -s -d ' ' -
)"

## update-photo-metadata.sh
#!/usr/bin/env bash
# Update photo information: time and location.
# After the agency edited the photos, all of their meta tags were changed. This script restores the time and location.
# Photos are ordered by filename, e.g. 001.jpg, 002.jpg, etc. The whole event happened
# between 16:30 and 21:00 Moscow time on 30th June. Set time to every picture as if they were
# taken uniformly during the event.
#
# See also:
#   exiftool https://www.sno.phy.queensu.ca/~phil/exiftool/ (allows batch editing as well).
#   https://www.latlong.net/ to find your location and get latitude and longitude info.

## combine-pdfs.sh
# Merge every PDF of the current directory into combined.pdf with pdftk running
# in a Docker container. The current directory is mounted at /work (presumably
# the image's working directory, since the file names are passed as relative paths).
# `*.pdf` is expanded by the host shell, so an existing combined.pdf from an
# earlier run would be passed as an input as well - remove it before re-running.
# --rm deletes the container when pdftk exits, so repeated runs do not leave
# stopped containers behind.
docker run --rm -v "$(pwd)":/work mnuessler/pdftk *.pdf cat output combined.pdf

## archives-progress-bar.sh
# Install linux pipe viewer (pv) and optional dialog.
# pv is the tool every pipeline below depends on, so it is installed explicitly.
sudo apt-get install pv dialog

# Archive:
# pv -s gets the total size in bytes (GNU du -sb) so it can show the progress
# relative to the whole archive. The substitution is quoted so that an empty or
# odd du result cannot be split into several arguments.
tar cf - /folder-with-big-files -P \
  | pv -s "$(du -sb /folder-with-big-files | awk '{print $1}')" \
  | gzip > big-files.tar.gz
# OSX:
# du is asked for KiB (-sk) instead of bytes here, hence the * 1024.
tar cf - /folder-with-big-files -P \
  | pv -s "$(($(du -sk /folder-with-big-files | awk '{print $1}') * 1024))" \
  | gzip > big-files.tar.gz

# Unarchive:
# pv reads the archive itself, so it knows the total size without -s.
pv file.tgz | tar xzf - -C target_directory

## bash-random-lines-test.sh
#!/bin/bash
# Benchmark two ways of picking 10 random lines from a big generated file.
# Requires GNU time (for -v), GNU sort (for -R) and shuf.

# Unpredictable scratch file instead of a guessable /tmp name.
FILENAME=$(mktemp "${TMPDIR:-/tmp}/random-lines.XXXXXX") || exit 1
# The scratch file is large; remove it however the script exits.
trap 'rm -f -- "$FILENAME"' EXIT
NUMLINES=10000000
seq -f 'line %.0f' "$NUMLINES" > "$FILENAME"

# `which time` resolves the external time binary; bash's own `time` keyword
# does not understand -v.
# NOTE: time wraps only the first command of each pipeline (nl / shuf). Its
# report is written to stderr, so it stays visible despite the redirect.

echo "10 random lines with nl:"
# nl -ba prefixes every line with its number, sort -R shuffles the numbered
# lines (the original `sort -r` was a plain reverse sort, i.e. not random),
# and sed strips the number prefix again.
"$(which time)" -v nl -ba "$FILENAME" | sort -R | sed 's/.*[0-9]\t//' | head > /dev/null

echo "10 random lines with shuf:"
"$(which time)" -v shuf "$FILENAME" -n10 | head > /dev/null

## configure-mac-os.sh
# Run the project's configure script with the Perl/Python/Ruby/Lua interpreter
# options, cscope, GUI auto-detection and the GTK2/GNOME checks enabled.
# (The flags look like Vim's configure options - confirm against the project.)
# The last option has no trailing backslash: a dangling continuation would
# splice whatever line follows into this command.
./configure \
  --enable-perlinterp \
  --enable-pythoninterp \
  --enable-rubyinterp \
  --enable-luainterp \
  --enable-fail-if-missing \
  --enable-cscope \
  --enable-gui=auto \
  --enable-gtk2-check \
  --enable-gnome-check

## GPG-keys.sh
# Create a new GPG key pair using the full interactive dialog.
# Guide: https://help.ubuntu.com/community/GnuPrivacyGuardHowto
# GnuPG downloads: https://www.gnupg.org/download/index.html
gpg2 --full-gen-key

# Remove a key pair again, identified by its user name.
# The secret key goes first, then the public key.
gpg --delete-secret-keys "User Name"
gpg --delete-keys "User Name"

## group-additional-count.scala
// Assign every object to exactly one group, so that objects which belong to
// several groups are not counted more than once (e.g. for customer segmentation).
// `groups` is a DataSet with two columns: `id` identifies the object, `group` is a group name.
// Within each id the rows are ranked by group name and only rank 1 is kept,
// i.e. every object stays in the first of its groups in ascending order.
val disjointGroups = {
  val perIdByGroup = org.apache.spark.sql.expressions.Window
    .partitionBy("id")
    .orderBy("group")

  groups
    .withColumn("_rank", dense_rank().over(perIdByGroup))
    .filter($"_rank" === 1)
    .drop("_rank")
}

// Group the disjoint rows by group name.
// NOTE(review): no aggregation follows groupBy here, so nothing is counted or
// shown yet - presumably a count is meant to come next; confirm.
disjointGroups.groupBy("group")

## group-overlap-detection.scala
// Detect objects that belong to more than one group by joining `groups` with a copy of itself.
// Data example:
// id    group
// 1     A
// 2     A
// 2     B
// In this case object `2` belongs to both groups "A" and "B"
// Copy of `groups` with the group column renamed to `_group`, so that both
// group names can be referenced side by side in the join condition.
val overlappedGroups = groups.select($"id", $"group" as "_group")

// Pair each row with the rows of the same id whose group name is strictly greater.
groups
    .join(overlappedGroups, (groups("id") === overlappedGroups("id")) && ($"group" < $"_group"))  // NOTE: group A < group B, so duplicates (A,B) (B,A) would be removed.

## spark-df-column-to-scala-list.scala
// Collect the string column "columnName" and return its values in sorted order.
df.select("columnName")
  .collect()
  .map(row => row.getString(0))
  .sorted
	# Run every test file under ./src (files named *.spec.js) with one NodeJS invocation.
	# reify (https://github.com/benjamn/reify) is preloaded so that the tests can use
	# ES6 import/export syntax.
	# `find` prints one "require('<path>');" statement per test file and `paste` joins
	# them into a single line, which node then evaluates as an inline script.
	# The pipe must be a real `|`: an escaped `\|` is handed to find as a literal
	# argument and paste never runs.
	# If the tests emit TAP, the output can be piped to
	# ./node_modules/.bin/tap-mocha-reporter spec
	node --require reify -e "$(find ./src -name '*\.spec\.js' -type f -exec echo "require('{}');" \; | paste -s -d' ' -)"
	#!/usr/bin/env bash
	# Update photo information: time and location.
	# After the agency edited the photos, all of their meta tags were changed. This script restores the time and location.
	# Photos are ordered by filename, e.g. 001.jpg, 002.jpg, etc. The whole event happened
	# between 16:30 and 21:00 Moscow time on 30th June. Set time to every picture as if they were
	# taken uniformly during the event.
	#
	# See also:
	# exiftool https://www.sno.phy.queensu.ca/~phil/exiftool/ (allows batch editing as well).
	# https://www.latlong.net/ to find your location and get latitude and longitude info.
	# Install linux pipe viewer (pv) and optional dialog.
	# pv is the tool every pipeline below depends on, so it is installed explicitly.
	sudo apt-get install pv dialog

	# Archive:
	# The stages are joined with real pipes (an escaped `\|` would be passed to
	# tar as a literal argument). pv -s gets the total size in bytes (GNU du -sb)
	# so it can show the progress relative to the whole archive.
	tar cf - /folder-with-big-files -P \
	  | pv -s "$(du -sb /folder-with-big-files | awk '{print $1}')" \
	  | gzip > big-files.tar.gz
	# OSX:
	# du is asked for KiB (-sk) instead of bytes here, hence the * 1024.
	tar cf - /folder-with-big-files -P \
	  | pv -s "$(($(du -sk /folder-with-big-files | awk '{print $1}') * 1024))" \
	  | gzip > big-files.tar.gz

	# Unarchive:
	# pv reads the archive itself, so it knows the total size without -s.
	pv file.tgz | tar xzf - -C target_directory
	#!/bin/bash
	# Benchmark two ways of picking 10 random lines from a big generated file.
	# Requires GNU time (for -v), GNU sort (for -R) and shuf.

	# Unpredictable scratch file instead of a guessable /tmp name.
	FILENAME=$(mktemp "${TMPDIR:-/tmp}/random-lines.XXXXXX") || exit 1
	# The scratch file is large; remove it however the script exits.
	trap 'rm -f -- "$FILENAME"' EXIT
	NUMLINES=10000000
	seq -f 'line %.0f' "$NUMLINES" > "$FILENAME"

	# `which time` resolves the external time binary; bash's own `time` keyword
	# does not understand -v.
	# NOTE: time wraps only the first command of each pipeline (nl / shuf). Its
	# report is written to stderr, so it stays visible despite the redirect.

	echo "10 random lines with nl:"
	# nl -ba prefixes every line with its number, sort -R shuffles the numbered
	# lines (the original `sort -r` was a plain reverse sort, i.e. not random),
	# and sed strips the number prefix again. The stages use real pipes; an
	# escaped `\|` would be passed to nl as a literal argument.
	"$(which time)" -v nl -ba "$FILENAME" | sort -R | sed 's/.*[0-9]\t//' | head > /dev/null

	echo "10 random lines with shuf:"
	"$(which time)" -v shuf "$FILENAME" -n10 | head > /dev/null
	# Run the project's configure script with the Perl/Python/Ruby/Lua interpreter
	# options, cscope, GUI auto-detection and the GTK2/GNOME checks enabled.
	# (The flags look like Vim's configure options - confirm against the project.)
	# The last option has no trailing backslash: a dangling continuation would
	# splice whatever line follows into this command.
	./configure \
	  --enable-perlinterp \
	  --enable-pythoninterp \
	  --enable-rubyinterp \
	  --enable-luainterp \
	  --enable-fail-if-missing \
	  --enable-cscope \
	  --enable-gui=auto \
	  --enable-gtk2-check \
	  --enable-gnome-check
	# Create a new GPG key pair using the full interactive dialog.
	# Guide: https://help.ubuntu.com/community/GnuPrivacyGuardHowto
	# GnuPG downloads: https://www.gnupg.org/download/index.html
	gpg2 --full-gen-key

	# Remove a key pair again, identified by its user name.
	# The secret key goes first, then the public key.
	gpg --delete-secret-keys "User Name"
	gpg --delete-keys "User Name"
	// Assign every object to exactly one group, so that objects which belong to
	// several groups are not counted more than once (e.g. for customer segmentation).
	// `groups` is a DataSet with two columns: `id` identifies the object, `group` is a group name.
	// Within each id the rows are ranked by group name and only rank 1 is kept,
	// i.e. every object stays in the first of its groups in ascending order.
	val disjointGroups = {
	  val perIdByGroup = org.apache.spark.sql.expressions.Window
	    .partitionBy("id")
	    .orderBy("group")

	  groups
	    .withColumn("_rank", dense_rank().over(perIdByGroup))
	    .filter($"_rank" === 1)
	    .drop("_rank")
	}

	// Group the disjoint rows by group name.
	// NOTE(review): no aggregation follows groupBy here, so nothing is counted or
	// shown yet - presumably a count is meant to come next; confirm.
	disjointGroups.groupBy("group")
	// Detect objects that belong to more than one group by joining `groups` with a copy of itself.
	// Data example:
	// id    group
	// 1     A
	// 2     A
	// 2     B
	// In this case object `2` belongs to both groups "A" and "B"
	// Copy of `groups` with the group column renamed to `_group`, so that both
	// group names can be referenced side by side in the join condition.
	val overlappedGroups = groups.select($"id", $"group" as "_group")

	// Pair each row with the rows of the same id whose group name is strictly greater.
	groups
	.join(overlappedGroups, (groups("id") === overlappedGroups("id")) && ($"group" < $"_group")) // NOTE: group A < group B, so duplicates (A,B) (B,A) would be removed.