Skip to content

Instantly share code, notes, and snippets.

#!/bin/bash
# Stop at the first command that exits with a non-zero status.
set -e

# NOTE(review): BEELINE is empty in this view; presumably it is meant to hold
# the beeline command line -- confirm before running.
BEELINE=""

# CSV report file, created (or truncated) in the current working directory.
OUTPUT_FILE="hive_table_hdfs_stats.csv"

# Begin the report with its column header row.
printf '%s\n' "Database,Table,HDFS_Location,Size_Bytes,File_Count" > "$OUTPUT_FILE"
get_hdfs_size() {
# PowerShell Script to Count Pages in .docx Files in the Current Folder
# This script scans all `.docx` files in the current directory, opens each document using Microsoft Word,
# and prints the file name along with the number of pages. It requires Microsoft Word to be installed
# on your system as it uses Word's COM interface to access the page count.
#
# How to Run:
# 1. Save this script as `CountDocxPages.ps1`.
# 2. Open PowerShell in the folder where your .docx files are located.
# 3. Run the script using the command: `.\CountDocxPages.ps1`

Back up the agents


Run these on all the hosts

# Backup directory name: today's date (date +%F, i.e. YYYY-MM-DD) followed by
# the literal suffix "-CM6.3.4".
# Assign first and export afterwards so a failing `date` is not hidden behind
# the exit status of `export`.
CM_BACKUP_DIR="$(date +%F)-CM6.3.4"
export CM_BACKUP_DIR

# Print the chosen name so it can be checked before anything is copied into it.
printf '%s\n' "$CM_BACKUP_DIR"

# -p: do not fail if the directory already exists; --: end of options.
mkdir -p -- "$CM_BACKUP_DIR"
import java.nio.charset.*;
import org.apache.commons.io.IOUtils;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
net.core.rmem_default(recommend: 4194304)
net.core.netdev_max_backlog(recommend: 250000)
net.ipv4.tcp_low_latency(recommend: 1)
net.core.rmem_max(recommend: 4194304)
net.core.wmem_max(recommend: 4194304)
net.core.optmem_max(recommend: 4194304)
net.core.somaxconn(recommend: 8192)
net.core.wmem_default(recommend: 4194304)
net.ipv4.tcp_wmem(recommend: 4096 65536 4194304)
net.ipv4.tcp_max_syn_backlog(recommend: 8192).
@edgl
edgl / output.txt
Created June 6, 2017 15:21
Sample HDP Configuration Util Output
# Using cores=12 memory=256GB disks=12 hbase=True
# Profile: cores=12 memory=196608MB reserved=64GB usableMem=192GB disks=12
# Num Container=22
# Container Ram=8192MB
# Used Ram=176GB
# Unused Ram=64GB
***** mapred-site.xml *****
mapreduce.map.memory.mb=8192
mapreduce.map.java.opts=-Xmx6144m
mapreduce.reduce.memory.mb=8192
@edgl
edgl / parsekagglefields.scala
Last active December 17, 2016 20:02
Parse Fields Of Kaggle Fields Column Header
/*
In Kaggle, copy the top rows of the sample dataset table. When you paste
it in a text editor it will produce this:
<field_name>
<field_type>
...
To Run:
> scala parsekagglefields.scala <filename>
*/
@edgl
edgl / gist:2f1aa2fdaa6a77297554433551313c62
Created November 26, 2016 01:44
HDP 2.5 Vagrant File
$script = <<SCRIPT
# limits
ulimit -n 10000
# disable firewall
chkconfig firewalld off
service iptables stop
rm -f /etc/yum.repos.d/HDP*
yum clean all
yum repolist
yum -y install wget
@edgl
edgl / gist:c4b698f94e27b5634549
Last active August 29, 2015 14:24
Installing Cloudera 5.4.3
@edgl
edgl / 0_reuse_code.js
Created June 10, 2014 14:01
Here are some things you can do with Gists in GistBox.
// Use Gists to store code you would like to remember later on
console.log(window); // log the "window" object to the console