Ed Gleeck edgl

## gist:30558cdb228aac60ad27e970014f9bd8
#!/bin/bash

set -e

BEELINE=""
OUTPUT_FILE="hive_table_hdfs_stats.csv"

echo "Database,Table,HDFS_Location,Size_Bytes,File_Count" > "$OUTPUT_FILE"

get_hdfs_size() {

## CountDocPages
# PowerShell Script to Count Pages in .docx Files in the Current Folder
# This script scans all `.docx` files in the current directory, opens each document using Microsoft Word,
# and prints the file name along with the number of pages. It requires Microsoft Word to be installed
# on your system as it uses Word's COM interface to access the page count.
#
# How to Run:
# 1. Save this script as `CountDocxPages.ps1`.
# 2. Open PowerShell in the folder where your .docx files are located.
# 3. Run the script using the command: `.\CountDocxPages.ps1`

## upgrade_notes.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                edgl
                / upgrade_notes.md
            
            
              Created
              January 30, 2023 02:24
            
          
    Back up the agents

Run these on all the hosts
export CM_BACKUP_DIR="`date +%F`-CM6.3.4"
echo $CM_BACKUP_DIR
mkdir -p $CM_BACKUP_DIR


## pdf_converter.groovy
import java.nio.charset.*;
import org.apache.commons.io.IOUtils;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;

## quick_changes
net.core.rmem_default(recommend: 4194304)
net.core.netdev_max_backlog(recommend: 250000)
net.ipv4.tcp_low_latency(recommend: 1)
net.core.rmem_max(recommend: 4194304)
net.core.wmem_max(recommend: 4194304)
net.core.optmem_max(recommend: 4194304)
net.core.somaxconn(recommend: 8192)
net.core.wmem_default(recommend: 4194304)
net.ipv4.tcp_wmem(recommend: 4096 65536 4194304)
net.ipv4.tcp_max_syn_backlog(recommend: 8192)

## output.txt
 # Using cores=12 memory=256GB disks=12 hbase=True
 # Profile: cores=12 memory=196608MB reserved=64GB usableMem=192GB disks=12
 # Num Container=22
 # Container Ram=8192MB
 # Used Ram=176GB
 # Unused Ram=64GB
 ***** mapred-site.xml *****
 mapreduce.map.memory.mb=8192
 mapreduce.map.java.opts=-Xmx6144m
 mapreduce.reduce.memory.mb=8192

## parsekagglefields.scala
/*
In Kaggle, copy the top rows of the sample dataset table. When you paste
them into a text editor, the result looks like this:
<field_name>
<field_type>
...

To Run:
> scala parsekagglefields.scala <filename>
*/

## gist:2f1aa2fdaa6a77297554433551313c62
$script = <<SCRIPT
# limits
ulimit -n 10000
# disable firewall
chkconfig firewalld off
service iptables stop
rm -f /etc/yum.repos.d/HDP*
yum clean all
yum repolist
yum -y install wget

## gist:c4b698f94e27b5634549

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                edgl
                / gist:c4b698f94e27b5634549
            
            
              Last active
              August 29, 2015 14:24
            
              
                Installing Cloudera 5.4.3
              
          
    Installing Cloudera on VirtualBox

Set up guest additions
Add rpmforge

curl -OL http://pkgs.repoforge.org/rpmforge-release/rpmforge-release-0.5.3-1.el6.rf.x86_64.rpm
rpm -i rpmforge-release-0.5.3-1.el6.rf.x86_64.rpm
rpm --import http://apt.sw.be/RPM-GPG-KEY.dag.txt

Set up Guest Additions

  
## 0_reuse_code.js
// Use Gists to store code you would like to remember later on
console.log(window); // log the "window" object to the console
	#!/bin/bash

	set -e

	BEELINE=""
	OUTPUT_FILE="hive_table_hdfs_stats.csv"

	echo "Database,Table,HDFS_Location,Size_Bytes,File_Count" > "$OUTPUT_FILE"

	get_hdfs_size() {
	# PowerShell Script to Count Pages in .docx Files in the Current Folder
	# This script scans all `.docx` files in the current directory, opens each document using Microsoft Word,
	# and prints the file name along with the number of pages. It requires Microsoft Word to be installed
	# on your system as it uses Word's COM interface to access the page count.
	#
	# How to Run:
	# 1. Save this script as `CountDocxPages.ps1`.
	# 2. Open PowerShell in the folder where your .docx files are located.
	# 3. Run the script using the command: `.\CountDocxPages.ps1`
	import java.nio.charset.*;
	import org.apache.commons.io.IOUtils;
	import java.awt.image.BufferedImage;
	import java.awt.image.RenderedImage;
	import java.io.File;
	import java.io.FileOutputStream;
	import java.util.Iterator;
	import java.util.List;
	import java.util.ArrayList;
	import java.util.Map;
	net.core.rmem_default(recommend: 4194304)
	net.core.netdev_max_backlog(recommend: 250000)
	net.ipv4.tcp_low_latency(recommend: 1)
	net.core.rmem_max(recommend: 4194304)
	net.core.wmem_max(recommend: 4194304)
	net.core.optmem_max(recommend: 4194304)
	net.core.somaxconn(recommend: 8192)
	net.core.wmem_default(recommend: 4194304)
	net.ipv4.tcp_wmem(recommend: 4096 65536 4194304)
	net.ipv4.tcp_max_syn_backlog(recommend: 8192)
	# Using cores=12 memory=256GB disks=12 hbase=True
	# Profile: cores=12 memory=196608MB reserved=64GB usableMem=192GB disks=12
	# Num Container=22
	# Container Ram=8192MB
	# Used Ram=176GB
	# Unused Ram=64GB
	*** mapred-site.xml ***
	mapreduce.map.memory.mb=8192
	mapreduce.map.java.opts=-Xmx6144m
	mapreduce.reduce.memory.mb=8192
	/*
	In Kaggle, copy the top rows of the sample dataset table. When you paste
	them into a text editor, the result looks like this:
	<field_name>
	<field_type>
	...

	To Run:
	> scala parsekagglefields.scala <filename>
	*/
	$script = <<SCRIPT
	# limits
	ulimit -n 10000
	# disable firewall
	chkconfig firewalld off
	service iptables stop
	rm -f /etc/yum.repos.d/HDP*
	yum clean all
	yum repolist
	yum -y install wget
	// Use Gists to store code you would like to remember later on
	console.log(window); // log the "window" object to the console