Albert alpoza

## jinspect.groovy
listFilter = []
jarfile = ""
fileList = []

cli = new CliBuilder(usage:"jinspect [-hHvxXoawmISMWjJrgGcCpPlLZZZ] jarfile [filename ...]")
cli.with {
  h(longOpt:'help', 'show help')
  v(longOpt:'verbose', 'be more verbose')
  X(longOpt:'save', 'save specified files (including path) to the current directory instead of printing them')
  l(longOpt:'war', 'list war contents; specify again to include more information')

## printpdf.groovy
import org.codehaus.groovy.scriptom.*
import org.codehaus.groovy.scriptom.tlb.office.*
import org.codehaus.groovy.scriptom.tlb.office.excel.*
import org.codehaus.groovy.scriptom.tlb.office.word.WdSaveOptions

def waitTime = 10000
Scriptom.inApartment {
  def dir = new File("c:/temp")
  def xlApp = new ActiveXObject('Excel.Application')
  def wdApp = new ActiveXObject('Word.Application')

## sshTunnel.groovy
import com.jcraft.jsch.JSch
import com.jcraft.jsch.Session
import com.jcraft.jsch.UserInfo
import com.jcraft.jsch.Channel
import com.jcraft.jsch.ChannelExec

def sshHost = '10.1.2.132'
def sshUser = 'root'
def sshPass = '******'
def sshPort = 22

## aggregateByKey.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                alpoza
                / aggregateByKey.md
            
            
              Last active
              August 29, 2015 14:23
                — forked from tdhopper/aggregateByKey.md
            
          
    The pyspark documentation doesn't include an example for the aggregateByKey RDD method. I didn't find any nice examples online, so I wrote my own.
Here's what the documentation does say:
aggregateByKey(self, zeroValue, seqFunc, combFunc, numPartitions=None)

Aggregate the values of each key, using given combine functions and a neutral "zero value". This function can return a different result type, U, than the type of the values in this RDD, V. Thus, we need one operation for merging a V into a U and one operation for merging two U's. The former operation is used for merging values within a partition, and the latter is used for merging values between partitions. To avoid memory allocation, both of these functions are allowed to modify and return their first argument instead of creating a new U.

reduceByKey and aggregateByKey are much more efficient than groupByKey and should be used for aggregations as much as possible.

  
## one-hot.py
import pandas as pd
import numpy as np
from sklearn.feature_extraction import DictVectorizer

def encode_onehot(df, cols):
    """
    One-hot encoding is applied to columns specified in a pandas DataFrame.

    Modified from: https://gist.github.com/kljensen/5452382


## gist:26f4177d85d3d134c350b9752bcf772a
import groovy.json.JsonOutput

/**
 * A simple CSV file to Json converter
 *
 * The CSV file is expected to have a header row to identify the columns. These
 * columns will be used to generate the corresponding Json field.
 *
 * @author Marco Pas
 */

## CsvSlurper.groovy
package groovy.csv

/**
 * CSV slurper which parses text or reader content into a data structure of lists and maps.
 * <p>
 * Example usage:
 * <code><pre>
 * def slurper = new CsvSlurper()
 * def result = slurper.parseText('''
 *     name,   age

## import_csv_to_mongo
#!/usr/bin/env python
import sys
import pandas as pd
import pymongo
import json


def import_content(filepath):
    mng_client = pymongo.MongoClient('localhost', 27017)

## jenkins_copying_configuration.sh
ORIGINAL_JENKINS_SERVER=
ORIGINAL_SERVER_USER=

NEW_JENKINS_SERVER=
NEW_SERVER_USER=

# ON THE ORIGINAL JENKINS SERVER
ssh $ORIGINAL_SERVER_USER@$ORIGINAL_JENKINS_SERVER
cd /var/lib/jenkins/
for i in `ls jobs`; do echo "jobs/$i/config.xml";done > config.totar

## Query LDAP from R
library(RCurl)
val <- getURL('ldap://ldap.domain.net/DC=domain,DC=net?sAMAccountName?sub?(employeeID=0123456)',
 .opts=list(userpwd = "DOMAIN\\domainid:password"))
	listFilter = []
	jarfile = ""
	fileList = []

	cli = new CliBuilder(usage:"jinspect [-hHvxXoawmISMWjJrgGcCpPlLZZZ] jarfile [filename ...]")
	cli.with {
	h(longOpt:'help', 'show help')
	v(longOpt:'verbose', 'be more verbose')
	X(longOpt:'save', 'save specified files (including path) to the current directory instead of printing them')
	l(longOpt:'war', 'list war contents; specify again to include more information')
	import org.codehaus.groovy.scriptom.*
	import org.codehaus.groovy.scriptom.tlb.office.*
	import org.codehaus.groovy.scriptom.tlb.office.excel.*
	import org.codehaus.groovy.scriptom.tlb.office.word.WdSaveOptions

	def waitTime = 10000
	Scriptom.inApartment {
	def dir = new File("c:/temp")
	def xlApp = new ActiveXObject('Excel.Application')
	def wdApp = new ActiveXObject('Word.Application')
	import com.jcraft.jsch.JSch
	import com.jcraft.jsch.Session
	import com.jcraft.jsch.UserInfo
	import com.jcraft.jsch.Channel
	import com.jcraft.jsch.ChannelExec

	def sshHost = '10.1.2.132'
	def sshUser = 'root'
	def sshPass = '******'
	def sshPort = 22
	import pandas as pd
	import numpy as np
	from sklearn.feature_extraction import DictVectorizer

	def encode_onehot(df, cols):
	"""
	One-hot encoding is applied to columns specified in a pandas DataFrame.

	Modified from: https://gist.github.com/kljensen/5452382
	import groovy.json.JsonOutput

	/**
	* A simple CSV file to Json converter
	*
	* The CSV file is expected to have a header row to identify the columns. These
	* columns will be used to generate the corresponding Json field.
	*
	* @author Marco Pas
	*/
	package groovy.csv

	/**
	* CSV slurper which parses text or reader content into a data structure of lists and maps.
	* <p>
	* Example usage:
	* <code><pre>
	* def slurper = new CsvSlurper()
	* def result = slurper.parseText('''
	* name, age
	#!/usr/bin/env python
	import sys
	import pandas as pd
	import pymongo
	import json



	def import_content(filepath):
	mng_client = pymongo.MongoClient('localhost', 27017)
	ORIGINAL_JENKINS_SERVER=
	ORIGINAL_SERVER_USER=

	NEW_JENKINS_SERVER=
	NEW_SERVER_USER=

	# ON THE ORIGINAL JENKINS SERVER
	ssh $ORIGINAL_SERVER_USER@$ORIGINAL_JENKINS_SERVER
	cd /var/lib/jenkins/
	for i in `ls jobs`; do echo "jobs/$i/config.xml";done > config.totar
	library(RCurl)
	val <- getURL('ldap://ldap.domain.net/DC=domain,DC=net?sAMAccountName?sub?(employeeID=0123456)',
	.opts=list(userpwd = "DOMAIN\\domainid:password"))