César de Pablo zdepablo

## tag-sentiment-textalytics.py
import smaclient

license_key = <textalytics-license-key>;

textalytics = smaclient.SmaClient(license_key)

doc = smaclient.Document('0', 'Italia se indigna por la negativa de #Barilla a hacer anuncios con gays')
doc.language = 'es'
doc.source = 'TWITTER'


## search-twitter.py
from TwitterAPI import TwitterAPI

# Go to http://dev.twitter.com and create an app.
# The consumer key and secret will be generated for you after
consumer_key = <consumer-key>
consumer_secret = <consumer-secret>

# After the step above, you will be redirected to your app's page.
# Create an access token under the "Your access token" section
access_token_key = <access-token-key>

## git-branches-by-commit-date.sh
# Credit http://stackoverflow.com/a/2514279
for branch in `git branch -r | grep -v HEAD`;do echo -e `git show --format="%ci %cr" $branch | head -n 1` \\t$branch; done | sort -r

## extractranks.py
#!/usr/bin/python
# -*- coding: utf-8 -*-

from lxml import html,etree
import requests
import unicodecsv

def group(iterator, count):
	itr = iter(iterator)
	while True:

## 0_reuse_code.js
// Use Gists to store code you would like to remember later on
console.log(window); // log the "window" object to the console

## gist:3587a6755b080b85136c
#Number of active users per service - with a cutoff

SELECT  `service`, COUNT(*) num_users
FROM
(
SELECT  `service`, `hash_key`, COUNT(*) num_requests
FROM `log`
WHERE `date_operation` > '2014-12-01'
GROUP BY `service`, `hash_key`
ORDER BY num_requests DESC

## hadoop-fs-receipts
# Reference: http://hadoop.apache.org/docs/r2.7.0/hadoop-project-dist/hadoop-common/FileSystemShell.html

# Show disk usage in human format
  hadoop fs -du -s -h /user/hive/warehouse/da_cdepablo*

# Show permissions
 hadoop fs -getfacl /user/hive/warehouse/da_cdepablo*

# Change permissions
 hadoop fs -setfacl -R -m other::rwx /user/hive/warehouse/da_cdepablo

## hive-receipts
# Overwrite a non-partitioned table with its own contents
CREATE table xx_COPY LIKE xx;

INSERT OVERWRITE TABLE xx
SELECT * FROM xx

# Overwrite a partitioned table with its own contents
CREATE table xx_COPY LIKE xx;

SHOW PARTITIONS ABC;

## split_strat_scale.r
library(caret)

## select training indices preserving class distribution
in.train <- createDataPartition(yclass, p=0.8, list=FALSE)
summary(factor(yclass))
ytra <- yclass[in.train]; summary(factor(ytra))
ytst <- yclass[-in.train]; summary(factor(ytst))

## standardize features: training parameters of scaling for test-part
Xtra <- scale(X[in.train,])

## gist:daf71447c82391c1b4311ffcceec2ebe

# java -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=12605 Main # Name of .class program

export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/pr/cloudera/parcels/CDH-5.5.2-1.cdh5.5.2.p0.4/lib/hadoop/lib/native

java -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=12611 -cp target/da_record_linkage-0.0.1-SNAPSHOT-jar-with-dependencies.jar da_record_linkage.TestSnappy

netstat -plten | grep LISTEN | grep :120* # See if there is any open port
	import smaclient

	license_key = <textalytics-license-key>;

	textalytics = smaclient.SmaClient(license_key)

	doc = smaclient.Document('0', 'Italia se indigna por la negativa de #Barilla a hacer anuncios con gays')
	doc.language = 'es'
	doc.source = 'TWITTER'
	from TwitterAPI import TwitterAPI

	# Go to http://dev.twitter.com and create an app.
	# The consumer key and secret will be generated for you after
	consumer_key = <consumer-key>
	consumer_secret = <consumer-secret>

	# After the step above, you will be redirected to your app's page.
	# Create an access token under the "Your access token" section
	access_token_key = <access-token-key>
	# Credit http://stackoverflow.com/a/2514279
	for branch in `git branch -r | grep -v HEAD`;do echo -e `git show --format="%ci %cr" $branch | head -n 1` \\t$branch; done | sort -r
	#!/usr/bin/python
	# -*- coding: utf-8 -*-

	from lxml import html,etree
	import requests
	import unicodecsv

	def group(iterator, count):
		itr = iter(iterator)
		while True:
	// Use Gists to store code you would like to remember later on
	console.log(window); // log the "window" object to the console
	#Number of active users per service - with a cutoff

	SELECT `service`, COUNT(*) num_users
	FROM
	(
	SELECT `service`, `hash_key`, COUNT(*) num_requests
	FROM `log`
	WHERE `date_operation` > '2014-12-01'
	GROUP BY `service`, `hash_key`
	ORDER BY num_requests DESC
	# Reference: http://hadoop.apache.org/docs/r2.7.0/hadoop-project-dist/hadoop-common/FileSystemShell.html

	# Show disk usage in human format
	hadoop fs -du -s -h /user/hive/warehouse/da_cdepablo*

	# Show permissions
	hadoop fs -getfacl /user/hive/warehouse/da_cdepablo*

	# Change permissions
	hadoop fs -setfacl -R -m other::rwx /user/hive/warehouse/da_cdepablo
	# Overwrite non-partitioned table with their own contents
	CREATE table xx_COPY LIKE xx;

	INSERT OVERWRITE TABLE xx
	SELECT * FROM xx

	# Overwrite partitioned table with their own contents
	CREATE table xx_COPY LIKE xx;

	SHOW PARTITIONS ABC;
	library(caret)

	## select training indices preserving class distribution
	in.train <- createDataPartition(yclass, p=0.8, list=FALSE)
	summary(factor(yclass))
	ytra <- yclass[in.train]; summary(factor(ytra))
	ytst <- yclass[-in.train]; summary(factor(ytst))

	## standardize features: training parameters of scaling for test-part
	Xtra <- scale(X[in.train,])

	# java -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=12605 Main # Name of .class program

	export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/pr/cloudera/parcels/CDH-5.5.2-1.cdh5.5.2.p0.4/lib/hadoop/lib/native

	java -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=12611 -cp target/da_record_linkage-0.0.1-SNAPSHOT-jar-with-dependencies.jar da_record_linkage.TestSnappy

	netstat -plten | grep LISTEN | grep :120* # See if there is any open port