# for auc()
> library(pROC)
# for performance plots
> library(ROCR)
Loading required package: gplots
KernSmooth 2.23 loaded
Copyright M. P. Wand 1997-2009
Attaching package: ‘gplots’
# for auc()
> library(pROC)
# for performance plots
> library(ROCR)
Loading required package: gplots
KernSmooth 2.23 loaded
Copyright M. P. Wand 1997-2009
Attaching package: ‘gplots’
// Hadoop Writable wrapper holding a BigDecimal value.
// NOTE(review): fragment is truncated — Writable's write(DataOutput) and
// readFields(DataInput) methods and the closing brace are not visible here.
// The trailing "| |" tokens look like table-extraction residue, not Java.
public static class BigDecimalWritable implements Writable { | |
// the wrapped value; in the visible code it is only assigned in the constructor
private BigDecimal value; | |
public BigDecimalWritable(BigDecimal value) { | |
this.value = value; | |
} | |
// accessor for the wrapped BigDecimal
public BigDecimal value() { | |
return value; | |
} |
// Sanity test for sample statistics against simulation-derived reference limits.
// NOTE(review): truncated — only the first case (normal distribution) is
// visible; check() and normal() are defined elsewhere in the file.
// Trailing "| |" tokens are table-extraction residue, not Java.
@Test | |
public void testStats() { | |
// the reference limits here were derived using a numerical simulation where I took | |
// 10,000 samples from the distribution in question and computed the stats from that | |
// sample to get min, 25%-ile, median and so on. I did this 1000 times to get 5% and | |
// 95% confidence limits for those values. | |
// symmetrical, well behaved | |
System.out.printf("normal\n"); | |
check(normal(10000)); |
// Intersection of two e-mail lists: copy the first into a mutable HashSet,
// then retainAll() drops every address not also present in the second list,
// leaving `common` with exactly the addresses that appear in both.
Set<String> common = Sets.newHashSet(firstListOfEmails);
common.retainAll(secondListOfEmails);
// Holder class for a Drill "hLookup" UDF (SIMPLE scope, NULL-if-NULL nulls
// handling) whose first parameter names the table to read from.
// NOTE(review): truncated — the function body, @Output and any further
// @Param fields are not visible.  Trailing "| |" tokens are
// table-extraction residue, not Java.
public class HbaseLookup { | |
// NOTE(review): logger is bound to TrigoMathFunctions.class — looks like a
// copy/paste slip; presumably it should be HbaseLookup.class.  Verify.
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TrigoMathFunctions.class); | |
// utility/holder class: private constructor prevents instantiation
private HbaseLookup(){} | |
@FunctionTemplate(name = "hLookup", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL) | |
public static class Lookup implements DrillSimpleFunc { | |
@Param VarCharHolder table; // the table to read from |
# Pick 10 cluster centers by choosing corners of the 10-dimensional unit
# hypercube at random.  A random 0/1 matrix is almost always non-singular,
# and det(centers) == 0 cheaply flags the occasional duplicate row, in
# which case we simply redraw.
d = 0
while (d == 0) {
    centers = matrix(runif(10 * 10) > 0.5, ncol = 10) + 0
    # a duplicate (or otherwise linearly dependent) row makes the
    # determinant exactly zero, so this detects the bad draw
    d = det(centers)
}
# start x out by assigning each of 10,000 points a cluster label:
# runif over (1e-10, 10) followed by ceiling() yields integers 1..10
x = data.frame(n = ceiling(runif(10000, 1e-10, 10)))
# Experiments with t-digest in R

# Upper bound on cluster size at quantile q for n total points in the
# standard t-digest: proportional to q*(1-q), so clusters near the median
# may be large while clusters near the extreme quantiles (q near 0 or 1)
# must stay small.
standard.size.bound = function(n, q) {
    4 * n * q * (1 - q)
}

# Degenerate comparison bound: a constant limit of n regardless of q,
# i.e. no size taper toward the tails.
constant.size.bound = function(n, q) {
    n
}
Log in to the cluster:
ted:downloads$ ssh se-node10.se.lab
Last login: Mon Mar 23 17:35:37 2015 from 10.250.0.220
Please check the cluster reservation calendar:
https://www.google.com/calendar/embed?src=maprtech.com_2d38343133383836382d313737%40resource.calendar.google.com
Poke around looking for my volume and such:
[tdunning@se-node10 ~]$ ls /mapr/se1/user/t
# Python 2 fragment (string.join and opening the CSV file in 'rb' mode for
# csv.reader are Py2 idioms) that reads tab-separated Mahout output from
# the file named 'x'.
# NOTE(review): truncated — the loop that consumes csv_reader and fills
# `indicators` is not visible here.  `json` is imported twice, and the
# trailing "| |" tokens are table-extraction residue, not source code.
import fileinput | |
from string import join | |
import json | |
import csv | |
import json | |
### read the output from MAHOUT and collect into hash ### | |
with open('x','rb') as csv_file: | |
csv_reader = csv.reader(csv_file,delimiter='\t') | |
# accumulators presumably used by the (truncated) parsing loop below
old_id = "" | |
indicators = [] |
{"t":[100.44380099679421,200.7658959087325,301.6982003576183],"v1":[-3.2124876152877886,-17.9729521628487,-11.10212822944568],"v2":[20.668968387311498,39.70574384652023,33.97732641377096]} | |
{"t":[402.02369599592936,500.361291107067,601.0570362962695],"v1":[-10.357254868666516,-15.358599092346992,-18.17981637433697],"v2":[28.14680001787952,24.449514473332897,36.90232317832954]} | |
{"t":[700.3591871348365,801.7683561318721,901.3906331330202],"v1":[-4.184888093902327,2.266724195547855,1.8027188779133356],"v2":[43.63398291814092,37.96260382309654,34.558440299721525]} | |
{"t":[998.8508331065062,1097.1401685144158,1198.1819481032155],"v1":[-1.3711844607134631,5.5027134050661735,5.111544086242255],"v2":[34.518652181145356,39.89433181166691,44.621036340105604]} | |
{"t":[1298.1931737425077,1398.9874465151283,1498.3317303744573],"v1":[5.597833929076392,23.21742527898042,20.160346681365283],"v2":[46.6957213571881,36.773578638699526,26.80096644321689]} | |
{"t":[1598.4807447946152,1698.0002145693118,1797.9744831102964],"v1":[30.550780 |