Skip to content

Instantly share code, notes, and snippets.

View myui's full-sized avatar

Makoto YUI myui

View GitHub Profile
@myui
myui / lof_hivemall.md
Last active August 29, 2015 14:22
Outlier Detection using Local Outlier Factor on Hivemall

This article explains how to find outliers using Local Outlier Factor (LOF) on Hivemall.

Data Preparation

create database lof;
use lof;

create external table hundred_balls (
  rowid int, 
@myui
myui / GreaterThanOrEqualsToUDTF.java
Created July 16, 2015 11:17
GreaterThanOrEqualsToUDTF.java
/*
* Hivemall: Hive scalable Machine Learning Library
*
* Copyright (C) 2015 Makoto YUI
* Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
@myui
myui / each_top_k.sql
Created July 16, 2015 11:36
each_top_k.sql
create table similarities
as
SELECT
each_top_k(
10, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
@myui
myui / parallel_similarity_join.sql
Created July 16, 2015 12:05
parallel_similarity_join.sql
create table similarities
as
WITH test_rnd as (
select
rand(31) as rnd,
id,
features
from
test_hivemall
),
@myui
myui / ArrayOverlapsUDF.java
Created July 17, 2015 06:23
ArrayOverlapsUDF.java
/*
* Hivemall: Hive scalable Machine Learning Library
*
* Copyright (C) 2015 Makoto YUI
* Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
@myui
myui / similarity10.sql
Created July 17, 2015 09:30
similarity10.sql
set hivevar:k=11;
create table similarities
as
WITH test_rnd as (
select
rand(31) as rnd,
id,
features
from
@myui
myui / DenseFeatureUDF.java
Created July 21, 2015 09:46
DenseFeatureUDF.java
/*
* Hivemall: Hive scalable Machine Learning Library
*
* Copyright (C) 2015 Makoto YUI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
@myui
myui / binarytree.txt
Created July 30, 2015 23:52
expected result of printing a binary tree
_______1_______
___2_______2___
_4___4___4___4_
8_8_8_8_8_8_8_8
@myui
myui / vmtest01.txt
Last active September 1, 2015 09:26
push x[10]; push 10.0; ifeq 205; push x[5]; push 275.5; ifle 68; push x[1]; push 7.0; ifeq 11; push 1; goto last; push x[15]; push 2.0; ifeq 26; push x[14]; push 2.5; ifle 24; push x[0]; push 49.5; ifle 22; push 0; goto last; push 1; goto last; push 1; goto last; push x[11]; push 327.5; ifle 66; push x[11]; push 265.5; ifle 64; push x[1]; push 6.0; ifeq 37; push 0; goto last; push x[11]; push 87.5; ifle 42; push 0; goto last; push x[11]; push 190.0; ifle 62; push x[9]; push 15.0; ifle 60; push x[3]; push 0.0; ifeq 58; push x[13]; push 264.0; ifle 56; push 1; goto last; push 0; goto last; push 0; goto last; push 1; goto last; push 0; goto last; push 1; goto last; push 0; goto last; push x[9]; push 18.5; ifle 128; push x[0]; push 25.0; ifle 76; push 1; goto last; push x[2]; push 2.0; ifeq 96; push x[11]; push 619.0; ifle 94; push x[6]; push 0.0; ifeq 87; push 1; goto last; push x[14]; push 3.5; ifle 92; push 0; goto last; push 1; goto last; push 0; goto last; push x[11]; push 153.0; ifle 101; push 0; goto last;
% 1. Title: Iris Plants Database
%
% 2. Sources:
% (a) Creator: R.A. Fisher
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
% (c) Date: July, 1988
%
% 3. Past Usage:
% - Publications: too many to mention!!! Here are a few.
% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"