UENISHI Kota (kuenishi): GitHub gists
#!/bin/sh
# Read, then overwrite, the ACL of bucket test2 through a proxy on localhost:8080.
echo "GET"
./s3curl.pl --id admin -- -s -v -x localhost:8080 http://test2.s3.amazonaws.com/?acl
echo "PUT"
# Grant READ to the AllUsers group (public-read).
./s3curl.pl --id admin -- -s -v -X PUT -x localhost:8080 http://test2.s3.amazonaws.com/?acl \
  -H "content-type: application/xml" \
  -d '<?xml version="1.0" encoding="UTF-8"?><AccessControlPolicy><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
kuenishi / fmt.erl
Created February 5, 2014 15:30
totally broken around type/spec.
-module(fmt).
-export([main/1]).

%% Pretty-print an Erlang source file with erl_pp. Note that
%% erl_parse:parse_form/1 consumes exactly one form (up to its
%% terminating dot), so this works only for a single-form file.
main([File]) ->
    {ok, Bin} = file:read_file(File),
    {ok, Scanned, _EndLocation} = erl_scan:string(binary_to_list(Bin)),
    {ok, Parsed} = erl_parse:parse_form(Scanned),
    io:fwrite("~s", [erl_pp:form(Parsed)]).
#!/usr/bin/env escript
%% List keys in the riak-cs-gc bucket via a $key secondary-index range query.
-include_lib("riakc/include/riakc.hrl").

main([]) ->
    {ok, Pid} = riakc_pb_socket:start_link(localhost, 8087, []),
    StartKey = <<>>,
    %% Now = list_to_binary(integer_to_list(riak_cs_gc:timestamp())),
    EndKey = <<"9">>,
    {ok, R} = riakc_pb_socket:get_index_range(Pid, <<"riak-cs-gc">>, <<"$key">>,
                                              StartKey, EndKey),
    io:format("~p~n", [R]).
-module(d).
-export([sum_user/0]).

-spec sum_user() -> {ok, [{binary(), integer()}]}
                  | {error, term()}.
sum_user() ->
    BucketUsages = [maybe_sum_bucket()],
    {ok, BucketUsages}.

%% Stub so this fragment compiles; the original gist presumably computed
%% real per-bucket usage here.
-spec maybe_sum_bucket() -> {binary(), integer()}.
maybe_sum_bucket() ->
    {<<"bucket">>, 0}.
-- columnar style (RCFile)
create table if not exists col_fluentlog
(dt string, tag string, host string, user string, method string, path string, code int, size int, referer string, agent string, time string, tag2 string)
row format serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
stored as inputformat 'org.apache.hadoop.hive.ql.io.RCFileInputFormat'
outputformat 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat';
-- ORC style
create table if not exists ocol_fluentlog
(dt string, tag string, host string, user string, method string, path string, code int, size int, referer string, agent string, time string, tag2 string)
stored as orc;
#!/bin/sh
# Start a Presto coordinator from the build tree via the exec-maven-plugin.
MAVEN_OPTS="-server -Xmx8G -XX:+UseConcMarkSweepGC -XX:+ExplicitGCInvokesConcurrent -XX:+CMSClassUnloadingEnabled -XX:+AggressiveOpts -XX:+HeapDumpOnOutOfMemoryError -XX:PermSize=150M -XX:MaxPermSize=150M -XX:ReservedCodeCacheSize=150M -Xbootclasspath/p:lib/floatingdecimal-0.1.jar -Djava.net.preferIPv4Stack=true -Dnode.environment=production -Dlog.levels-file=/home/kuenishi/hadoop/presto-coordinator/etc/log.properties -Dconfig=/home/kuenishi/hadoop/presto-coordinator/etc/config.properties -Dnode.data-dir=var/data -Dnode.id=ffffffff-ffff-ffff-ffff-fffffffffff"
MAVEN_OPTS="$MAVEN_OPTS" mvn exec:java -Dexec.mainClass="com.facebook.presto.server.PrestoServer"
Date/Time: 2014-03-12 20:19:30 +0900
OS Version: 10.9.2 (Build 13C64)
Architecture: x86_64
Report Version: 18
Event: Sleep Wake Failure
Steps: 327
Hardware model: MacBookPro11,1
Active cpus: 4
2014-03-18 14:38:31.692 [info] <0.7.0> Application lager started on node 'dev1@127.0.0.1'
2014-03-18 14:38:31.710 [info] <0.7.0> Application sasl started on node 'dev1@127.0.0.1'
2014-03-18 14:38:31.710 [info] <0.7.0> Application asn1 started on node 'dev1@127.0.0.1'
2014-03-18 14:38:31.713 [info] <0.7.0> Application crypto started on node 'dev1@127.0.0.1'
2014-03-18 14:38:31.714 [info] <0.7.0> Application public_key started on node 'dev1@127.0.0.1'
2014-03-18 14:38:31.748 [info] <0.7.0> Application ssl started on node 'dev1@127.0.0.1'
2014-03-18 14:38:31.769 [info] <0.7.0> Application riak_sysmon started on node 'dev1@127.0.0.1'
2014-03-18 14:38:31.797 [info] <0.7.0> Application os_mon started on node 'dev1@127.0.0.1'
2014-03-18 14:38:31.811 [info] <0.7.0> Application runtime_tools started on node 'dev1@127.0.0.1'
2014-03-18 14:38:31.853 [info] <0.7.0> Application erlang_js started on node 'dev1@127.0.0.1'

Presto connector development 1

One of the best design decisions the Presto designers made is that the engine is loosely coupled from storage.

Presto is a distributed SQL execution engine; it does not manage table schemas or metadata by itself, nor does it read data from storage by itself. Those jobs are delegated to plugins called connectors. Presto ships with a built-in Hive connector, which wires Hive's metastore and HDFS into Presto.

We can connect any storage to Presto by writing a connector plugin.

Plugin Architecture
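
As a sketch of how a connector enters the picture: Presto discovers plugins with java.util.ServiceLoader and asks each one for services of a given type; a connector plugin answers with a ConnectorFactory, which builds a Connector when a matching catalog properties file exists. The code below is a minimal sketch assuming the 2014-era com.facebook.presto.spi interfaces; the class names ExampleStoragePlugin and ExampleConnectorFactory are made up, and exact SPI signatures vary between Presto versions.

import java.util.Collections;
import java.util.List;
import java.util.Map;

import com.facebook.presto.spi.Connector;
import com.facebook.presto.spi.ConnectorFactory;
import com.facebook.presto.spi.Plugin;

public class ExampleStoragePlugin implements Plugin {
    // The engine queries each plugin for implementations of a service
    // type; a connector plugin responds to ConnectorFactory requests.
    @Override
    public <T> List<T> getServices(Class<T> type) {
        if (type == ConnectorFactory.class) {
            return Collections.singletonList(type.cast(new ExampleConnectorFactory()));
        }
        return Collections.emptyList();
    }
}

class ExampleConnectorFactory implements ConnectorFactory {
    @Override
    public String getName() {
        // Catalog name: a file etc/catalog/example.properties containing
        // connector.name=example makes Presto call create() below.
        return "example";
    }

    @Override
    public Connector create(String connectorId, Map<String, String> config) {
        // A real factory returns a Connector that wires up metadata,
        // split manager, and record readers; omitted in this sketch.
        throw new UnsupportedOperationException("sketch only");
    }
}

The factory's name is what a catalog's connector.name property refers to, so one plugin jar can serve many catalogs with different configurations.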

#!/bin/sh
# Upload 1000 x 800 copies of a 100KB file into the test bucket with s3cmd.
for j in `seq 1 1000`; do
    for i in `seq 1 800`; do
        s3cmd put 100KB s3://test/$j/100KB-$i;
        # s3cmd get s3://test/admin$i.txt -;
    done;
    #s3cmd ls s3://test/;
done
#for i in `seq 1 3000000`; do