原題:Dynamo: Amazon’s Highly Available Key-value Store
原文: Amazon's Dynamo - All Things Distributed (PDF Version)
This article was translated by @ono_matope. Please contact me if you find any problems.
use strict; | |
use warnings; | |
my @offset = (20, 20, -20, -20); | |
while(<*.pdf>) | |
{ | |
next if /-cropped/; | |
原題:Dynamo: Amazon’s Highly Available Key-value Store
原文: Amazon's Dynamo - All Things Distributed (PDF Version)
This article was translated by @ono_matope. Please contact me if you find any problems.
require 'td' | |
require 'td-client' | |
require 'time' | |
require 'date' | |
# Script setup: load the API key, build the Treasure Data client, and read
# the command-line argument.
# auth | |
# The key is the entire contents of client.cfg, read as UTF-8 text.
# NOTE(review): File.read returns the file verbatim, so a trailing newline in
# client.cfg would become part of the key — confirm the file has none.
auth_key = File.read("client.cfg", :encoding => Encoding::UTF_8) | |
cln = TreasureData::Client.new(auth_key) | |
# argv | |
# First command-line argument; nil when the script is run without arguments.
# NOTE(review): presumably a date string, going by the variable name — the
# expected format is not visible here; confirm against the rest of the script.
date = ARGV[0] |
-- Presto | |
SELECT | |
a.td_client_id as td_client_id | |
,a.td_os as td_os | |
,'Weekly_Over5_Accesses' as segment_name | |
FROM | |
( | |
SELECT | |
td_client_id |
--hive | |
SELECT | |
TD_SESSIONIZE(time, 86400, td_ip) as session_id | |
, time | |
, td_ip | |
, td_path | |
, td_client_id | |
, td_title | |
, td_browser | |
, td_color |
-- Daily session count: one row per calendar day with the number of distinct
-- session_id values whose session_start_time falls on that day.
-- The day key is session_start_time parsed with TD_TIME_PARSE and formatted
-- as 'yyyy-MM-dd'; the same expression is used to group and to sort.
-- Reads from session_summary; rows come back in ascending date order.
-- NOTE(review): no timezone argument is passed to TD_TIME_PARSE or
-- TD_TIME_FORMAT, so day boundaries follow the functions' default timezone
-- (presumably UTC) — confirm this matches the reporting timezone.
-- NOTE(review): the query engine (Hive vs. Presto) is not stated for this
-- query — confirm before running.
SELECT | |
TD_TIME_FORMAT(TD_TIME_PARSE(session_start_time),'yyyy-MM-dd') as date | |
,count(distinct session_id) as session_cnt | |
FROM | |
session_summary | |
GROUP BY | |
TD_TIME_FORMAT(TD_TIME_PARSE(session_start_time),'yyyy-MM-dd') | |
ORDER BY | |
TD_TIME_FORMAT(TD_TIME_PARSE(session_start_time),'yyyy-MM-dd') |
ito@ito:~/embulk$ embulk preview config/s3_fluent_load.yml | |
2015-12-02 00:28:28.930 -0800: Embulk v0.7.1 | |
2015-12-02 00:28:30.186 -0800 [INFO] (preview): Loaded plugin embulk-input-s3 (0.2.3) | |
2015-12-02 00:28:30.229 -0800 [INFO] (preview): Loaded plugin embulk-parser-fluent-s3-log (0.0.1) | |
java.lang.IllegalArgumentException: Multiple entries with same key: material_id=org.embulk.spi.util.dynamic.StringColumnSetter@5a82bc58 and material_id=org.embulk.spi.util.dynamic.StringColumnSetter@4aab7195 | |
at com.google.common.collect.ImmutableMap.checkNoConflict(com/google/common/collect/com/google/common/collect/com/google/common/collect/ImmutableMap.java:150) | |
at com.google.common.collect.RegularImmutableMap.checkNoConflictInBucket(com/google/common/collect/com/google/common/collect/com/google/common/collect/RegularImmutableMap.java:104) | |
at com.google.common.collect.RegularImmutableMap.<init>(com/google/common/collect/com/google/common/collect/com/google/common/collect/RegularImmutableMap.java:70) |
in: | |
type: s3 | |
bucket: td-test-data | |
path_prefix: test/example.log | |
access_key_id: TTTTTTTTTTTTTTTTTT | |
secret_access_key: XXXXXXXXXXXXXXXXXXXXXXXXXXXX | |
parser: | |
type: fluent-s3-log | |
columns: | |
- {name: uid, type: string} |
in: | |
type: postgresql | |
host: 00.00.00.00 | |
user: tank_user | |
password: "XXXXXXXXXXXXXXXX" | |
database: datatank | |
query: | | |
SELECT os,device,flag,count | |
FROM device_master | |
out: |
in: | |
type: s3 | |
access_key_id: XXXXXXXXXX | |
secret_access_key: YYYYYYYYYY | |
bucket: sample_bucket | |
path_prefix: path/to/sample_file | |
parser: | |
charset: UTF-8 | |
newline: CRLF | |
type: csv |