Skip to content

Instantly share code, notes, and snippets.

@aknirmal90
Last active January 15, 2019 11:54
Show Gist options
  • Save aknirmal90/32456ceb30d06ff27561e0fc658577e6 to your computer and use it in GitHub Desktop.
Save aknirmal90/32456ceb30d06ff27561e0fc658577e6 to your computer and use it in GitHub Desktop.
WITH miner_addresses AS (
SELECT
ARRAY_TO_STRING(outputs.addresses,',') AS miner
FROM
`crypto-etl-ethereum-dev.bitcoin_blockchain.blocks` AS blocks,
`crypto-etl-ethereum-dev.bitcoin_blockchain.transactions` AS transactions JOIN UNNEST(outputs) AS outputs
WHERE blocks.hash = transactions.block_hash
AND is_coinbase IS TRUE
AND ( FALSE
--
-- miner signatures from https://en.bitcoin.it/wiki/Comparison_of_mining_pools
--
OR coinbase_param LIKE '%4d696e656420627920416e74506f6f6c%' --AntPool
OR coinbase_param LIKE '%2f42434d6f6e737465722f%' --BCMonster
--BitcoinAffiliateNetwork
OR coinbase_param LIKE '%4269744d696e746572%' --BitMinter
--BTC.com
--BTCC Pool
--BTCDig
OR coinbase_param LIKE '%2f7374726174756d2f%' --Btcmp
--btcZPool.com
--BW Mining
OR coinbase_param LIKE '%456c6967697573%' --Eligius
--F2Pool
--GHash.IO
--Give Me COINS
--Golden Nonce Pool
OR coinbase_param LIKE '%2f627261766f2d6d696e696e672f%' --Bravo Mining
OR coinbase_param LIKE '%4b616e6f%' --KanoPool
--kmdPool.org
OR coinbase_param LIKE '%2f6d6d706f6f6c%' --Merge Mining Pool
--MergeMining
--Multipool
--P2Pool
OR coinbase_param LIKE '%2f736c7573682f%' --Slush Pool
--ZenPool.org
)
GROUP BY miner
HAVING COUNT(1) >= 20
),
non_miner_addresses AS (
SELECT
ARRAY_TO_STRING(outputs.addresses,',') AS non_miner
FROM `crypto-etl-ethereum-dev.bitcoin_blockchain.outputs` AS outputs
WHERE ARRAY_TO_STRING(outputs.addresses,',') NOT IN (SELECT miner FROM miner_addresses)
GROUP BY non_miner
LIMIT 1000000
),
output_stats AS (
SELECT
ARRAY_TO_STRING(outputs.addresses,',') AS output_address
, block_timestamp AS output_block_time
, DATE(EXTRACT(YEAR FROM block_timestamp), EXTRACT(MONTH FROM block_timestamp), 1) as output_block_month
, LAG(block_timestamp) OVER (PARTITION BY ARRAY_TO_STRING(outputs.addresses,',') ORDER BY block_timestamp) AS output_prev_block_time
, value
, transaction_hash
FROM `crypto-etl-ethereum-dev.bitcoin_blockchain.outputs` AS outputs
WHERE ARRAY_TO_STRING(outputs.addresses,',') IN (SELECT non_miner FROM non_miner_addresses)
),
input_stats AS (
SELECT
ARRAY_TO_STRING(inputs.addresses,',') AS input_address
, block_timestamp AS input_block_time
, DATE(EXTRACT(YEAR FROM block_timestamp), EXTRACT(MONTH FROM block_timestamp), 1) as input_block_month
, LAG(block_timestamp) OVER (PARTITION BY ARRAY_TO_STRING(inputs.addresses,',') ORDER BY block_timestamp) AS input_prev_block_time
, value
, transaction_hash
FROM `crypto-etl-ethereum-dev.bitcoin_blockchain.inputs` AS inputs
WHERE ARRAY_TO_STRING(inputs.addresses,',') IN (SELECT non_miner FROM non_miner_addresses)
),
output_summary AS (
SELECT
output_address
, MIN(output_block_month) AS output_month_min
, MAX(output_block_month) AS output_month_max
, COUNT(DISTINCT output_block_month) AS output_active_months
, COUNT(DISTINCT(transaction_hash)) AS total_output_tx
, SUM(value) AS total_tx_output_value
, AVG(value) AS mean_tx_output_value
, STDDEV(value) AS stddev_tx_output_value
, SUM(value)/COUNT(DISTINCT output_block_month) AS mean_monthly_output_value
, COUNT(transaction_hash)/COUNT(DISTINCT output_block_month) AS mean_monthly_output_count
, AVG(IF(output_prev_block_time IS NULL, NULL, UNIX_SECONDS(output_block_time) - UNIX_SECONDS(output_prev_block_time))) AS mean_output_idle_time
, STDDEV(IF(output_prev_block_time IS NULL, NULL, UNIX_SECONDS(output_block_time) - UNIX_SECONDS(output_prev_block_time))) AS stddev_output_idle_time
FROM output_stats
GROUP BY output_address
),
input_summary AS (
SELECT
input_address
, MIN(input_block_month) AS input_month_min
, MAX(input_block_month) AS input_month_max
, COUNT(DISTINCT input_block_month) AS input_active_months
, COUNT(DISTINCT(transaction_hash)) AS total_input_tx
, SUM(value) AS total_tx_input_value
, AVG(value) AS mean_tx_input_value
, STDDEV(value) AS stddev_tx_input_value
, SUM(value)/COUNT(DISTINCT input_block_month) AS mean_monthly_input_value
, COUNT(transaction_hash)/COUNT(DISTINCT input_block_month) AS mean_monthly_input_count
, AVG(IF(input_prev_block_time IS NULL, NULL, UNIX_SECONDS(input_block_time) - UNIX_SECONDS(input_prev_block_time))) AS mean_input_idle_time
, STDDEV(IF(input_prev_block_time IS NULL, NULL, UNIX_SECONDS(input_block_time) - UNIX_SECONDS(input_prev_block_time))) AS stddev_input_idle_time
FROM input_stats
GROUP BY input_address
)
SELECT
output_address
, FALSE AS is_miner
, output_month_min
, output_month_max
, output_active_months
, total_tx_output_value
, mean_tx_output_value
, stddev_tx_output_value
, total_output_tx
, mean_monthly_output_value
, mean_monthly_output_count
, mean_output_idle_time
, stddev_output_idle_time
, input_month_min
, input_month_max
, input_active_months
, total_tx_input_value
, mean_tx_input_value
, stddev_tx_input_value
, total_input_tx
, mean_monthly_input_value
, mean_monthly_input_count
, mean_input_idle_time
, stddev_input_idle_time
, UNIX_SECONDS(CAST(output_month_max AS TIMESTAMP)) - UNIX_SECONDS(CAST(output_month_min AS TIMESTAMP)) AS output_active_time
, UNIX_SECONDS(CAST(input_month_max AS TIMESTAMP)) - UNIX_SECONDS(CAST(input_month_min AS TIMESTAMP)) AS input_active_time
, UNIX_SECONDS(CAST(output_month_max AS TIMESTAMP)) - UNIX_SECONDS(CAST(input_month_max AS TIMESTAMP)) AS io_max_lag
, UNIX_SECONDS(CAST(output_month_min AS TIMESTAMP)) - UNIX_SECONDS(CAST(input_month_min AS TIMESTAMP)) AS io_min_lag
FROM input_summary, output_summary
WHERE input_summary.input_address = output_summary.output_address
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment