Skip to content

Instantly share code, notes, and snippets.

@aknirmal90
aknirmal90 / bitcoin_balances.sql
Created January 10, 2019 10:32
Balance Query for Google BQ public bitcoin dataset
WITH double_entry_book AS (
-- debits
SELECT
array_to_string(inputs.addresses, ",") as address
, inputs.type
, -inputs.value as value
FROM `crypto-etl-bitcoin-prod.bitcoin_blockchain.inputs` as inputs
UNION ALL
WITH
input_metrics AS (
SELECT
ARRAY_TO_STRING(inputs.addresses,',') AS address,
SUM(inputs.value) as value,
COUNT(inputs.value) as cnt,
AVG(inputs.value) as mean,
STDDEV(inputs.value) as std_dev
FROM `crypto-etl-ethereum-dev.bitcoin_blockchain.transactions` AS transactions JOIN UNNEST(transactions.inputs) AS inputs
GROUP BY 1
WITH output_stats AS (
SELECT
ARRAY_TO_STRING(outputs.addresses,',') AS output_address
, block_timestamp AS output_block_time
, DATE(EXTRACT(YEAR FROM block_timestamp), EXTRACT(MONTH FROM block_timestamp), 1) as output_block_month
, LAG(block_timestamp) OVER (PARTITION BY ARRAY_TO_STRING(outputs.addresses,',') ORDER BY block_timestamp) AS output_prev_block_time
, value
, transaction_hash
FROM `crypto-etl-ethereum-dev.bitcoin_blockchain.outputs` AS outputs
),
WITH miner_addresses AS (
SELECT
ARRAY_TO_STRING(outputs.addresses,',') AS miner
FROM
`crypto-etl-ethereum-dev.bitcoin_blockchain.blocks` AS blocks,
`crypto-etl-ethereum-dev.bitcoin_blockchain.transactions` AS transactions JOIN UNNEST(outputs) AS outputs
WHERE blocks.hash = transactions.block_hash
AND is_coinbase IS TRUE
AND ( FALSE
--
WITH miner_addresses AS (
SELECT
ARRAY_TO_STRING(outputs.addresses,',') AS miner
FROM
`crypto-etl-ethereum-dev.bitcoin_blockchain.blocks` AS blocks,
`crypto-etl-ethereum-dev.bitcoin_blockchain.transactions` AS transactions JOIN UNNEST(outputs) AS outputs
WHERE blocks.hash = transactions.block_hash
AND is_coinbase IS TRUE
AND ( FALSE
--
CREATE INDEX ON :Block(height);
CREATE INDEX ON :Transaction(hash);
CREATE INDEX ON :Output(tx_hash, output_index);
CREATE INDEX ON :Address(address_string);
LOAD CSV WITH HEADERS FROM "http://storage.googleapis.com/bitcoin-neo4j-etl/blocks.csv" AS row
MERGE (b:Block {height: toInt(row.number)})
ON CREATE SET
b.hash = row.hash,
b.size = toInt(row.size),
cat transactions.json | jq -cr 'del(.inputs, .outputs)' > txns.json
cat transactions.json | jq -cr '{hash: .hash, outputs: .outputs[]} | {hash: .hash, index: .outputs.index, addresses: .outputs.addresses, required_signatures: .outputs.required_signatures, type: .outputs.type, value: .outputs.value}' > outputs.json
cat transactions.json | jq -cr '{hash: .hash, outputs: .outputs[]} | {hash: .hash, index: .outputs.index, addresses: .outputs.addresses[]}' > outputs_addresses.json
cat transactions.json | jq -cr '{hash: .hash, inputs: .inputs[]} | { hash: .hash, index: .inputs.index, spent_transaction_hash: .inputs.spent_transaction_hash, spent_output_index: .inputs.spent_output_index }' > inputs.json
1. Cross validate number of blocks
i)BQ
SELECT
COUNT(*) AS num_blocks
FROM
`bigquery-public-data.crypto_bitcoin.blocks` AS blocks
WHERE
blocks.number <= 300
################################################################################
1. Find number of addresses sending funds to an address
################################################################################
SELECT
COUNT(DISTINCT ARRAY_TO_STRING(inputs.addresses, '')) AS addresses
, MIN(block_timestamp) as start_date
FROM `bigquery-public-data.crypto_bitcoin.transactions` AS txns
, UNNEST(txns.outputs) AS outputs
, UNNEST(txns.inputs) AS inputs
WITH shared_inputs AS (
SELECT
ARRAY_TO_STRING(inputs.addresses, '') AS address
FROM `bigquery-public-data.crypto_bitcoin.transactions` AS txns
, UNNEST(txns.inputs) AS inputs
WHERE txns.hash IN
(
SELECT
txns.hash as tx_hash
FROM `bigquery-public-data.crypto_bitcoin.transactions` AS txns