Skip to content

Instantly share code, notes, and snippets.

@IsurangaPerera
Created February 19, 2020 07:43
Show Gist options
  • Save IsurangaPerera/0090ef25d8763db7793b135770f26b84 to your computer and use it in GitHub Desktop.
Save IsurangaPerera/0090ef25d8763db7793b135770f26b84 to your computer and use it in GitHub Desktop.
# Launch the container: mount ~/.bitcoin as the blockchain datadir, publish
# host 80 -> container 4200 (presumably the Angular dev server), the bitcoind
# ports 18333/18332 (p2p/RPC testnet defaults — confirm), and 5000 (API).
docker run -it -d -v ~/.bitcoin:/blockchain -p 80:4200 -p 18333:18333 -p 18332:18332 -p 5000:5000 84fe0610c41d
# Inside the container: start backing services first, then the app processes.
redis-server &
service postgresql start
# NOTE(review): bitcoind runs in the foreground as written — presumably it is
# meant to be backgrounded (bitcoind -daemon or &) so the next commands run.
bitcoind
ng serve --host 0.0.0.0
celery worker -A app.celery -l info &
Node specs: 64 GB RAM, 8 cores, 1 TB HDD
BlockSci parser = 11.5 hours with 16GB
Bitcoin-ETL parsing + enriching = 1.4hrs + 1.3hrs = 2.7hrs
loading 100GB csv to postgres DB using bulk insert = 47secs
querying for all addresses that hold 35+ bitcoins
Other queries to run:
* Average transaction fee
-- Average miner fee across all transactions.
SELECT AVG(fee) FROM transaction;
* Number of coinbase transactions
-- Number of coinbase transactions (a bare boolean predicate is equivalent
-- to "= true": NULL rows are excluded either way).
SELECT COUNT(*)
FROM transaction
WHERE is_coinbase;
* Avg block fee
-- Average total fee per block.
-- FIX: Postgres rejects a derived table without an alias
-- ("subquery in FROM must have an alias"), so one is added.
SELECT AVG(sum_block_fee)
FROM (
    SELECT block_number, SUM(fee) AS sum_block_fee
    FROM transaction
    GROUP BY block_number
) AS block_fees;
* Balance held by each address
-- Balance held by each address: satoshis received (outputs) minus satoshis
-- spent (inputs).
-- FIXES vs. the original:
--   * the original referenced a table named "outputs", but the DDL in this
--     file defines "output_address";
--   * the signs were inverted (inputs were summed positively, outputs
--     negatively), which yields the NEGATED balance — outputs credit an
--     address, inputs debit it;
--   * the grouping column is now selected so each balance is attributable.
SELECT Address, SUM(amount) AS balance
FROM (
    SELECT Address, SUM(Value) AS amount FROM output_address GROUP BY Address
    UNION ALL
    SELECT Address, -SUM(Value) AS amount FROM input_address GROUP BY Address
) AS derived
GROUP BY Address;
-- One row per Bitcoin transaction. Columns disabled with "--" exist in the
-- raw bitcoin-etl export but are not loaded here.
-- FIXES: "#" is not a SQL comment in Postgres (it caused syntax errors);
-- replaced with "--". DEFAULT '0' replaced with the numeric literal 0.
CREATE TABLE IF NOT EXISTS transaction (
    fee bigint NOT NULL,                    -- presumably satoshis — confirm
    block_number integer NOT NULL,
    input_value bigint NOT NULL DEFAULT 0,
    index integer NOT NULL,                 -- position of the tx in its block
    -- block_timestamp integer NOT NULL,
    is_coinbase boolean,
    -- version integer NOT NULL,
    -- size integer NOT NULL,
    output_count integer NOT NULL,
    -- lock_time integer NOT NULL,
    output_value bigint NOT NULL,
    hash varchar(100),                      -- PK below makes this NOT NULL
    -- block_hash varchar(100),
    input_count integer NOT NULL,
    -- virtual_size integer NOT NULL,
    PRIMARY KEY (hash)
);
-- One row per block.
-- FIXES: "#" comments replaced with "--"; INT(20) is MySQL syntax and is
-- invalid in Postgres (now plain integer); Hash widened from CHAR(30) —
-- Bitcoin block hashes are 64 hex characters, so CHAR(30) cannot hold one —
-- to varchar(100), matching transaction.hash.
CREATE TABLE IF NOT EXISTS block (
    Hash varchar(100) PRIMARY KEY,
    -- Size integer,
    -- StrippedSize integer,
    -- Weight varchar(20),
    -- Version varchar(20),
    -- MerkleRoot varchar(100),
    Timestamp varchar(20),       -- NOTE(review): consider bigint/timestamptz
    -- Nonce varchar(30),
    -- CoinbaseParam varchar(30),
    TransactionCount integer
);
-- One row per transaction input, denormalized with its source address.
-- FIX: "#" is not a SQL comment in Postgres; disabled columns now use "--".
CREATE TABLE IF NOT EXISTS input_address (
    Id serial,
    TxHash varchar(100),
    Index integer,                          -- input position within the tx
    RequiredSignatures integer,
    -- ScriptASM varchar(500),
    -- ScriptHex varchar(500),
    -- Sequence bigint,
    Spent_output_index integer,
    Spent_transaction_hash varchar(100),
    Type varchar(20),
    Value bigint NOT NULL,                  -- presumably satoshis — confirm
    Address varchar(50) NOT NULL,
    PRIMARY KEY (Id)
);
-- One row per transaction output, denormalized with its destination address.
-- FIXES: "#" comments replaced with "--"; column casing aligned with
-- input_address (unquoted identifiers fold the same way, so behavior is
-- unchanged).
CREATE TABLE IF NOT EXISTS output_address (
    Id serial,        -- NOTE(review): unused surrogate key; PK is (TxHash, Index)
    TxHash varchar(100),
    Index integer NOT NULL,                 -- output position within the tx
    RequiredSignatures integer NOT NULL,
    -- ScriptASM varchar(500),
    -- ScriptHex varchar(500),
    Type varchar(20),
    Value bigint NOT NULL,                  -- presumably satoshis — confirm
    Address varchar(50) NOT NULL,
    PRIMARY KEY (TxHash, Index)
);
# JSON to CSV
# Drops nested inputs/outputs, then emits a header row plus one CSV row per
# transaction. NOTE(review): `jq --slurp` loads the ENTIRE file into memory;
# for large dumps use the batched mapfile loop further below instead.
cat < transactions.json | jq -c 'del(.outputs, .inputs)'| jq --slurp . | jq -r '(.[0] | keys_unsorted) as $keys | $keys, map([.[ $keys[] ]])[] | @csv' > testf.csv
# Copy CSV to Postgres — run inside psql (\COPY reads files client-side).
# FIXES: filenames quoted; the second \COPY originally targeted
# transaction(address, value), but the transaction table has no such columns —
# the outputs CSV belongs in output_address.
\COPY transaction(input_count, block_number, index, virtual_size, fee, block_timestamp, input_value, output_value, hash, block_hash, size, version, output_count, is_coinbase, lock_time) FROM 'transactions.csv' CSV HEADER DELIMITER E','
\COPY output_address(address, value) FROM 'outputs.csv' CSV DELIMITER E','
# Apache Ignite
# Connect with the sqlline thin JDBC client, then bulk-load the CSV produced
# above (testf.csv) into the transaction table via Ignite's COPY command.
sqlline -u jdbc:ignite:thin://127.0.0.1
0: jdbc:ignite:thin://127.0.0.1>COPY FROM "testf.csv" INTO transaction(fee, block_number, input_value, index, block_timestamp, is_coinbase, version, size, output_count, lock_time, output_value, hash, block_hash, input_count, virtual_size) FORMAT CSV;
# Export blocks 0..600000 from a local bitcoind node via bitcoin-etl.
# FIX: --max-workers was left without a value (the option requires one);
# 8 matches the 8-core node described above.
bitcoinetl export_blocks_and_transactions --start-block 0 --end-block 600000 \
    --provider-uri http://username:password@localhost:8332 \
    --blocks-output blocks.json --transactions-output transactions.json --max-workers 8
# Stream transactions.json to CSV in 1,000,000-line batches so memory stays
# bounded (unlike the single-shot `jq --slurp` pipeline above).
# FIX: the original used unquoted `echo ${ary[@]}`, which word-splits and
# glob-expands the JSON text; printf with a quoted array expansion emits each
# object verbatim on its own line.
while mapfile -t -n 1000000 ary && ((${#ary[@]})); do
    printf '%s\n' "${ary[@]}" | jq -c 'del(.outputs, .inputs)' | jq --slurp . | jq -r '(.[0] | keys_unsorted) as $keys | map([.[ $keys[] ]])[] | @csv'
done < transactions.json
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment