Skip to content

Instantly share code, notes, and snippets.

@bczhc
Created November 6, 2024 17:25
Show Gist options
  • Save bczhc/48e34122c58181bab03c7717752c77df to your computer and use it in GitHub Desktop.
Save bczhc/48e34122c58181bab03c7717752c77df to your computer and use it in GitHub Desktop.
Bitcoin OP_RETURN messages: write to SQLite database #bitcoin
use bitcoin::{OutPoint, Script};
use bitcoin_demo::{extract_op_return, han_char, new_parser, EncodeHex};
use chrono::TimeZone;
use rusqlite::{params, Connection};
fn main() {
let mut db = Connection::open("./op-return-messages.db").unwrap();
db.execute(
r#"create table if not exists op_return_msg
(
data blob not null,
hex text not null,
text text not null,
block_timestamp integer not null,
block_height integer not null,
txo_txid text not null,
txo_vout integer not null
)"#,
params![],
)
.unwrap();
let db_transaction = db.transaction().unwrap();
let mut stmt = db_transaction.prepare(r#"insert into op_return_msg (data, hex, text, block_timestamp, block_height, txo_txid, txo_vout)
values (?, ?, ?, ?, ?, ?, ?)"#).unwrap();
// let parser = block_parser_recent(10 * 30 * 24 * 60 / 10 /* 10 months */);
let parser = new_parser();
for (height, block) in parser {
for tx in block.txdata {
for (txo_idx, txo) in tx.output.iter().enumerate() {
let script = &txo.script_pubkey;
if script.is_op_return() {
let Some(data) = extract_op_return(Script::from_bytes(script.as_bytes()))
else {
continue;
};
// only accept data that is valid UTF-8
let Ok(text) = std::str::from_utf8(data) else {
continue;
};
// only accept printable data
if text.chars().any(|x| x.is_ascii_control()) {
continue;
}
// reject text with all asciis but without any space
if text.chars().all(|x| x.is_ascii()) && !text.contains(' ') {
continue;
}
let data = trim_null(data);
// validate_han(s) && !s.is_empty()
// !s.is_empty() && s.contains("bitcoin")
// || s.contains("Bitcoin")
// || s.contains("BITCOIN")
// || s.contains("?")
// s.chars().any(han_char) && !s.as_bytes().starts_with(&hex!("146f6d6e69"))
let predicate = text.chars().any(han_char) /* contains Han characters*/
&& data.len() >= 3 * 2 /* more than two Han characters */
&& !text.starts_with("omni") /* filter out some specific string patterns */;
let predicate2 = text.contains("bitcoin")
|| text.contains("BITCOIN")
|| text.contains("Bitcoin");
if
/*predicate || predicate2*/
true {
// OP_RETURN has all Han characters
let block_time = chrono::Local
.timestamp_millis_opt(block.header.time as i64 * 1000)
.unwrap();
let txid_hex = tx.compute_txid().to_string();
println!(
"Block Time: {:?}, Height: {height}, txo: {}, data: {}",
block_time,
OutPoint {
txid: txid_hex.parse().unwrap(),
vout: txo_idx as u32,
},
text.replace("\n", " ")
);
stmt.execute(params![
data,
data.hex(),
text,
block.header.time,
height,
txid_hex,
txo_idx as u32,
])
.unwrap();
}
}
}
}
}
drop(stmt);
db_transaction.commit().unwrap();
drop(db);
}
fn trim_null(bytes: &[u8]) -> &[u8] {
let Some(start) = bytes.iter().position(|&x| x != 0) else {
return &[];
};
let Some(end) = bytes.iter().rposition(|&x| x != 0) else {
return &[];
};
&bytes[start..=end]
}
fn validate_han(s: &str) -> bool {
s.chars().all(han_char)
}
/// <snippet>
pub fn new_parser() -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
let options = Options::default().order_output();
let blk_dir = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks";
let mut headers = HeaderParser::parse(blk_dir).unwrap();
// headers.reverse();
let parser = DefaultParser.parse_with_opts(&headers, options);
let mut height = headers.len() - 1;
(0..=height)
// .rev()
.zip(parser.into_iter().map(Result::unwrap))
}
pub fn new_parser_rev() -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
let options = Options::default().order_output();
let blk_dir = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks";
let mut headers = HeaderParser::parse(blk_dir).unwrap();
headers.reverse();
let parser = DefaultParser.parse_with_opts(&headers, options);
let mut height = headers.len() - 1;
(0..=height)
.rev()
.zip(parser.into_iter().map(Result::unwrap))
}
const BITCOIN_CORE_BLK_DIR: &str = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks";
pub fn parse_headers() -> Vec<ParsedHeader> {
HeaderParser::parse(BITCOIN_CORE_BLK_DIR).unwrap()
}
pub fn block_parser(
headers: &[ParsedHeader],
) -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
let options = Options::default().order_output();
let parser = DefaultParser.parse_with_opts(headers, options);
let mut height = headers.len() - 1;
(0..=height).zip(parser.into_iter().map(Result::unwrap))
}
/// Only takes the recent `block_count` blocks.
pub fn block_parser_recent(
block_count: usize,
) -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
let headers = parse_headers();
let start = headers.len() - block_count;
let selected_headers = &headers[start..];
let selected_height_start = headers.len() - block_count;
let options = Options::default().order_output();
let parser = DefaultParser.parse_with_opts(selected_headers, options);
(selected_height_start..(selected_height_start + block_count))
.zip(parser.into_iter().map(Result::unwrap))
}
pub fn extract_op_return(script: &Script) -> Option<&[u8]> {
if !script.is_op_return() {
return None;
}
let bytes = script.as_bytes();
// merely OP_RETURN
if bytes.len() == 1 {
return None;
}
// OP_RETURN <OP_PUSHBYTES_1..=OP_PUSHBYTES_75> <data>
if (OP_PUSHBYTES_1.to_u8()..=OP_PUSHBYTES_75.to_u8()).contains(&bytes[1]) {
let pushed_len = (bytes[1] - OP_PUSHBYTES_1.to_u8() + 1) as usize;
if bytes.len() - 2 < pushed_len {
return None;
}
let data = &bytes[2..(2 + pushed_len)];
return Some(data);
}
// OP_RETURN <OP_PUSHDATA1> <length> <data>
if bytes[1] == OP_PUSHDATA1.to_u8() {
let len = bytes[2] as usize;
if bytes.len() - 3 < len {
return None;
}
return Some(&bytes[3..(3 + len)]);
}
None
}
pub fn han_char(c: char) -> bool {
if ",。《》?!【】〔〕「」·—:;“”‘’…"
.chars()
.any(|x| x == c)
{
return true;
};
han_char_range(c as u32)
}
//! Read database and print Chinese OP_RETURN messages.
use bczhc_lib::char::han_char_range;
use chrono::TimeZone;
use rusqlite::{params, Connection};
fn main() {
struct Row {
time: String,
text: String,
txid: String,
vout: u32,
}
let db = Connection::open("./op-return-messages.db").unwrap();
let mut stmt = db
.prepare("select * from op_return_msg order by block_timestamp")
.unwrap();
let rows = stmt
.query_map(params![], |r| {
let timestamp: u64 = r.get_unwrap("block_timestamp");
Ok(Row {
text: r.get_unwrap("text"),
vout: r.get_unwrap("txo_vout"),
txid: r.get_unwrap("txo_txid"),
time: chrono::Local
.timestamp_millis_opt(timestamp as i64 * 1000)
.unwrap()
.to_string(),
})
})
.unwrap();
for x in rows {
let row = x.unwrap();
let text = &row.text;
if text.chars().any(|x| han_char_range(x as u32)) && !text.chars().any(japanese_syllabary) {
println!("{} {}:{} {}", row.time, row.txid, row.vout, text);
}
}
}
fn japanese_syllabary(c: char) -> bool {
matches!(c as u32, 0x3040..=0x309f | 0x30A0..=0x30ff)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment