-
-
Save bczhc/48e34122c58181bab03c7717752c77df to your computer and use it in GitHub Desktop.
Bitcoin OP_RETURN messages: write to SQLite database #bitcoin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use bitcoin::{OutPoint, Script}; | |
use bitcoin_demo::{extract_op_return, han_char, new_parser, EncodeHex}; | |
use chrono::TimeZone; | |
use rusqlite::{params, Connection}; | |
fn main() { | |
let mut db = Connection::open("./op-return-messages.db").unwrap(); | |
db.execute( | |
r#"create table if not exists op_return_msg | |
( | |
data blob not null, | |
hex text not null, | |
text text not null, | |
block_timestamp integer not null, | |
block_height integer not null, | |
txo_txid text not null, | |
txo_vout integer not null | |
)"#, | |
params![], | |
) | |
.unwrap(); | |
let db_transaction = db.transaction().unwrap(); | |
let mut stmt = db_transaction.prepare(r#"insert into op_return_msg (data, hex, text, block_timestamp, block_height, txo_txid, txo_vout) | |
values (?, ?, ?, ?, ?, ?, ?)"#).unwrap(); | |
// let parser = block_parser_recent(10 * 30 * 24 * 60 / 10 /* 10 months */); | |
let parser = new_parser(); | |
for (height, block) in parser { | |
for tx in block.txdata { | |
for (txo_idx, txo) in tx.output.iter().enumerate() { | |
let script = &txo.script_pubkey; | |
if script.is_op_return() { | |
let Some(data) = extract_op_return(Script::from_bytes(script.as_bytes())) | |
else { | |
continue; | |
}; | |
// only accept data that is valid UTF-8 | |
let Ok(text) = std::str::from_utf8(data) else { | |
continue; | |
}; | |
// only accept printable data | |
if text.chars().any(|x| x.is_ascii_control()) { | |
continue; | |
} | |
// reject text with all asciis but without any space | |
if text.chars().all(|x| x.is_ascii()) && !text.contains(' ') { | |
continue; | |
} | |
let data = trim_null(data); | |
// validate_han(s) && !s.is_empty() | |
// !s.is_empty() && s.contains("bitcoin") | |
// || s.contains("Bitcoin") | |
// || s.contains("BITCOIN") | |
// || s.contains("?") | |
// s.chars().any(han_char) && !s.as_bytes().starts_with(&hex!("146f6d6e69")) | |
let predicate = text.chars().any(han_char) /* contains Han characters*/ | |
&& data.len() >= 3 * 2 /* more than two Han characters */ | |
&& !text.starts_with("omni") /* filter out some specific string patterns */; | |
let predicate2 = text.contains("bitcoin") | |
|| text.contains("BITCOIN") | |
|| text.contains("Bitcoin"); | |
if | |
/*predicate || predicate2*/ | |
true { | |
// OP_RETURN has all Han characters | |
let block_time = chrono::Local | |
.timestamp_millis_opt(block.header.time as i64 * 1000) | |
.unwrap(); | |
let txid_hex = tx.compute_txid().to_string(); | |
println!( | |
"Block Time: {:?}, Height: {height}, txo: {}, data: {}", | |
block_time, | |
OutPoint { | |
txid: txid_hex.parse().unwrap(), | |
vout: txo_idx as u32, | |
}, | |
text.replace("\n", " ") | |
); | |
stmt.execute(params![ | |
data, | |
data.hex(), | |
text, | |
block.header.time, | |
height, | |
txid_hex, | |
txo_idx as u32, | |
]) | |
.unwrap(); | |
} | |
} | |
} | |
} | |
} | |
drop(stmt); | |
db_transaction.commit().unwrap(); | |
drop(db); | |
} | |
fn trim_null(bytes: &[u8]) -> &[u8] { | |
let Some(start) = bytes.iter().position(|&x| x != 0) else { | |
return &[]; | |
}; | |
let Some(end) = bytes.iter().rposition(|&x| x != 0) else { | |
return &[]; | |
}; | |
&bytes[start..=end] | |
} | |
fn validate_han(s: &str) -> bool { | |
s.chars().all(han_char) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <snippet> | |
pub fn new_parser() -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> { | |
let options = Options::default().order_output(); | |
let blk_dir = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks"; | |
let mut headers = HeaderParser::parse(blk_dir).unwrap(); | |
// headers.reverse(); | |
let parser = DefaultParser.parse_with_opts(&headers, options); | |
let mut height = headers.len() - 1; | |
(0..=height) | |
// .rev() | |
.zip(parser.into_iter().map(Result::unwrap)) | |
} | |
pub fn new_parser_rev() -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> { | |
let options = Options::default().order_output(); | |
let blk_dir = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks"; | |
let mut headers = HeaderParser::parse(blk_dir).unwrap(); | |
headers.reverse(); | |
let parser = DefaultParser.parse_with_opts(&headers, options); | |
let mut height = headers.len() - 1; | |
(0..=height) | |
.rev() | |
.zip(parser.into_iter().map(Result::unwrap)) | |
} | |
const BITCOIN_CORE_BLK_DIR: &str = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks"; | |
pub fn parse_headers() -> Vec<ParsedHeader> { | |
HeaderParser::parse(BITCOIN_CORE_BLK_DIR).unwrap() | |
} | |
pub fn block_parser( | |
headers: &[ParsedHeader], | |
) -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> { | |
let options = Options::default().order_output(); | |
let parser = DefaultParser.parse_with_opts(headers, options); | |
let mut height = headers.len() - 1; | |
(0..=height).zip(parser.into_iter().map(Result::unwrap)) | |
} | |
/// Only takes the recent `block_count` blocks. | |
pub fn block_parser_recent( | |
block_count: usize, | |
) -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> { | |
let headers = parse_headers(); | |
let start = headers.len() - block_count; | |
let selected_headers = &headers[start..]; | |
let selected_height_start = headers.len() - block_count; | |
let options = Options::default().order_output(); | |
let parser = DefaultParser.parse_with_opts(selected_headers, options); | |
(selected_height_start..(selected_height_start + block_count)) | |
.zip(parser.into_iter().map(Result::unwrap)) | |
} | |
pub fn extract_op_return(script: &Script) -> Option<&[u8]> { | |
if !script.is_op_return() { | |
return None; | |
} | |
let bytes = script.as_bytes(); | |
// merely OP_RETURN | |
if bytes.len() == 1 { | |
return None; | |
} | |
// OP_RETURN <OP_PUSHBYTES_1..=OP_PUSHBYTES_75> <data> | |
if (OP_PUSHBYTES_1.to_u8()..=OP_PUSHBYTES_75.to_u8()).contains(&bytes[1]) { | |
let pushed_len = (bytes[1] - OP_PUSHBYTES_1.to_u8() + 1) as usize; | |
if bytes.len() - 2 < pushed_len { | |
return None; | |
} | |
let data = &bytes[2..(2 + pushed_len)]; | |
return Some(data); | |
} | |
// OP_RETURN <OP_PUSHDATA1> <length> <data> | |
if bytes[1] == OP_PUSHDATA1.to_u8() { | |
let len = bytes[2] as usize; | |
if bytes.len() - 3 < len { | |
return None; | |
} | |
return Some(&bytes[3..(3 + len)]); | |
} | |
None | |
} | |
pub fn han_char(c: char) -> bool { | |
if ",。《》?!【】〔〕「」·—:;“”‘’…" | |
.chars() | |
.any(|x| x == c) | |
{ | |
return true; | |
}; | |
han_char_range(c as u32) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! Read database and print Chinese OP_RETURN messages. | |
use bczhc_lib::char::han_char_range; | |
use chrono::TimeZone; | |
use rusqlite::{params, Connection}; | |
fn main() { | |
struct Row { | |
time: String, | |
text: String, | |
txid: String, | |
vout: u32, | |
} | |
let db = Connection::open("./op-return-messages.db").unwrap(); | |
let mut stmt = db | |
.prepare("select * from op_return_msg order by block_timestamp") | |
.unwrap(); | |
let rows = stmt | |
.query_map(params![], |r| { | |
let timestamp: u64 = r.get_unwrap("block_timestamp"); | |
Ok(Row { | |
text: r.get_unwrap("text"), | |
vout: r.get_unwrap("txo_vout"), | |
txid: r.get_unwrap("txo_txid"), | |
time: chrono::Local | |
.timestamp_millis_opt(timestamp as i64 * 1000) | |
.unwrap() | |
.to_string(), | |
}) | |
}) | |
.unwrap(); | |
for x in rows { | |
let row = x.unwrap(); | |
let text = &row.text; | |
if text.chars().any(|x| han_char_range(x as u32)) && !text.chars().any(japanese_syllabary) { | |
println!("{} {}:{} {}", row.time, row.txid, row.vout, text); | |
} | |
} | |
} | |
fn japanese_syllabary(c: char) -> bool { | |
matches!(c as u32, 0x3040..=0x309f | 0x30A0..=0x30ff) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment