bczhc/a.rs

## a.rs
use bitcoin::{OutPoint, Script};
use bitcoin_demo::{extract_op_return, han_char, new_parser, EncodeHex};
use chrono::TimeZone;
use rusqlite::{params, Connection};

fn main() {
    let mut db = Connection::open("./op-return-messages.db").unwrap();
    db.execute(
        r#"create table if not exists op_return_msg
(
    data            blob    not null,
    hex             text    not null,
    text            text    not null,
    block_timestamp integer not null,
    block_height    integer not null,
    txo_txid        text    not null,
    txo_vout        integer not null
)"#,
        params![],
    )
    .unwrap();
    let db_transaction = db.transaction().unwrap();
    let mut stmt = db_transaction.prepare(r#"insert into op_return_msg (data, hex, text, block_timestamp, block_height, txo_txid, txo_vout)
    values (?, ?, ?, ?, ?, ?, ?)"#).unwrap();

    // let parser = block_parser_recent(10 * 30 * 24 * 60 / 10 /* 10 months */);
    let parser = new_parser();
    for (height, block) in parser {
        for tx in block.txdata {
            for (txo_idx, txo) in tx.output.iter().enumerate() {
                let script = &txo.script_pubkey;
                if script.is_op_return() {
                    let Some(data) = extract_op_return(Script::from_bytes(script.as_bytes()))
                    else {
                        continue;
                    };
                    // only accept data that is valid UTF-8
                    let Ok(text) = std::str::from_utf8(data) else {
                        continue;
                    };
                    // only accept printable data
                    if text.chars().any(|x| x.is_ascii_control()) {
                        continue;
                    }
                    // reject text with all asciis but without any space
                    if text.chars().all(|x| x.is_ascii()) && !text.contains(' ') {
                        continue;
                    }
                    let data = trim_null(data);
                    // validate_han(s) && !s.is_empty()
                    // !s.is_empty() && s.contains("bitcoin")
                    //     || s.contains("Bitcoin")
                    //     || s.contains("BITCOIN")
                    //     || s.contains("？")
                    // s.chars().any(han_char) && !s.as_bytes().starts_with(&hex!("146f6d6e69"))
                    let predicate = text.chars().any(han_char) /* contains Han characters*/
                        && data.len() >= 3 * 2 /* more than two Han characters */
                        && !text.starts_with("omni") /* filter out some specific string patterns */;
                    let predicate2 = text.contains("bitcoin")
                        || text.contains("BITCOIN")
                        || text.contains("Bitcoin");
                    if
                    /*predicate || predicate2*/
                    true {
                        // OP_RETURN has all Han characters
                        let block_time = chrono::Local
                            .timestamp_millis_opt(block.header.time as i64 * 1000)
                            .unwrap();
                        let txid_hex = tx.compute_txid().to_string();
                        println!(
                            "Block Time: {:?}, Height: {height}, txo: {}, data: {}",
                            block_time,
                            OutPoint {
                                txid: txid_hex.parse().unwrap(),
                                vout: txo_idx as u32,
                            },
                            text.replace("\n", " ")
                        );
                        stmt.execute(params![
                            data,
                            data.hex(),
                            text,
                            block.header.time,
                            height,
                            txid_hex,
                            txo_idx as u32,
                        ])
                        .unwrap();
                    }
                }
            }
        }
    }
    drop(stmt);
    db_transaction.commit().unwrap();
    drop(db);
}

fn trim_null(bytes: &[u8]) -> &[u8] {
    let Some(start) = bytes.iter().position(|&x| x != 0) else {
        return &[];
    };
    let Some(end) = bytes.iter().rposition(|&x| x != 0) else {
        return &[];
    };
    &bytes[start..=end]
}

fn validate_han(s: &str) -> bool {
    s.chars().all(han_char)
}

## lib.rs
/// <snippet>

pub fn new_parser() -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
    let options = Options::default().order_output();
    let blk_dir = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks";
    let mut headers = HeaderParser::parse(blk_dir).unwrap();
    // headers.reverse();
    let parser = DefaultParser.parse_with_opts(&headers, options);

    let mut height = headers.len() - 1;
    (0..=height)
        // .rev()
        .zip(parser.into_iter().map(Result::unwrap))
}

pub fn new_parser_rev() -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
    let options = Options::default().order_output();
    let blk_dir = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks";
    let mut headers = HeaderParser::parse(blk_dir).unwrap();
    headers.reverse();
    let parser = DefaultParser.parse_with_opts(&headers, options);

    let mut height = headers.len() - 1;
    (0..=height)
        .rev()
        .zip(parser.into_iter().map(Result::unwrap))
}

const BITCOIN_CORE_BLK_DIR: &str = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks";

pub fn parse_headers() -> Vec<ParsedHeader> {
    HeaderParser::parse(BITCOIN_CORE_BLK_DIR).unwrap()
}

pub fn block_parser(
    headers: &[ParsedHeader],
) -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
    let options = Options::default().order_output();
    let parser = DefaultParser.parse_with_opts(headers, options);

    let mut height = headers.len() - 1;
    (0..=height).zip(parser.into_iter().map(Result::unwrap))
}

/// Only takes the recent `block_count` blocks.
pub fn block_parser_recent(
    block_count: usize,
) -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
    let headers = parse_headers();
    let start = headers.len() - block_count;
    let selected_headers = &headers[start..];
    let selected_height_start = headers.len() - block_count;

    let options = Options::default().order_output();
    let parser = DefaultParser.parse_with_opts(selected_headers, options);
    (selected_height_start..(selected_height_start + block_count))
        .zip(parser.into_iter().map(Result::unwrap))
}

pub fn extract_op_return(script: &Script) -> Option<&[u8]> {
    if !script.is_op_return() {
        return None;
    }

    let bytes = script.as_bytes();

    // merely OP_RETURN
    if bytes.len() == 1 {
        return None;
    }

    // OP_RETURN <OP_PUSHBYTES_1..=OP_PUSHBYTES_75> <data>
    if (OP_PUSHBYTES_1.to_u8()..=OP_PUSHBYTES_75.to_u8()).contains(&bytes[1]) {
        let pushed_len = (bytes[1] - OP_PUSHBYTES_1.to_u8() + 1) as usize;
        if bytes.len() - 2 < pushed_len {
            return None;
        }
        let data = &bytes[2..(2 + pushed_len)];
        return Some(data);
    }

    // OP_RETURN <OP_PUSHDATA1> <length> <data>
    if bytes[1] == OP_PUSHDATA1.to_u8() {
        let len = bytes[2] as usize;
        if bytes.len() - 3 < len {
            return None;
        }
        return Some(&bytes[3..(3 + len)]);
    }

    None
}

pub fn han_char(c: char) -> bool {
    if "，。《》？！【】〔〕「」·—：；“”‘’…"
        .chars()
        .any(|x| x == c)
    {
        return true;
    };

    han_char_range(c as u32)
}

## read-db.rs
//! Read database and print Chinese OP_RETURN messages.

use bczhc_lib::char::han_char_range;
use chrono::TimeZone;
use rusqlite::{params, Connection};

fn main() {
    struct Row {
        time: String,
        text: String,
        txid: String,
        vout: u32,
    }

    let db = Connection::open("./op-return-messages.db").unwrap();
    let mut stmt = db
        .prepare("select * from op_return_msg order by block_timestamp")
        .unwrap();
    let rows = stmt
        .query_map(params![], |r| {
            let timestamp: u64 = r.get_unwrap("block_timestamp");
            Ok(Row {
                text: r.get_unwrap("text"),
                vout: r.get_unwrap("txo_vout"),
                txid: r.get_unwrap("txo_txid"),
                time: chrono::Local
                    .timestamp_millis_opt(timestamp as i64 * 1000)
                    .unwrap()
                    .to_string(),
            })
        })
        .unwrap();
    for x in rows {
        let row = x.unwrap();
        let text = &row.text;
        if text.chars().any(|x| han_char_range(x as u32)) && !text.chars().any(japanese_syllabary) {
            println!("{} {}:{} {}", row.time, row.txid, row.vout, text);
        }
    }
}

fn japanese_syllabary(c: char) -> bool {
    matches!(c as u32, 0x3040..=0x309f | 0x30A0..=0x30ff)
}
	use bitcoin::{OutPoint, Script};
	use bitcoin_demo::{extract_op_return, han_char, new_parser, EncodeHex};
	use chrono::TimeZone;
	use rusqlite::{params, Connection};

	fn main() {
	let mut db = Connection::open("./op-return-messages.db").unwrap();
	db.execute(
	r#"create table if not exists op_return_msg
	(
	data blob not null,
	hex text not null,
	text text not null,
	block_timestamp integer not null,
	block_height integer not null,
	txo_txid text not null,
	txo_vout integer not null
	)"#,
	params![],
	)
	.unwrap();
	let db_transaction = db.transaction().unwrap();
	let mut stmt = db_transaction.prepare(r#"insert into op_return_msg (data, hex, text, block_timestamp, block_height, txo_txid, txo_vout)
	values (?, ?, ?, ?, ?, ?, ?)"#).unwrap();

	// let parser = block_parser_recent(10 * 30 * 24 * 60 / 10 /* 10 months */);
	let parser = new_parser();
	for (height, block) in parser {
	for tx in block.txdata {
	for (txo_idx, txo) in tx.output.iter().enumerate() {
	let script = &txo.script_pubkey;
	if script.is_op_return() {
	let Some(data) = extract_op_return(Script::from_bytes(script.as_bytes()))
	else {
	continue;
	};
	// only accept data that is valid UTF-8
	let Ok(text) = std::str::from_utf8(data) else {
	continue;
	};
	// only accept printable data
	if text.chars().any(\|x\| x.is_ascii_control()) {
	continue;
	}
	// reject text with all asciis but without any space
	if text.chars().all(\|x\| x.is_ascii()) && !text.contains(' ') {
	continue;
	}
	let data = trim_null(data);
	// validate_han(s) && !s.is_empty()
	// !s.is_empty() && s.contains("bitcoin")
	// \|\| s.contains("Bitcoin")
	// \|\| s.contains("BITCOIN")
	// \|\| s.contains("？")
	// s.chars().any(han_char) && !s.as_bytes().starts_with(&hex!("146f6d6e69"))
	let predicate = text.chars().any(han_char) /* contains Han characters*/
	&& data.len() >= 3 * 2 /* more than two Han characters */
	&& !text.starts_with("omni") /* filter out some specific string patterns */;
	let predicate2 = text.contains("bitcoin")
	\|\| text.contains("BITCOIN")
	\|\| text.contains("Bitcoin");
	if
	/predicate \|\| predicate2/
	true {
	// OP_RETURN has all Han characters
	let block_time = chrono::Local
	.timestamp_millis_opt(block.header.time as i64 * 1000)
	.unwrap();
	let txid_hex = tx.compute_txid().to_string();
	println!(
	"Block Time: {:?}, Height: {height}, txo: {}, data: {}",
	block_time,
	OutPoint {
	txid: txid_hex.parse().unwrap(),
	vout: txo_idx as u32,
	},
	text.replace("\n", " ")
	);
	stmt.execute(params![
	data,
	data.hex(),
	text,
	block.header.time,
	height,
	txid_hex,
	txo_idx as u32,
	])
	.unwrap();
	}
	}
	}
	}
	}
	drop(stmt);
	db_transaction.commit().unwrap();
	drop(db);
	}

	fn trim_null(bytes: &[u8]) -> &[u8] {
	let Some(start) = bytes.iter().position(\|&x\| x != 0) else {
	return &[];
	};
	let Some(end) = bytes.iter().rposition(\|&x\| x != 0) else {
	return &[];
	};
	&bytes[start..=end]
	}

	fn validate_han(s: &str) -> bool {
	s.chars().all(han_char)
	}
	/// <snippet>

	pub fn new_parser() -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
	let options = Options::default().order_output();
	let blk_dir = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks";
	let mut headers = HeaderParser::parse(blk_dir).unwrap();
	// headers.reverse();
	let parser = DefaultParser.parse_with_opts(&headers, options);

	let mut height = headers.len() - 1;
	(0..=height)
	// .rev()
	.zip(parser.into_iter().map(Result::unwrap))
	}

	pub fn new_parser_rev() -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
	let options = Options::default().order_output();
	let blk_dir = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks";
	let mut headers = HeaderParser::parse(blk_dir).unwrap();
	headers.reverse();
	let parser = DefaultParser.parse_with_opts(&headers, options);

	let mut height = headers.len() - 1;
	(0..=height)
	.rev()
	.zip(parser.into_iter().map(Result::unwrap))
	}

	const BITCOIN_CORE_BLK_DIR: &str = "/mnt/nvme/bitcoin/bitcoind/blocks/blocks";

	pub fn parse_headers() -> Vec<ParsedHeader> {
	HeaderParser::parse(BITCOIN_CORE_BLK_DIR).unwrap()
	}

	pub fn block_parser(
	headers: &[ParsedHeader],
	) -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
	let options = Options::default().order_output();
	let parser = DefaultParser.parse_with_opts(headers, options);

	let mut height = headers.len() - 1;
	(0..=height).zip(parser.into_iter().map(Result::unwrap))
	}

	/// Only takes the recent `block_count` blocks.
	pub fn block_parser_recent(
	block_count: usize,
	) -> impl IntoIterator<Item = (usize, bitcoincore_rpc::bitcoin::Block)> {
	let headers = parse_headers();
	let start = headers.len() - block_count;
	let selected_headers = &headers[start..];
	let selected_height_start = headers.len() - block_count;

	let options = Options::default().order_output();
	let parser = DefaultParser.parse_with_opts(selected_headers, options);
	(selected_height_start..(selected_height_start + block_count))
	.zip(parser.into_iter().map(Result::unwrap))
	}

	pub fn extract_op_return(script: &Script) -> Option<&[u8]> {
	if !script.is_op_return() {
	return None;
	}

	let bytes = script.as_bytes();

	// merely OP_RETURN
	if bytes.len() == 1 {
	return None;
	}

	// OP_RETURN <OP_PUSHBYTES_1..=OP_PUSHBYTES_75> <data>
	if (OP_PUSHBYTES_1.to_u8()..=OP_PUSHBYTES_75.to_u8()).contains(&bytes[1]) {
	let pushed_len = (bytes[1] - OP_PUSHBYTES_1.to_u8() + 1) as usize;
	if bytes.len() - 2 < pushed_len {
	return None;
	}
	let data = &bytes[2..(2 + pushed_len)];
	return Some(data);
	}

	// OP_RETURN <OP_PUSHDATA1> <length> <data>
	if bytes[1] == OP_PUSHDATA1.to_u8() {
	let len = bytes[2] as usize;
	if bytes.len() - 3 < len {
	return None;
	}
	return Some(&bytes[3..(3 + len)]);
	}

	None
	}

	pub fn han_char(c: char) -> bool {
	if "，。《》？！【】〔〕「」·—：；“”‘’…"
	.chars()
	.any(\|x\| x == c)
	{
	return true;
	};

	han_char_range(c as u32)
	}
	//! Read database and print Chinese OP_RETURN messages.

	use bczhc_lib::char::han_char_range;
	use chrono::TimeZone;
	use rusqlite::{params, Connection};

	fn main() {
	struct Row {
	time: String,
	text: String,
	txid: String,
	vout: u32,
	}

	let db = Connection::open("./op-return-messages.db").unwrap();
	let mut stmt = db
	.prepare("select * from op_return_msg order by block_timestamp")
	.unwrap();
	let rows = stmt
	.query_map(params![], \|r\| {
	let timestamp: u64 = r.get_unwrap("block_timestamp");
	Ok(Row {
	text: r.get_unwrap("text"),
	vout: r.get_unwrap("txo_vout"),
	txid: r.get_unwrap("txo_txid"),
	time: chrono::Local
	.timestamp_millis_opt(timestamp as i64 * 1000)
	.unwrap()
	.to_string(),
	})
	})
	.unwrap();
	for x in rows {
	let row = x.unwrap();
	let text = &row.text;
	if text.chars().any(\|x\| han_char_range(x as u32)) && !text.chars().any(japanese_syllabary) {
	println!("{} {}:{} {}", row.time, row.txid, row.vout, text);
	}
	}
	}

	fn japanese_syllabary(c: char) -> bool {
	matches!(c as u32, 0x3040..=0x309f \| 0x30A0..=0x30ff)
	}