Skip to content

Instantly share code, notes, and snippets.

@laughinghan
Created October 7, 2013 01:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save laughinghan/6861452 to your computer and use it in GitHub Desktop.
Save laughinghan/6861452 to your computer and use it in GitHub Desktop.
#!/usr/bin/env node
// When stdin is a terminal nothing has been piped in, so print the usage
// text and stop instead of waiting for input.
if (process.stdin.isTTY) {
  [
    'Usage: Inside the directory with all the sent text message files:',
    '',
    'ls | path/to/contacts_extractor.js | sort | uniq | ./mkvcards -o /path/to/output_dir/'
  ].forEach(function(usageLine) {
    console.log(usageLine);
  });
  process.exit();
}
// Debug output is enabled only when the LAST command-line argument is
// exactly "-d" or "--debug".
var DEBUG_MODE = (function(lastArg) {
  return lastArg === '-d' || lastArg === '--debug';
}(process.argv[process.argv.length - 1]));
//////////////// read stdin ////////////////
// Accumulate everything piped to stdin, decoded as UTF-8, into `data`;
// the 'end' handler below consumes it once stdin closes.
var data = '';
process.stdin.resume();
process.stdin.setEncoding('utf8');
process.stdin.on('data', function(piece) {
  data += piece;
});
process.stdin.on('end', function() {
// One path per line on stdin. Blank lines are dropped so that empty input
// does not turn into a readFile('') call.
var lines = data.trim().split('\n').filter(function(line) {
  return line !== '';
});
//////////////// read max number of files at a time ////////////////
var fs = require('fs');
// Upper bound on simultaneously open files; lowered automatically if the
// process still runs into EMFILE (too many open files).
var maxReads = 32;
var active = 0;        // reads currently in flight
var q = lines.slice(); // paths that have not been started yet

// Start reads until either the concurrency limit or the end of the queue
// is reached.
function startReads() {
  while (active < maxReads && q.length) {
    readOne(q.shift());
  }
}

// Read one file and hand it to parseFile. On EMFILE the path is re-queued
// and the limit is reduced to what demonstrably worked.
function readOne(path) {
  active += 1;
  fs.readFile(path, function(e, contents) {
    active -= 1;
    if (e && e.code === 'EMFILE' && maxReads > 1) {
      // Back off: never go below one read, so the retry below can always
      // make progress; if even a single read gets EMFILE we give up and
      // fall through to the throw on the next attempt.
      maxReads = Math.max(1, active);
      q.push(path);
    }
    else {
      if (e) throw e;
      parseFile(path, contents);
    }
    startReads();
  });
}

startReads();
//////////////// parse message file ////////////////
// by inspection of message format
const TEXT_OFFSET_BASE = 245;
const PHONE_OFFSET_BASE = 252;
const CONTACT_HEADER_SIZE = 29;
var assert = require('assert');
function parseFile(path, data) {
try {
var headerSize = data.readUInt32BE(4);
var textOffset = TEXT_OFFSET_BASE + headerSize;
if (textOffset > data.length) {
console.error('Text offset', textOffset, '> buffer size', data.length, 'in', path);
return;
}
if (DEBUG_MODE) {
console.log('Sent "' + readUCS2BEString(textOffset).str + '" to:');
}
var textSize = data.readUInt16BE(textOffset);
/**
 * Decode a size-prefixed big-endian UCS-2 string from the enclosing `data`
 * buffer without modifying the buffer.
 *
 * Layout read here: a 16-bit big-endian size at `offset`, followed by the
 * string bytes. The size counts the 2-byte null terminator, which is left
 * out of the result.
 *
 * Decoding each code unit with readUInt16BE (instead of byte-swapping the
 * buffer in place and decoding as little-endian) keeps `data` intact, so
 * the same offset can be read any number of times with the same result.
 *
 * @param {number} offset - byte offset of the 16-bit size prefix
 * @returns {{str: string, end: number}} the decoded string and the byte
 *   offset just past its last character (i.e. where the terminator starts)
 */
function readUCS2BEString(offset) {
  var size = data.readUInt16BE(offset);
  var start = offset + 2, end = start + size - 2; // omit null terminator
  // Never read past the buffer, even if the size prefix claims more bytes.
  var limit = Math.min(end, data.length);
  var str = '';
  // `i + 1 < limit` drops a trailing odd byte, as a UCS-2 decoder would.
  for (var i = start; i + 1 < limit; i += 2) {
    str += String.fromCharCode(data.readUInt16BE(i));
  }
  return { str: str, end: end };
}
// Walk the recipient records that follow the message text, collecting one
// { name, phone } entry per recipient. The first record's phone string is
// looked up at PHONE_OFFSET_BASE + headerSize + textSize.
var contacts = [], phoneOffset = PHONE_OFFSET_BASE + headerSize + textSize;
do {
var phone = readUCS2BEString(phoneOffset);
// A byte value of 12 two bytes past the phone string is treated as "no name
// follows": the name is empty and ends where the phone ended.
// NOTE(review): the meaning of the value 12 is not visible here — presumably
// a record marker found by inspection; confirm against sample files.
if (data[phone.end + 2] === 12) var name = { str: '', end: phone.end };
else var name = readUCS2BEString(phone.end + 3); // skip null and comma
// ^ why is the comma only one byte?!
contacts.push({ name: name.str, phone: tenDigitize(phone.str) });
// 16-bit big-endian value three bytes past the name; it is added to the
// offset of the next record below.
var contactSeparatorSize = data.readUInt16BE(name.end + 3);
// NOTE(review): readUInt16BE returns a number, while the array searched here
// holds the strings '2' and '3'. indexOf compares with strict equality, so
// this condition is never true and the warning is never printed. It is also
// unclear whether 2 and 3 were meant to be the unusual or the usual sizes —
// confirm the intent before changing it.
if ('23'.split('').indexOf(contactSeparatorSize) > -1) {
console.error('Unusual contact separator size', contactSeparatorSize,
'at byte', name.end + 3, 'in', path);
}
// Next record's phone string: end of this name, plus the separator size,
// plus the fixed CONTACT_HEADER_SIZE.
phoneOffset = name.end + contactSeparatorSize + CONTACT_HEADER_SIZE;
} while (phoneOffset < data.length);
/**
 * Reduce a phone number string to ten characters by removing a leading
 * one-character ("1" or "+") or two-character ("+1") prefix.
 *
 * An 11- or 12-character input with any other prefix fails an assertion.
 * A result that is not ten characters long is reported on stderr, together
 * with the enclosing `path`, and still returned.
 *
 * @param {string} phone - phone number as stored in the message file
 * @returns {string} the number with the recognised prefix removed
 */
function tenDigitize(phone) {
  var digits = phone;
  switch (phone.length) {
    case 11:
      var lead = phone[0];
      assert(lead === '1' || lead === '+',
        '"'+lead+'" === "1" || "'+lead+'" === "+"');
      digits = phone.slice(1);
      break;
    case 12:
      assert.strictEqual(phone.slice(0,2), '+1');
      digits = phone.slice(2);
      break;
  }
  if (digits.length !== 10) {
    console.error('Unusual phone length: "' + digits + '" in', path);
  }
  return digits;
}
// English ordinal suffix for a positive integer: 1st, 2nd, 3rd, 4th, ...,
// 11th, 12th, 13th, ..., 21st, 22nd, 23rd, ...
function ordinalSuffix(n) {
  var lastTwo = n % 100;
  if (lastTwo >= 11 && lastTwo <= 13) return 'th';
  return ['th', 'st', 'nd', 'rd'][n % 10] || 'th';
}
// Print the recipients of this message.
// - Debug mode: every recipient, tab-indented and quoted, with its position
//   among the recipients appended from the second one on, e.g. ' (2nd)'.
// - Normal mode: "phone name" lines, only for recipients that have a name.
contacts.forEach(function(contact, i) {
  if (DEBUG_MODE) {
    var position = i > 0
      ? ' (' + (i + 1) + ordinalSuffix(i + 1) + ')'
      : '';
    console.log('\t"' + contact.phone + '", "' + contact.name + '"' + position);
  }
  else if (contact.name) {
    console.log(contact.phone, contact.name);
  }
});
}
catch (e) {
console.error('Error while parsing ' + path + ':');
throw e;
}
}
});
#!/bin/bash
# Directory the .vcf files are written to; defaults to the current directory.
OUTPUT_DIR=.

# Parse command-line options into OUTPUT_DIR.
#   -o DIR | --output-dir DIR   directory to write the vCards into
# Unrecognized options are reported on stderr and skipped.
# Returns 1 if -o/--output-dir is given without a value. (Without this
# check, "shift 2" fails when only one argument is left, nothing is
# shifted, and the loop never terminates.)
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -o|--output-dir)
        if [[ $# -lt 2 ]]; then
          echo "option $1 requires a directory argument" >&2
          return 1
        fi
        OUTPUT_DIR="$2"; shift 2
        ;;
      *)
        echo "unrecognized option $1" >&2; shift
        ;;
    esac
  done
}

parse_args "$@" || exit 1
mkdir -p "$OUTPUT_DIR"
# Read "PHONE FIRSTNAME OTHERNAMES..." records from stdin, one per line, and
# write one vCard 2.1 file per record into $OUTPUT_DIR.
#   read -r             keeps backslashes in names literal
#   || [[ -n "$PHONE" ]] still processes a last line that has no newline
while read -r PHONE FIRSTNAME OTHERNAMES || [[ -n "$PHONE" ]]; do
  # Skip blank lines instead of writing an empty card.
  [[ -n "$PHONE" ]] || continue
  # A "/" in a name would be taken as a directory separator in the output
  # path, so it is replaced in the file name only (the card keeps the name).
  FILENAME="$FIRSTNAME $OTHERNAMES $PHONE.vcf"
  FILENAME="${FILENAME//\//_}"
  cat <<EOF > "$OUTPUT_DIR/$FILENAME"
BEGIN:VCARD
VERSION:2.1
N;CHARSET=UTF-8;ENCODING=8BIT:$OTHERNAMES;$FIRSTNAME
TEL;PREF;CELL;VOICE;ENCODING=8BIT:$PHONE
END:VCARD
EOF
done
@laughinghan
Copy link
Author

Inspired by http://developer.nokia.com/Community/Discussion/showthread.php/204463-Raw-text-message-file-format-(from-*-nbf)-description , written to extract contacts from the Sent text messages on my old Nokia C3-00. (I somehow deleted all the contacts, and received text messages, for some reason, record only the phone number and not the name.)

@laughinghan
Copy link
Author

I really should have written the contacts extractor in Python, but I wanted to play with Node.js and I knew that JS has native support for UTF-16. Little did I know that it only supports little-endian, and that to read big-endian UTF-16 you have to manually go through and swap the fucking byte order. Seriously. Plus, Python is an actually good language.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment