Skip to content

Instantly share code, notes, and snippets.

@slowfranklin
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save slowfranklin/5145c4552af6b5bcbd3d to your computer and use it in GitHub Desktop.
Save slowfranklin/5145c4552af6b5bcbd3d to your computer and use it in GitHub Desktop.
#!/usr/bin/perl -w
# source: https://forums.adobe.com/message/3706123#3706123
#
use strict;

# Slurp mode: a single read of a filehandle returns the complete file.
$/ = undef;

if ($#ARGV < 1) {
    die "idstrings INFILE ... OUTFILE";
}
my $outfile = pop(@ARGV);
my $infile  = shift(@ARGV);

# Only one layout is held in memory at a time.  Say so instead of silently
# dropping any further input names.
warn "idstrings: only '$infile' is processed, ignoring: @ARGV\n" if @ARGV;

# Read the layout before the output file is created, so that naming the same
# file for input and output cannot truncate the data before it is read.
# The handle is opened explicitly (three-argument open) and put into binary
# mode: the layout is raw bytes and every offset below depends on them being
# unmodified.  "-" still means standard input.
my $indd;
{
    my $in;
    if ($infile eq '-') {
        $in = \*STDIN;
    } else {
        open($in, "<", $infile) || die "open $infile: $!";
    }
    binmode($in);
    $indd = <$in>;
    close($in) if $infile ne '-';
}
die "read $infile: no data" unless defined($indd) && length($indd);

# print STDERR "outfile is $outfile\n";
open(OUTFILE, ">", $outfile) || die "open $outfile: $!";

my $p     = 0;    # current scan offset into $indd (advanced by the main loop)
my $debug = 0;    # 1 = trace records instead of writing them, 2 = trace more
print "Read " . length($indd) . " bytes\n" if $debug;

# Byte 24 selects the byte order: 1 = little-endian, 2 = big-endian.
# A file too short to contain that byte is reported as endianness 0.
my $endian = length($indd) > 24 ? unpack("C", substr($indd, 24, 1)) : 0;

# $short is the unpack template for one 16-bit word in the file's byte order.
my ($endianname, $short);
if ($endian == 1) {
    $endianname = "little";
    $short      = "v";
} elsif ($endian == 2) {
    $endianname = "big";
    $short      = "n";
} else {
    print "Not an InDesign file! Or endianness is $endian.\n";
    exit(0);
}
print "InDesign layout is $endianname-endian.\n" if $debug;
# STORY (littleendian):
# 2 storylength <--------- sp
# 2 type (1920, 0)
# 1 stringlength
# 1 '@'
# n string
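# $sp is the start of the record header (the storylength word above); the
# scan loop finds the @-sign at $p and passes $sp = $p-5 to story().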
# story($sp, $max) - decode one candidate story record and write its text.
#
#   $sp   offset in $indd of the record header (two 16-bit words)
#   $max  offset at which the caller expects the record to end; decoding may
#         overrun it by up to 100 bytes
#
# Uses the file-level $indd, $endian, $short and $debug.
#
# The header holds a length word and a type word; their order depends on
# $endian.  After the header the record is walked chunk by chunk.  Each chunk
# starts with a 16-bit word whose top nibble is a flag and whose low 12 bits
# are a length:
#   0x8000  that many 16-bit code units follow; each is appended as \Uxxxx
#   0x4000  that many raw bytes follow; they are appended unchanged
#   other   not a chunk header; one byte is skipped
#
# The collected text is written as one line to OUTFILE unless $debug is set
# or one of the filters at the end rejects it.  Returns nothing.
#
# Declared without a prototype: the sub takes two arguments, and the former
# empty prototype "()" only worked because callers use the &story(...) form.
sub story {
    my ($sp, $max) = @_;
    my $size = length($indd);

    # A header that is not completely inside the buffer cannot be decoded.
    return if $sp < 0 || $sp + 4 > $size;

    # Little-endian files store the length word first, big-endian files second.
    my $slen = unpack($short, substr($indd, $endian == 1 ? $sp : $sp + 2, 2));

    my $chars  = $slen;        # length still to be consumed
    my $page   = $sp >> 12;    # index of the 4096-byte page holding the record
    my $string = "";

    if ($debug) {
        my $pagestart = $page << 12;
        my $guid      = unpack("H*", substr($indd, $pagestart, 16));
        my $magic     = unpack("H*", substr($indd, $pagestart + 16, 8));
        printf "*0x%08x story %3d bytes %s (%s)\n", $sp, $slen, $guid, $magic;
    }

    $sp += 4;

    # "$sp + 2 <= $size" keeps every chunk-header read inside the buffer.
    while ($sp < $max + 100 && $sp + 2 <= $size && $chars > 0) {    # xxx
        printf "loop (0x%08x) $sp < $max; $chars remaining\n", $sp if $debug > 1;

        my $flaglen = unpack($short, substr($indd, $sp, 2));
        my $flags   = $flaglen & 0xf000;
        my $len     = $flaglen & 0x0fff;    # at least 0x190 => 0x1ff

        if ($flags == 0x8000) {
            # unicode / UCS-2 (almost utf-16)
            my @ucodes = unpack("$short*", substr($indd, $sp + 2, $len * 2));
            $sp += 2;
            for my $code (@ucodes) {
                printf " 0x%08x (+1)/%d U+%04x\n", $sp, $#ucodes, $code if $debug;
                $string .= sprintf "\\U%04x", $code;
                $sp += 2;
                $chars--;
            }
        } elsif ($flags == 0x4000) {
            # ascii[ish]; in debug mode the bytes are only traced, not collected
            if ($debug) {
                printf " 0x%08x (+%d) @ <%s>\n", $sp, $len,
                    substr($indd, $sp + 2, $len);
            } else {
                $string .= substr($indd, $sp + 2, $len);
            }
            $sp    += $len + 2;
            $chars -= $len;
        } else {
            if ($debug > 1) {
                # Arguments follow the format: offset, byte value, flag/length
                # word, raw byte.
                printf "X0x%08x (+1) ? 0x%02x flagslen %04x '%s'\n",
                    $sp, ord(substr($indd, $sp, 1)), $flaglen,
                    substr($indd, $sp, 1);
            }
            $sp++;
            $chars--;
        }
    }

    # punt nonascii
    return if $string =~ /[[:^ascii:]\0]/;
    # minimums
    return if length($string) < 2;
    # too soon?
    return if $page < 200;
    # strings that match these patterns are not written
    return if $string =~ /^ADBE_\w+$/;
    return if $string eq "file";
    return if $string =~ /^file \d+ \d+$/;
    return if $string =~ /^Version \d.\d\d\d;PS /;

    print OUTFILE "$string\n" if !$debug;
    return;
}
# Scan the buffer byte by byte for '@' or 'A'.  Around such a byte the loop
# reads a 16-bit count, a 16-bit type and a one-byte length; where they sit
# depends on the byte order:
#   little-endian: count at $p-5, type at $p-3, length byte at $p-1
#   big-endian:    type at $p-4, count at $p-2, length byte at $p+1
# A hit with type 0, a non-zero length byte and a count of at most 400 is
# handed to story() and the scan jumps past it; otherwise the scan advances
# by one byte.
my $indd_len = length($indd);
while ($p < $indd_len) {
    my $char    = substr($indd, $p, 1);
    my $advance = 1;

    if ($char eq '@' || $char eq 'A') {
        my ($start, $count_at, $type_at, $count2_at) =
            $endian == 1
            ? ($p - 5, $p - 5, $p - 3, $p - 1)
            : ($p - 4, $p - 2, $p - 4, $p + 1);

        # Skip hits whose header would start before the buffer (a negative
        # substr offset counts from the END of the string and would read
        # unrelated bytes) or whose length byte lies past the end.
        if ($start >= 0 && $count2_at < $indd_len) {
            my $count  = unpack($short, substr($indd, $count_at, 2));
            my $type   = unpack($short, substr($indd, $type_at, 2));
            my $count2 = ord(substr($indd, $count2_at, 1));

            if ($type == 0x0000 && $count2 != 0 && $count <= 400) {
                # The &-form bypasses any prototype on story(), so the call
                # works however the sub is declared.
                &story($start, $p + $count);
                $advance = $count + 1;
            }
        }
    }

    $p += $advance;
}

# Buffered write errors only surface when the handle is closed.
close(OUTFILE) || die "close $outfile: $!";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment