josephg/0dedict.py

## 0dedict.py
# Thanks to commenters for providing the base of this much nicer implementation!
# Save and run with $ python 0dedict.py
# You may need to hunt down the dictionary files yourself and change the awful path string below.
# This works for me on macOS 10.14 Mojave

from struct import unpack
from zlib import decompress
import re
# Path of the dictionary body file. It is machine-specific: you may need to
# locate the dictionary on your own system and edit this value.
filename = (
    '/System/Library/Assets/'
    'com_apple_MobileAsset_DictionaryServices_dictionaryOSX/'
    '9f5862030e8f00af171924ebbc23ebfd6e91af78.asset/AssetData/'
    'Oxford Dictionary of English.dictionary/Contents/Resources/Body.data'
)
# Opened in binary mode; gen_entry() reads from this module-level handle.
f = open(filename, 'rb')

def gen_entry(fileobj=None):
    """Yield ``(title, entry)`` pairs for every entry in a Body.data file.

    Layout, as read by this function:

    * a 4-byte little-endian integer at offset 0x40 holds a length; blocks
      are read until the file position reaches ``0x40 + length``;
    * blocks start at offset 0x60, each being a 4-byte size followed by
      that many bytes, of which the first 8 are skipped and the remainder
      is a zlib stream;
    * each decompressed block is a sequence of records: a 4-byte length
      followed by that many bytes of entry markup.

    Args:
        fileobj: Seekable binary file object to read from. Defaults to the
            module-level handle ``f``.

    Yields:
        ``(title, entry)`` tuples of text, where ``title`` is the value of
        the entry's ``d:title`` attribute and ``entry`` is the full markup.

    Raises:
        ValueError: If an entry carries no ``d:title`` attribute.
    """
    src = f if fileobj is None else fileobj
    title_re = re.compile(r'd:title="(.*?)"')

    src.seek(0x40)
    limit = 0x40 + unpack('<i', src.read(4))[0]
    src.seek(0x60)
    while src.tell() < limit:
        sz, = unpack('<i', src.read(sz_len := 4) if False else src.read(4))
        buf = decompress(src.read(sz)[8:])

        pos = 0
        while pos < len(buf):
            chunksize, = unpack('<i', buf[pos:pos + 4])
            pos += 4

            # Decode to text so the str pattern can be applied (matching a
            # str pattern against bytes raises TypeError on Python 3).
            # NOTE(review): assumes entries are UTF-8 encoded -- confirm.
            entry = buf[pos:pos + chunksize].decode('utf-8')
            match = title_re.search(entry)
            if match is None:
                raise ValueError(
                    'entry at block offset %d has no d:title attribute' % pos)
            yield match.group(1), entry

            pos += chunksize

# Print the title of every entry; the entry text (definition) is not used.
for word, definition in gen_entry():
    print(word)

## dedict.c
// *** Old code - not needed given the python code above

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "zlib.h"

#define CHUNK 16384

/*
 40 Length of the zlib stream
 4c 0020
 54 0275 number of blocks

 60 808c pointer to the next block
 64 8088 length of the first block
 68 047a4a length of the unpacked block
 6c start of the zlib stream

 80fc second block

 13cd134
 13cd174
*/

/*
 * Inflate the zlib stream held in in[0..len) and write the decompressed
 * bytes to stdout, followed by a line holding the total number of output
 * bytes as (at least) six hex digits.
 *
 * Returns Z_OK when the stream was inflated through to its end, Z_ERRNO
 * when writing to stdout failed, and otherwise a zlib error code
 * (Z_DATA_ERROR, Z_MEM_ERROR, or whatever inflateInit() reported).
 */
int unpack(unsigned char *in, int len)
{
  int ret, outed = 0;
  unsigned have;
  z_stream strm;
  unsigned char out[CHUNK];

  /* Z_NULL allocators make zlib fall back to its defaults. */
  strm.zalloc = Z_NULL;
  strm.zfree = Z_NULL;
  strm.opaque = Z_NULL;
  strm.avail_in = 0;
  strm.next_in = Z_NULL;
  ret = inflateInit(&strm);
  if (ret != Z_OK)
    return ret;

  /* The complete compressed input is already in memory. */
  strm.avail_in = len;
  strm.next_in = in;
  do {
    strm.avail_out = CHUNK;
    strm.next_out = out;
    ret = inflate(&strm, Z_NO_FLUSH);
    assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
    switch (ret) {
      case Z_NEED_DICT:
        ret = Z_DATA_ERROR;     /* and fall through */
      case Z_DATA_ERROR:
      case Z_MEM_ERROR:
        (void)inflateEnd(&strm);
        return ret;
    }
    have = CHUNK - strm.avail_out;

    if (have <= 3) {
      fprintf(stderr, "could not find entry\n");
    }

    /*
     * Write byte-wise (size 1, count have). With the arguments the other
     * way round, an empty final chunk -- produced when the output length
     * is an exact multiple of CHUNK -- makes fwrite return 0 and would be
     * mistaken for a write failure.
     */
    if (fwrite(out, 1, have, stdout) != have || ferror(stdout)) {
      (void)inflateEnd(&strm);
      return Z_ERRNO;
    }

    outed += have;
  } while (strm.avail_out == 0);  /* a full buffer means more may follow */
  printf("%06x\n", outed);
  (void)inflateEnd(&strm);
  return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
}

char filename[256];

int main(int argc,char **argv) {
  FILE *fin; int limit,blen=0,p,l,bcnt=0; unsigned char *buf=NULL;
  assert(argc >= 2);
  sprintf(filename,"/Library/Dictionaries/%s.dictionary/Contents/Body.data",argv[1]);
  if((fin=fopen(filename,"rb"))) {
    fseek(fin,0x40,SEEK_SET);
    fread(&l,1,4,fin);
    limit=0x40+l;
    p=0x60;
    do {
      fseek(fin,p,SEEK_SET);
      fread(&l,1,4,fin);
//      if(0==l) break;
      if(blen<l) {
        if(buf!=NULL) free(buf);
        blen=l;
        buf=(unsigned char *)malloc(blen);
      }
      fread(buf,1,l,fin);
      //fprintf(stderr, "%x@%06x: %x>%06x\n",bcnt,p,l,((int *)buf)[1]);
      unpack(buf+8,l-8);
      p+=4+l;
      ++bcnt;
    } while(p<limit);
    free(buf);
    fclose(fin);
  }
  return 0;
}

## strip.c
// This program strips the first 4 characters from each line in the input
#include <stdio.h>

/*
 * Copy stdin to stdout, dropping the first four bytes of every line that
 * fgetln() returns. Lines whose reported length is four or less produce
 * no output at all.
 */
int main() {
  for (;;) {
    size_t line_len = 0;
    char *text;

    if (ferror(stdin) || feof(stdin))
      break;

    text = fgetln(stdin, &line_len);
    if (text == NULL)
      break;

    if (line_len <= 4)
      continue;
    fwrite(text + 4, 1, line_len - 4, stdout);
  }

  return 0;
}
	# Thanks to commenters for providing the base of this much nicer implementation!
	# Save and run with $ python 0dedict.py
	# You may need to hunt down the dictionary files yourself and change the awful path string below.
	# This works for me on macOS 10.14 Mojave

	from struct import unpack
	from zlib import decompress
	import re
	# Path of the dictionary body file. It is machine-specific: you may need to
	# locate the dictionary on your own system and edit this value.
	filename = (
	    '/System/Library/Assets/'
	    'com_apple_MobileAsset_DictionaryServices_dictionaryOSX/'
	    '9f5862030e8f00af171924ebbc23ebfd6e91af78.asset/AssetData/'
	    'Oxford Dictionary of English.dictionary/Contents/Resources/Body.data'
	)
	# Opened in binary mode; gen_entry() reads from this module-level handle.
	f = open(filename, 'rb')

	def gen_entry(fileobj=None):
	    """Yield ``(title, entry)`` pairs for every entry in a Body.data file.

	    Layout, as read by this function:

	    * a 4-byte little-endian integer at offset 0x40 holds a length; blocks
	      are read until the file position reaches ``0x40 + length``;
	    * blocks start at offset 0x60, each being a 4-byte size followed by
	      that many bytes, of which the first 8 are skipped and the remainder
	      is a zlib stream;
	    * each decompressed block is a sequence of records: a 4-byte length
	      followed by that many bytes of entry markup.

	    Args:
	        fileobj: Seekable binary file object to read from. Defaults to the
	            module-level handle ``f``.

	    Yields:
	        ``(title, entry)`` tuples of text, where ``title`` is the value of
	        the entry's ``d:title`` attribute and ``entry`` is the full markup.

	    Raises:
	        ValueError: If an entry carries no ``d:title`` attribute.
	    """
	    src = f if fileobj is None else fileobj
	    title_re = re.compile(r'd:title="(.*?)"')

	    src.seek(0x40)
	    limit = 0x40 + unpack('<i', src.read(4))[0]
	    src.seek(0x60)
	    while src.tell() < limit:
	        sz, = unpack('<i', src.read(4))
	        buf = decompress(src.read(sz)[8:])

	        pos = 0
	        while pos < len(buf):
	            chunksize, = unpack('<i', buf[pos:pos + 4])
	            pos += 4

	            # Decode to text so the str pattern can be applied (matching a
	            # str pattern against bytes raises TypeError on Python 3).
	            # NOTE(review): assumes entries are UTF-8 encoded -- confirm.
	            entry = buf[pos:pos + chunksize].decode('utf-8')
	            match = title_re.search(entry)
	            if match is None:
	                raise ValueError(
	                    'entry at block offset %d has no d:title attribute' % pos)
	            yield match.group(1), entry

	            pos += chunksize

	# Print the title of every entry; the entry text (definition) is not used.
	for word, definition in gen_entry():
	    print(word)
	// *** Old code - not needed given the python code above

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <assert.h>
	#include "zlib.h"

	#define CHUNK 16384

	/*
	40 Length of the zlib stream
	4c 0020
	54 0275 number of blocks

	60 808c pointer to the next block
	64 8088 length of the first block
	68 047a4a length of the unpacked block
	6c start of the zlib stream

	80fc second block

	13cd134
	13cd174
	*/

	/*
	 * Inflate the zlib stream held in in[0..len) and write the decompressed
	 * bytes to stdout, followed by a line holding the total number of output
	 * bytes as (at least) six hex digits.
	 *
	 * Returns Z_OK when the stream was inflated through to its end, Z_ERRNO
	 * when writing to stdout failed, and otherwise a zlib error code
	 * (Z_DATA_ERROR, Z_MEM_ERROR, or whatever inflateInit() reported).
	 */
	int unpack(unsigned char *in, int len)
	{
	  int ret, outed = 0;
	  unsigned have;
	  z_stream strm;
	  unsigned char out[CHUNK];

	  /* Z_NULL allocators make zlib fall back to its defaults. */
	  strm.zalloc = Z_NULL;
	  strm.zfree = Z_NULL;
	  strm.opaque = Z_NULL;
	  strm.avail_in = 0;
	  strm.next_in = Z_NULL;
	  ret = inflateInit(&strm);
	  if (ret != Z_OK)
	    return ret;

	  /* The complete compressed input is already in memory. */
	  strm.avail_in = len;
	  strm.next_in = in;
	  do {
	    strm.avail_out = CHUNK;
	    strm.next_out = out;
	    ret = inflate(&strm, Z_NO_FLUSH);
	    assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
	    switch (ret) {
	      case Z_NEED_DICT:
	        ret = Z_DATA_ERROR;     /* and fall through */
	      case Z_DATA_ERROR:
	      case Z_MEM_ERROR:
	        (void)inflateEnd(&strm);
	        return ret;
	    }
	    have = CHUNK - strm.avail_out;

	    if (have <= 3) {
	      fprintf(stderr, "could not find entry\n");
	    }

	    /*
	     * Write byte-wise (size 1, count have). With the arguments the other
	     * way round, an empty final chunk -- produced when the output length
	     * is an exact multiple of CHUNK -- makes fwrite return 0 and would be
	     * mistaken for a write failure.
	     */
	    if (fwrite(out, 1, have, stdout) != have || ferror(stdout)) {
	      (void)inflateEnd(&strm);
	      return Z_ERRNO;
	    }

	    outed += have;
	  } while (strm.avail_out == 0);  /* a full buffer means more may follow */
	  printf("%06x\n", outed);
	  (void)inflateEnd(&strm);
	  return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
	}

	char filename[256];

	int main(int argc,char **argv) {
	FILE fin; int limit,blen=0,p,l,bcnt=0; unsigned char buf=NULL;
	assert(argc >= 2);
	sprintf(filename,"/Library/Dictionaries/%s.dictionary/Contents/Body.data",argv[1]);
	if((fin=fopen(filename,"rb"))) {
	fseek(fin,0x40,SEEK_SET);
	fread(&l,1,4,fin);
	limit=0x40+l;
	p=0x60;
	do {
	fseek(fin,p,SEEK_SET);
	fread(&l,1,4,fin);
	// if(0==l) break;
	if(blen<l) {
	if(buf!=NULL) free(buf);
	blen=l;
	buf=(unsigned char *)malloc(blen);
	}
	fread(buf,1,l,fin);
	//fprintf(stderr, "%x@%06x: %x>%06x\n",bcnt,p,l,((int *)buf)[1]);
	unpack(buf+8,l-8);
	p+=4+l;
	++bcnt;
	} while(p<limit);
	free(buf);
	fclose(fin);
	}
	return 0;
	}
	// This program strips the first 4 characters from each line in the input
	#include <stdio.h>

	/*
	 * Copy stdin to stdout, dropping the first four bytes of every line that
	 * fgetln() returns. Lines whose reported length is four or less produce
	 * no output at all.
	 */
	int main() {
	  for (;;) {
	    size_t line_len = 0;
	    char *text;

	    if (ferror(stdin) || feof(stdin))
	      break;

	    text = fgetln(stdin, &line_len);
	    if (text == NULL)
	      break;

	    if (line_len <= 4)
	      continue;
	    fwrite(text + 4, 1, line_len - 4, stdout);
	  }

	  return 0;
	}