Skip to content

Instantly share code, notes, and snippets.

@fabiomontefuscolo
Created February 26, 2014 17:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fabiomontefuscolo/9234485 to your computer and use it in GitHub Desktop.
Save fabiomontefuscolo/9234485 to your computer and use it in GitHub Desktop.
Remove a ofuscação dos arquivos do dicionário Houaiss
#!/usr/bin/python2
# -*- coding: utf-8 -*-
#
# Coloque esse script na pasta com os arquivos dhx.
# O resultado estará em iso-8859-1
#
#
# Segui o tutorial em http://www.caloni.com.br/blog/archives/conversor-de-houaiss-para-babylon-parte-1
#
import os
# Value added (modulo 256) to every byte of a .dhx file to decode it.
DHX_SHIFT = 0x0B

# 256-entry lookup table mapping each input byte to (byte + DHX_SHIFT) % 256.
# Built through bytearray so the same expression yields a byte string on
# both Python 2.7 and Python 3.
_DHX_TABLE = bytes(bytearray((i + DHX_SHIFT) % 256 for i in range(256)))

# Number of bytes read per iteration; avoids one call per byte.
_CHUNK_SIZE = 65536


def deobfuscate(data):
    """Return *data* with DHX_SHIFT added (modulo 256) to every byte.

    *data* must be a byte string; the result has the same length.
    """
    return data.translate(_DHX_TABLE)


def convert_file(arq):
    """Decode the file *arq* into a new file named ``<arq>.txt``.

    Both files are opened in binary mode: the input is raw bytes, so text
    mode would try to decode it (UnicodeDecodeError on Python 3) or apply
    platform newline translation. The ``with`` block closes both handles
    even if reading or writing fails.
    """
    with open(arq, 'rb') as origin, open('%s.txt' % arq, 'wb') as target:
        # iter(callable, sentinel) keeps reading until read() returns b''.
        for chunk in iter(lambda: origin.read(_CHUNK_SIZE), b''):
            target.write(deobfuscate(chunk))


files = os.listdir('.')
for arq in files:
    if not arq.endswith('dhx'):
        continue
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Abrindo "%s"' % arq)
    convert_file(arq)
@apompeia
Copy link

Python 3.9.1 (tags/v3.9.1:1e5d33e, Dec 7 2020, 17:08:21) [MSC v.1927 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license()" for more information.

help
Type help() for interactive help, or help(object) for help about object.

======== RESTART: J:\Visual Basic Progs\houaiss\Dicionario\cahouaiss.py ========
Abrindo "deah001.dhx"
Traceback (most recent call last):
File "J:\Visual Basic Progs\houaiss\Dicionario\cahouaiss.py", line 25, in
char = origin.read(1)
File "C:\Users\Alvaro Ramos\AppData\Local\Programs\Python\Python39\lib\codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd6 in position 11: invalid continuation byte

@fabiomontefuscolo
Copy link
Author

Já faz 7 anos que fiz isso. Talvez rode com python2

@chicolismo
Copy link

A quem interessar possa, testei com clang:

#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/*
 * Colocar o executável na mesma pasta dos arquivos .dhx
 */

/*
 * Decode the file entry_name into a new file named "<entry_name>.txt".
 *
 * Each byte read from the input has 0x0B added to it (wrapping modulo 256)
 * before being written to the output. Failures are reported on stderr and
 * the function simply returns; nothing is reported back to the caller.
 */
void convert(const char *entry_name) {
    fprintf(stdout, "Convertendo o arquivo '%s'\n", entry_name);

    FILE *input = fopen(entry_name, "rb");
    if (input == NULL) {
        fprintf(stderr, "Erro ao ler o arquivo '%s'\n", entry_name);
        return;
    }

    // The new name needs 5 extra chars: ".txt" plus the terminating NUL.
    const size_t len = strlen(entry_name) + 5;
    char *output_name = malloc(len);
    if (output_name == NULL) {
        fprintf(stderr, "Erro ao criar o arquivo '%s.txt'\n", entry_name);
        fclose(input);
        return;
    }
    snprintf(output_name, len, "%s.txt", entry_name);

    FILE *output = fopen(output_name, "wb");
    if (output == NULL) {
        fprintf(stderr, "Erro ao criar o arquivo '%s'\n", output_name);
        fclose(input);
        free(output_name);
        return;
    }

    // fgetc's result is kept in an int so EOF is distinguishable from the
    // byte 0xFF. Testing against EOF (instead of looping on feof) also ends
    // the loop on a read error, where feof() would stay false forever.
    int c;
    while ((c = fgetc(input)) != EOF) {
        if (fputc((unsigned char)(c + 0x0B), output) == EOF) {
            break;
        }
    }

    if (ferror(input)) {
        fprintf(stderr, "Erro ao ler o arquivo '%s'\n", entry_name);
    }
    const int write_failed = ferror(output);

    fclose(input);
    // fclose flushes buffered data, so a failure here is also a write error.
    if (fclose(output) != 0 || write_failed) {
        fprintf(stderr, "Erro ao escrever o arquivo '%s'\n", output_name);
    }
    free(output_name);
}


/*
 * Return a pointer to the text following the last '.' in entry_name.
 *
 * Returns the empty string when the name contains no '.' or when the only
 * '.' found by strrchr is the very first character. The returned pointer
 * is either into entry_name or a string literal; the caller must not free it.
 */
const char *extension(const char *entry_name) {
    const char *last_dot = strrchr(entry_name, '.');
    const int has_extension = (last_dot != NULL) && (last_dot != entry_name);
    return has_extension ? last_dot + 1 : "";
}


/*
 * Convert every entry of the current directory whose extension is "dhx"
 * (compared case-insensitively).
 *
 * Returns EXIT_FAILURE if the current directory cannot be opened,
 * EXIT_SUCCESS otherwise.
 */
int main(void) {
    DIR *dir = opendir(".");

    if (!dir) {
        fprintf(stderr, "Erro ao abrir o diretório atual\n");
        // Stop here: readdir/closedir must not be called with a NULL DIR*.
        return EXIT_FAILURE;
    }

    struct dirent *entry;
    while ((entry = readdir(dir)) != NULL) {
        if (strcasecmp(extension(entry->d_name), "dhx") == 0) {
            convert(entry->d_name);
        }
    }

    closedir(dir);
    return EXIT_SUCCESS;
}

@apompeia
Copy link

apompeia commented Apr 6, 2021 via email

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment