Skip to content

Instantly share code, notes, and snippets.

@DonalDuck004
Last active November 25, 2023 21:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DonalDuck004/2096f9089933d4ad71bf9183ecfb5c4d to your computer and use it in GitHub Desktop.
Save DonalDuck004/2096f9089933d4ad71bf9183ecfb5c4d to your computer and use it in GitHub Desktop.
Presenta piccoli controlli extra non pienamente gestiti a fini di debug, è sempre divertente litigare con gli errori di segmentazione :D
P.S. Usa un algoritmo diverso (non direttamente applicabile in Python) che risulta essere fino a 4/5 volte più veloce in C
(^ Parte di algoritmo.txt)
# Switch between the compiled extension (True) and the pure-Python
# implementation below (False).
C = True

if C:
    from program01_c import Umkansanize as real

    def Umkansanize(source_root: str, target_root: str) -> dict[str, int]:
        """Delegate to the ``program01_c`` extension module.

        Args:
            source_root: directory holding the source ``index.txt`` and songs.
            target_root: directory that receives the converted songs.

        Returns:
            Whatever the extension returns (annotated as title -> count).
        """
        return real(source_root, target_root)
else:
    from os import makedirs as mkdir

    # Only needed by the string annotation below; never imported at runtime.
    if False:
        from typing import Generator

    # bytes.translate table: ' ' -> 'P', '+' -> '#', '-' -> 'b',
    # '0'..'6' -> 'A'..'G'; every other byte maps to itself.
    TABLE = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1fP!"#$%&\'()*#,b./ABCDEFG789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'

    def index_eater(src: bytes, dest: bytes) -> "Generator[tuple[bytes, bytes, bytes], None, None]":
        """Walk ``src/index.txt`` and yield one work item per listed song.

        Each index line is expected to look like ``"<title>" "<relative path>"``
        followed by a newline. The directory that will hold the converted file
        is created on the fly.

        Yields:
            ``(title, source file path, target file path)``, all as bytes.
        """
        last = None
        with open(src + b"/index.txt", "rb") as f:
            # The file is read in binary mode, so the blank-line filter has to
            # compare against bytes; comparing with the str "\n" never matched.
            for song_name, file_name in (x[1:-2].split(b'" "') for x in f if x != b"\n"):
                target_dir = (dest + b"/" + file_name).rpartition(b"/")[0]
                # Consecutive entries often share a directory; skip the
                # redundant makedirs call in that case.
                if target_dir != last:
                    mkdir(target_dir, exist_ok=True)
                    last = target_dir
                yield song_name, src + b"/" + file_name, target_dir + b"/" + song_name + b".txt"

    def tknz_tarahumara(path: bytes, out: bytes) -> int:
        """Convert one song file and return the sum of the written run lengths.

        Every input line loses its last byte, is reversed, and is mapped
        through ``TABLE``. The result is written to ``out`` as a sequence of
        ``<symbol><count>`` groups, where a symbol is one character optionally
        followed by ``#`` or ``b`` and the count is how many times it repeats
        consecutively.
        """
        with open(path, "rb") as inp:
            # The trailing "Z" is a sentinel so the look-ahead below never
            # indexes past the end of the string.
            content = b"".join(x[:-1][::-1] for x in inp).translate(TABLE).decode() + "Z"
        total = len(content)
        i = 0
        tokens = 0
        with open(out, "w") as sink:
            while i < total - 1:
                c = 1
                if content[i + 1] == "#" or content[i + 1] == "b":
                    # Two-character symbol: compare pairs.
                    sink.write(content[i] + content[i + 1])
                    while content[i] == content[i + 2] and content[i + 1] == content[i + 3]:
                        c += 1
                        i += 2
                    i += 2
                else:
                    sink.write(content[i])
                    # Stop extending the run when the next equal character
                    # actually starts a two-character symbol.
                    while content[i] == content[(i := i + 1)] and (content[i + 1] != "#" and content[i + 1] != "b"):
                        c += 1
                sink.write(str(c))
                tokens += c
        return tokens

    def Umkansanize(source_root: str, target_root: str) -> dict[str, int]:
        """Convert every song listed in ``source_root/index.txt``.

        Writes ``target_root/index.txt`` with one ``"<title>" <count>`` line per
        song, ordered by descending count and then by title.

        Returns:
            Mapping from song title to its count.
        """
        target_root = target_root.encode()
        stats = [(n.decode(), tknz_tarahumara(inp, out)) for n, inp, out in index_eater(source_root.encode(), target_root)]
        stats.sort(key=lambda x: (-x[1], x[0]))
        with open(target_root + b"/index.txt", "wb") as out:
            out.write(b'\n'.join(f'"{x}" {y}'.encode() for x, y in stats))
        return dict(stats)
#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
/* One entry of the source index together with its conversion result. */
typedef struct {
    char *name;         /* song title taken from the index line */
    char *input_path;   /* path of the file to read */
    char *output_path;  /* path of the file to write */
    int tokens;         /* running total filled in by parse_tarahumara */
} raw_song_t;
/* A counted array of songs. */
typedef struct {
    int len;            /* number of valid entries in group */
    raw_song_t *group;  /* array of len songs */
} raw_song_group_t;
/*
 * Return a newly allocated string holding str1 immediately followed by str2.
 *
 * The buffer is obtained from PyMem_RawMalloc, so the caller releases it with
 * PyMem_RawFree. Returns NULL when the allocation fails.
 */
char* strconcat(char* str1, char* str2){
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);
    char *buff = PyMem_RawMalloc(len1 + len2 + 1);

    if (buff == NULL)
        return NULL;
    memcpy(buff, str1, len1);
    memcpy(buff + len1, str2, len2 + 1); /* +1 also copies the terminator */
    return buff;
}
/*
 * Return a newly allocated "str1/str2" string.
 *
 * The buffer is obtained from PyMem_RawMalloc, so the caller releases it with
 * PyMem_RawFree. Returns NULL when the allocation fails.
 */
char* path_join(char* str1, char* str2){
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);
    /* +2: one byte for the separator, one for the terminator. */
    char *buff = PyMem_RawMalloc(len1 + len2 + 2);

    if (buff == NULL)
        return NULL;
    memcpy(buff, str1, len1);
    buff[len1] = '/';
    memcpy(buff + len1 + 1, str2, len2 + 1); /* +1 also copies the terminator */
    return buff;
}
/*
 * Create dir and every missing parent directory, using perm as the mode
 * passed to mkdir for each component.
 *
 * Failures of the individual mkdir calls (for example because a component
 * already exists) are ignored. A NULL or empty dir is a no-op.
 */
void recursive_mkdir(char *dir, int perm) {
    if (dir == NULL || dir[0] == '\0')
        return;

    size_t len = strlen(dir);
    char *tmp = malloc(len + 1);
    if (tmp == NULL)
        return;
    memcpy(tmp, dir, len + 1);

    /* Drop one trailing slash so the final component is not created twice. */
    if (len > 1 && tmp[len - 1] == '/')
        tmp[len - 1] = '\0';

    /* Start after the first character so a leading '/' is never cut off. */
    for (char *p = tmp + 1; *p; p++) {
        if (*p == '/') {
            *p = '\0';          /* temporarily end the string at this parent */
            mkdir(tmp, perm);
            *p = '/';
        }
    }
    mkdir(tmp, perm);
    free(tmp);
}
raw_song_group_t* parse_index(char* src, char* dest){
raw_song_group_t* out = (raw_song_group_t*)PyMem_RawMalloc(sizeof(raw_song_group_t));
out->len = 7;
out->group = (raw_song_t*)PyMem_RawMalloc(sizeof(raw_song_t) * out->len);
int c = -1;
char* index = path_join(src, "index.txt");
FILE* fd = fopen(index, "r");
size_t _ = 0;
char* line;
int begin;
int offset;
char* tmp;
int temp_size;
while (getline(&line, &_, fd) != -1){
if (++c == out->len){
out->len += 3;
out->group = (raw_song_t*)PyMem_RawRealloc(out->group, sizeof(raw_song_t) * out->len);
}
begin = 0;
while (line[++begin] != '"');
temp_size = begin - 1;
out->group[c].name = (char*)PyMem_RawMalloc(sizeof(char) * (temp_size + 1));
memcpy(out->group[c].name, line + 1, temp_size);
out->group[c].name[temp_size] = '\x00';
begin += 2;
offset = begin;
while (line[++offset] != '"');
temp_size = offset - begin - 1;
out->group[c].input_path = (char*)PyMem_RawMalloc(sizeof(char) * (temp_size + 1));
memcpy(out->group[c].input_path, line + begin + 1, temp_size);
out->group[c].input_path[temp_size] = '\x00';
out->group[c].output_path = (char*)PyMem_RawMalloc(sizeof(char) * (temp_size + 1));
memcpy(out->group[c].output_path, out->group[c].input_path, temp_size);
out->group[c].output_path[temp_size] = '\x00';
tmp = dirname(path_join(dest, out->group[c].input_path));
recursive_mkdir(tmp, 0777);
tmp = path_join(tmp, out->group[c].name);
out->group[c].output_path = strconcat(tmp, ".txt");
out->group[c].input_path = path_join(src, out->group[c].input_path);
PyMem_RawFree(tmp);
}
free(line);
if (out->len > c + 1){
out->len = c + 1;
out->group = (raw_song_t*)PyMem_RawRealloc(out->group, sizeof(raw_song_t) * out->len);
}
fclose(fd);
PyMem_RawFree(index);
return out;
}
/*
 * Rewrite the character at src in place:
 *   '0'..'6' -> 'A'..'G', ' ' -> 'P', '-' -> 'b', anything else -> '#'.
 */
void translate(char *src){
    const char symbol = *src;

    if (symbol >= '0' && symbol <= '6') {
        *src = "ABCDEFG"[symbol - '0'];
    } else if (symbol == ' ') {
        *src = 'P';
    } else if (symbol == '-') {
        *src = 'b';
    } else {
        /* '+' is the expected input here, but every other byte lands here too. */
        *src = '#';
    }
}
/*
 * Convert the song at raw->input_path and write the result to
 * raw->output_path.
 *
 * Every input line is stripped of its trailing newline, reversed, and passed
 * character by character through translate(); the pieces are concatenated in
 * file order. The output is a sequence of "<symbol><count>" groups, where a
 * symbol is one character optionally followed by '#' or 'b' and count is the
 * number of consecutive repetitions. raw->tokens receives the sum of all
 * counts; it is left at 0 when either file cannot be opened or memory runs
 * out.
 */
void parse_tarahumara(raw_song_t* raw){
    raw->tokens = 0;

    FILE *in = fopen(raw->input_path, "r");
    if (in == NULL) {
        fprintf(stderr, "Failed to open file: %d", __LINE__);
        return;
    }

    size_t used = 0;
    char *content = PyMem_RawMalloc(1);
    if (content == NULL) {
        fclose(in);
        return;
    }
    content[0] = '\0'; /* keeps the buffer a valid string for empty files */

    /* getline requires a NULL (or malloc'ed) buffer pointer on first use. */
    char *line = NULL;
    size_t line_cap = 0;
    while (getline(&line, &line_cap, in) != -1) {
        size_t n = strlen(line);
        /* Only drop the newline when there is one (last line may lack it). */
        if (n > 0 && line[n - 1] == '\n')
            --n;

        char *grown = PyMem_RawRealloc(content, used + n + 1);
        if (grown == NULL) {
            free(line);
            fclose(in);
            PyMem_RawFree(content);
            return;
        }
        content = grown;

        for (size_t k = n; k > 0; --k, ++used) {
            content[used] = line[k - 1];
            translate(&content[used]);
        }
        content[used] = '\0';
    }
    free(line); /* allocated by getline, hence plain free */
    fclose(in);

    FILE *out = fopen(raw->output_path, "w");
    if (out == NULL) {
        fprintf(stderr, "Failed to open file: %d %s\n", __LINE__, raw->output_path);
        PyMem_RawFree(content);
        return;
    }

    /*
     * All look-ahead reads below stay within the terminator: an index is only
     * advanced past characters that were just checked to be non-NUL.
     */
    size_t i = 0;
    while (i < used) {
        int c = 1;
        if (content[i + 1] == '#' || content[i + 1] == 'b') {
            /* Two-character symbol: compare pairs. */
            fwrite(content + i, 1, 2, out);
            while (content[i] == content[i + 2] && content[i + 1] == content[i + 3]) {
                ++c;
                i += 2;
            }
            i += 2;
        } else {
            fwrite(content + i, 1, 1, out);
            for (;;) {
                char previous = content[i];
                ++i; /* always step past the character just compared */
                if (previous != content[i])
                    break;
                /* An equal character that starts a two-character symbol
                 * belongs to the next group, not to this run. */
                if (content[i + 1] == '#' || content[i + 1] == 'b')
                    break;
                ++c;
            }
        }
        raw->tokens += c;
        fprintf(out, "%d", c);
    }

    PyMem_RawFree(content);
    fclose(out);
}
int compare_raw_song(const void *a, const void *b) {
int result = -((raw_song_t*)a)->tokens + ((raw_song_t*)b)->tokens;
if (result == 0)
result = strcmp(((raw_song_t*)a)->name, ((raw_song_t*)b)->name);
return result;
}
/*
 * Release a song group: the three strings of every entry, the entry array,
 * and the group object itself, all through PyMem_RawFree.
 */
void cleanup(raw_song_group_t* groups){
    raw_song_t *song = groups->group;

    for (int remaining = groups->len; remaining > 0; --remaining, ++song) {
        PyMem_RawFree(song->input_path);
        PyMem_RawFree(song->output_path);
        PyMem_RawFree(song->name);
    }
    PyMem_RawFree(groups->group);
    PyMem_RawFree(groups);
}
/*
 * Convert every song listed in "<source_root>/index.txt" and write
 * "<target_root>/index.txt" with one "\"<title>\" <tokens>" line per song,
 * ordered by compare_raw_song.
 *
 * Returns the processed group; the caller owns it and releases it with
 * cleanup(). Returns NULL if parse_index returned NULL. If the target index
 * cannot be opened, a message is printed and the group is still returned.
 */
raw_song_group_t* Umkansanize(char* source_root, char* target_root){
    raw_song_group_t *groups = parse_index(source_root, target_root);
    if (groups == NULL)
        return NULL;

    for (int i = 0; i < groups->len; i++)
        parse_tarahumara(&groups->group[i]);
    qsort(groups->group, groups->len, sizeof(raw_song_t), compare_raw_song);

    /* Keep the joined path in a variable so it can be released afterwards. */
    char *index_path = path_join(target_root, "index.txt");
    FILE *fd = index_path != NULL ? fopen(index_path, "w") : NULL;
    if (fd == NULL) {
        fprintf(stderr, "Failed to open file: %s/index.txt\n", target_root);
    } else {
        for (int i = 0; i < groups->len; i++)
            fprintf(fd, "\"%s\" %d\n", groups->group[i].name, groups->group[i].tokens);
        fclose(fd);
    }
    PyMem_RawFree(index_path);
    return groups;
}
/*
 * Python entry point: Umkansanize(source_root: str, target_root: str) -> dict.
 *
 * Runs the conversion and returns a new dict mapping each song title to its
 * token count. Returns NULL with a Python exception set when argument parsing
 * or building the dict fails.
 */
static PyObject* py_Umkansanize(PyObject *self, PyObject *args)
{
    char *source_root;
    char *target_root;

    if (!PyArg_ParseTuple(args, "ss", &source_root, &target_root))
        return NULL;

    raw_song_group_t *groups = Umkansanize(source_root, target_root);
    if (groups == NULL)
        return PyErr_NoMemory();

    PyObject *dict = PyDict_New();
    for (int i = 0; dict != NULL && i < groups->len; i++) {
        PyObject *key = Py_BuildValue("s", groups->group[i].name);
        PyObject *value = PyLong_FromLong(groups->group[i].tokens);
        int failed = key == NULL || value == NULL
                     || PyDict_SetItem(dict, key, value) < 0;

        /* PyDict_SetItem takes its own references; drop ours either way. */
        Py_XDECREF(key);
        Py_XDECREF(value);
        if (failed)
            Py_CLEAR(dict); /* ends the loop and makes us return NULL */
    }

    /* The dict holds its own copies of the data, so the C-side group can go. */
    cleanup(groups);
    return dict;
}
static PyMethodDef methods[] = {
{"Umkansanize", py_Umkansanize, METH_VARARGS, NULL},
{NULL, NULL, 0, NULL} /* Sentinel */
};
static struct PyModuleDef program01_module = {
PyModuleDef_HEAD_INIT,
"program01_c",
NULL,
-1,
methods
};
/* Module initialisation function: builds the module object from program01_module. */
PyMODINIT_FUNC PyInit_program01_c(void)
{
    /* The result is handed back unchanged, NULL included. */
    return PyModule_Create(&program01_module);
}
/*
 * Stand-alone entry point: registers the module init function in the table
 * of built-in modules and initialises the interpreter.
 *
 * NOTE(review): the module is registered as "program01" while the module
 * definition is named "program01_c" - confirm this is intended.
 */
int main(){
    int registered = PyImport_AppendInittab("program01", PyInit_program01_c);

    if (registered == -1) {
        fprintf(stderr, "Error: could not extend in-built modules table\n");
        return 1;
    }

    Py_Initialize();
    return 0;
}
from distutils.core import setup, Extension
def main():
    """Run the distutils setup for the ``program01_c`` extension built from ``main.c``."""
    extension = Extension("program01_c", ["main.c"])
    setup(name="program01_c", ext_modules=[extension])


# Only run the build when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment