-
-
Save DonalDuck004/2096f9089933d4ad71bf9183ecfb5c4d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Presenta piccoli controlli extra non pienamente gestiti a fini di debug, è sempre divertente litigare con gli errori di segmentazione :D
P.S. Usa un algoritmo diverso (non direttamente applicabile in Python) che risulta essere fino a 4/5 volte più veloce in C
(^ Parte di algoritmo.txt) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dispatch switch: when True, the public Umkansanize delegates to the
# compiled C extension; when False, the pure-Python implementation
# below is used instead.
C = True

if C:
    # Thin wrapper so the public name and signature are identical
    # regardless of which backend is active.
    from program01_c import Umkansanize as real

    def Umkansanize(source_root: str, target_root: str) -> dict[str, int]:
        """Delegate to the C extension's Umkansanize."""
        return real(source_root, target_root)
else:
    from os import makedirs as mkdir

    if False:
        # Type-checker-only import; never executed at runtime.
        from typing import Generator

    # 256-entry table for bytes.translate(): maps note digits '0'..'6'
    # to 'A'..'G', ' ' -> 'P', '+' -> '#', '-' -> 'b'; every other byte
    # maps to itself (mirrors translate() in the C implementation).
    TABLE = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1fP!"#$%&\'()*#,b./ABCDEFG789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
def index_eater(src: bytes, dest: bytes) -> "Generator[tuple[bytes, bytes, bytes], None, None]":
    """Parse ``src/index.txt`` and yield one tuple per entry:
    ``(song_name, input_path, output_path)`` — all ``bytes``.

    Each index line has the form ``"<song name>" "<relative/path>"``.
    Side effect: the output directory under ``dest`` is created the
    first time it is seen (consecutive entries sharing a directory skip
    the makedirs call).
    """
    last = None
    with open(src + b"/index.txt", "rb") as f:
        # Fix: the original filter compared the bytes line against the
        # str "\n", which is never equal in Python 3, so blank lines
        # slipped through and crashed the 2-tuple unpacking below.
        for song_name, file_name in (x[1:-2].split(b'" "') for x in f if x != b"\n"):
            target_dir = (dest + b"/" + file_name).rpartition(b"/")[0]
            if target_dir != last:
                # Only touch the filesystem when the directory changes.
                mkdir(target_dir, exist_ok=True)
                last = target_dir
            yield song_name, src + b"/" + file_name, target_dir + b"/" + song_name + b".txt"
def tknz_tarahumara(path: bytes, out: bytes) -> int:
    """Run-length tokenize the song at `path`, writing the result to
    `out`, and return the total token count (sum of all run lengths).

    A token is either a single note character, or a note followed by a
    '#'/'b' modifier; each token is written followed by its repeat count.
    """
    with open(path, "rb") as inp:
        # Every line is reversed (minus its trailing newline), remapped
        # through TABLE ('0'..'6' -> 'A'..'G', ' ' -> 'P', '+' -> '#',
        # '-' -> 'b') and concatenated.  The trailing "Z" sentinel never
        # matches a note, so the look-aheads below cannot run past a run.
        content = b"".join(x[:-1][::-1] for x in inp).translate(TABLE).decode() + "Z"
    total = len(content)
    i = 0
    tokens = 0
    with open(out, "w") as out:
        # Stop before the sentinel at total - 1.
        while i < total - 1:
            c = 1
            if content[i + 1] == "#" or content[i + 1] == "b":
                # Two-character token (note + modifier): repeats advance
                # in steps of two.
                out.write(content[i] + content[i + 1])
                while content[i] == content[i + 2] and content[i + 1] == content[i + 3]:
                    c += 1
                    i += 2
                i += 2
            else:
                # One-character token: extend the run while the same note
                # repeats AND the following char is not a modifier (which
                # would begin a two-character token instead).  The walrus
                # advances i exactly once per comparison.
                out.write(content[i])
                while content[i] == content[(i := i + 1)] and (content[i + 1] != "#" and content[i + 1] != "b"):
                    c += 1
            out.write(str(c))
            tokens += c
    return tokens
def Umkansanize(source_root: str, target_root: str) -> dict[str, int]:
    """Tokenize every song listed in ``source_root/index.txt`` into
    ``target_root``, write a summary ``index.txt`` sorted by descending
    token count (ties broken by name), and return ``{name: tokens}``.
    """
    dest = target_root.encode()
    stats = []
    for name, inp, out in index_eater(source_root.encode(), dest):
        stats.append((name.decode(), tknz_tarahumara(inp, out)))
    # Most tokens first; alphabetical within equal counts.
    stats.sort(key=lambda entry: (-entry[1], entry[0]))
    summary = (f'"{name}" {count}'.encode() for name, count in stats)
    with open(dest + b"/index.txt", "wb") as index_file:
        index_file.write(b"\n".join(summary))
    return dict(stats)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <libgen.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/stat.h> | |
#define PY_SSIZE_T_CLEAN | |
#include <Python.h> | |
/* One index entry: a song plus where to read it from and where to write
 * its tokenized form.  All three strings are owned by the struct
 * (PyMem_RawMalloc'd in parse_index, released in cleanup). */
typedef struct {
    char* name;         /* song title, without quotes or extension */
    char* input_path;   /* src-joined path of the source file */
    char* output_path;  /* dest-joined path of the ".txt" output */
    int tokens;         /* total token count, filled by parse_tarahumara */
} raw_song_t;

/* Growable array of raw_song_t produced by parse_index(). */
typedef struct {
    int len;            /* number of valid entries in group */
    raw_song_t* group;
} raw_song_group_t;
/* Concatenate two NUL-terminated strings into a freshly
 * PyMem_RawMalloc'd buffer.  The caller owns the result. */
char* strconcat(char* str1, char* str2){
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);
    char* buff = (char*)PyMem_RawMalloc(len1 + len2 + 1);
    memcpy(buff, str1, len1);
    memcpy(buff + len1, str2, len2 + 1);  /* +1 copies the terminator */
    return buff;
}
/* Join two path components with a '/' into a freshly
 * PyMem_RawMalloc'd buffer.  The caller owns the result. */
char* path_join(char* str1, char* str2){
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);
    char* buff = (char*)PyMem_RawMalloc(len1 + len2 + 2);
    memcpy(buff, str1, len1);
    buff[len1] = '/';
    memcpy(buff + len1 + 1, str2, len2 + 1);  /* +1 copies the terminator */
    return buff;
}
/* Create directory `dir` and all missing parents (like `mkdir -p`).
 * `perm` is passed straight to mkdir(); failures (e.g. EEXIST for
 * already-existing components) are deliberately ignored. */
void recursive_mkdir(char *dir, int perm) {
    size_t len = strlen(dir);
    char *tmp = malloc(len + 1);
    if (tmp == NULL)
        return;                    /* out of memory: silently give up */
    memcpy(tmp, dir, len + 1);     /* copy includes the NUL terminator */
    if (len > 0 && tmp[len - 1] == '/')
        tmp[len - 1] = '\0';       /* drop a single trailing slash */
    for (char *p = tmp + 1; *p; p++) {
        if (*p == '/') {
            *p = '\0';             /* temporarily cut at this component */
            mkdir(tmp, perm);
            *p = '/';
        }
    }
    mkdir(tmp, perm);
    free(tmp);                     /* fix: original leaked this buffer */
}
raw_song_group_t* parse_index(char* src, char* dest){ | |
raw_song_group_t* out = (raw_song_group_t*)PyMem_RawMalloc(sizeof(raw_song_group_t)); | |
out->len = 7; | |
out->group = (raw_song_t*)PyMem_RawMalloc(sizeof(raw_song_t) * out->len); | |
int c = -1; | |
char* index = path_join(src, "index.txt"); | |
FILE* fd = fopen(index, "r"); | |
size_t _ = 0; | |
char* line; | |
int begin; | |
int offset; | |
char* tmp; | |
int temp_size; | |
while (getline(&line, &_, fd) != -1){ | |
if (++c == out->len){ | |
out->len += 3; | |
out->group = (raw_song_t*)PyMem_RawRealloc(out->group, sizeof(raw_song_t) * out->len); | |
} | |
begin = 0; | |
while (line[++begin] != '"'); | |
temp_size = begin - 1; | |
out->group[c].name = (char*)PyMem_RawMalloc(sizeof(char) * (temp_size + 1)); | |
memcpy(out->group[c].name, line + 1, temp_size); | |
out->group[c].name[temp_size] = '\x00'; | |
begin += 2; | |
offset = begin; | |
while (line[++offset] != '"'); | |
temp_size = offset - begin - 1; | |
out->group[c].input_path = (char*)PyMem_RawMalloc(sizeof(char) * (temp_size + 1)); | |
memcpy(out->group[c].input_path, line + begin + 1, temp_size); | |
out->group[c].input_path[temp_size] = '\x00'; | |
out->group[c].output_path = (char*)PyMem_RawMalloc(sizeof(char) * (temp_size + 1)); | |
memcpy(out->group[c].output_path, out->group[c].input_path, temp_size); | |
out->group[c].output_path[temp_size] = '\x00'; | |
tmp = dirname(path_join(dest, out->group[c].input_path)); | |
recursive_mkdir(tmp, 0777); | |
tmp = path_join(tmp, out->group[c].name); | |
out->group[c].output_path = strconcat(tmp, ".txt"); | |
out->group[c].input_path = path_join(src, out->group[c].input_path); | |
PyMem_RawFree(tmp); | |
} | |
free(line); | |
if (out->len > c + 1){ | |
out->len = c + 1; | |
out->group = (raw_song_t*)PyMem_RawRealloc(out->group, sizeof(raw_song_t) * out->len); | |
} | |
fclose(fd); | |
PyMem_RawFree(index); | |
return out; | |
} | |
/* Remap one raw note character in place:
 * '0'..'6' -> 'A'..'G', ' ' -> 'P', '-' -> 'b', anything else
 * (in practice '+') -> '#'.  Mirrors TABLE in the Python version. */
void translate(char *src){
    char c = *src;
    if (c >= '0' && c <= '6')
        *src = (char)('A' + (c - '0'));
    else if (c == ' ')
        *src = 'P';
    else if (c == '-')
        *src = 'b';
    else
        *src = '#';
}
/* Tokenize the song at raw->input_path into raw->output_path using a
 * run-length scheme, storing the total token count in raw->tokens.
 *
 * Pass 1: read each line, drop its '\n', reverse it and remap every
 * character with translate(), accumulating into `content`.
 * Pass 2: walk `content` emitting "<token><count>" pairs, where a token
 * is one note character or a note followed by '#'/'b'. */
void parse_tarahumara(raw_song_t* raw){
    raw->tokens = 0;
    FILE* fd = fopen(raw->input_path, "r");
    if (fd == NULL){
        fprintf(stderr, "Failed to open file: %d %s\n", __LINE__, raw->input_path);
        return;               /* fix: original kept using the NULL stream */
    }
    char* line = NULL;        /* fix: getline() requires NULL or a malloc'd buffer */
    size_t cap = 0;
    int j = 0;
    char* content = (char*)PyMem_RawMalloc(sizeof(char));
    content[0] = '\0';        /* fix: keep content a valid string for empty files */
    while (getline(&line, &cap, fd) != -1){
        size_t linelen = strlen(line);
        /* fix: element size was sizeof(char*), over-allocating 8x */
        content = (char*)PyMem_RawRealloc(content, j + linelen + 1);
        for (int i = (int)linelen - 2; i >= 0; --i, ++j){
            content[j] = line[i];   /* reversed copy, without the '\n' */
            translate(&content[j]);
        }
        content[j] = '\0';
    }
    free(line);               /* getline() allocates with plain malloc() */
    fclose(fd);
    fd = fopen(raw->output_path, "w");
    if (fd == NULL){
        fprintf(stderr, "Failed to open file: %d %s\n", __LINE__, raw->output_path);
        PyMem_RawFree(content);
        return;
    }
    size_t total = strlen(content);  /* fix: hoisted; was recomputed every iteration */
    size_t i = 0;
    while (i < total){
        int c = 1;
        if (content[i + 1] == '#' || content[i + 1] == 'b'){
            /* Two-character token (note + modifier): repeats advance by 2.
             * The comparisons stop at the NUL terminator, which never
             * matches a note character. */
            fwrite(content + i, 1, 2, fd);
            while (content[i] == content[i + 2] && content[i + 1] == content[i + 3]){
                ++c;
                i += 2;
            }
            i += 2;
        }else{
            /* One-character token: extend the run while the same note
             * repeats and the next char does not start a note+modifier
             * pair.  fix: the original `content[i] == content[++i]` read
             * and modified i without a sequence point (undefined
             * behavior); this loop makes the old-vs-new comparison
             * explicit while advancing i exactly once per check. */
            fwrite(content + i, 1, 1, fd);
            for (;;){
                char prev = content[i];
                ++i;
                if (content[i] != prev || content[i + 1] == '#' || content[i + 1] == 'b')
                    break;
                ++c;
            }
        }
        raw->tokens += c;
        fprintf(fd, "%d", c);
    }
    PyMem_RawFree(content);
    fclose(fd);
}
int compare_raw_song(const void *a, const void *b) { | |
int result = -((raw_song_t*)a)->tokens + ((raw_song_t*)b)->tokens; | |
if (result == 0) | |
result = strcmp(((raw_song_t*)a)->name, ((raw_song_t*)b)->name); | |
return result; | |
} | |
/* Release every string owned by the group, then the array and the group
 * itself.  Counterpart of parse_index(). */
void cleanup(raw_song_group_t* groups){
    raw_song_t* end = groups->group + groups->len;
    for (raw_song_t* song = groups->group; song < end; song++){
        PyMem_RawFree(song->input_path);
        PyMem_RawFree(song->output_path);
        PyMem_RawFree(song->name);
    }
    PyMem_RawFree(groups->group);
    PyMem_RawFree(groups);
}
raw_song_group_t* Umkansanize(char* source_root, char* target_root){ | |
raw_song_group_t* groups = parse_index(source_root, target_root); | |
for (int i = 0; i < groups->len; i++) | |
parse_tarahumara(&groups->group[i]); | |
qsort(groups->group, groups->len, sizeof(raw_song_t), compare_raw_song); | |
FILE* fd = fopen(path_join(target_root, "index.txt"), "w"); | |
for (int i = 0; i < groups->len; i++) | |
fprintf(fd, "\"%s\" %d\n", groups->group[i].name, groups->group[i].tokens); | |
fclose(fd); | |
return groups; | |
} | |
/* Python binding: Umkansanize(source_root: str, target_root: str)
 * -> dict[str, int] mapping song name to token count. */
static PyObject* py_Umkansanize(PyObject *self, PyObject *args)
{
    char *source_root;
    char *target_root;
    if (!PyArg_ParseTuple(args, "ss", &source_root, &target_root))
        return NULL;
    raw_song_group_t* groups = Umkansanize(source_root, target_root);
    PyObject* dict = PyDict_New();
    if (dict == NULL){
        cleanup(groups);
        return NULL;
    }
    for (int i = 0; i < groups->len; i++){
        PyObject* key = PyUnicode_FromString(groups->group[i].name);
        PyObject* val = PyLong_FromLong(groups->group[i].tokens);
        if (key == NULL || val == NULL || PyDict_SetItem(dict, key, val) < 0){
            Py_XDECREF(key);
            Py_XDECREF(val);
            Py_DECREF(dict);
            cleanup(groups);
            return NULL;
        }
        /* fix: PyDict_SetItem does not steal references; the original
         * leaked one key and one value object per entry */
        Py_DECREF(key);
        Py_DECREF(val);
    }
    /* fix: the original never freed the C-side allocations ("missing
     * call to cleanup" in its own comment) — leaked on every call */
    cleanup(groups);
    return dict;
}
/* Method table exported to Python: a single function, Umkansanize. */
static PyMethodDef methods[] = {
    {"Umkansanize", py_Umkansanize, METH_VARARGS, NULL},
    {NULL, NULL, 0, NULL} /* Sentinel */
};
/* Module definition for `program01_c`. */
static struct PyModuleDef program01_module = {
    PyModuleDef_HEAD_INIT,
    "program01_c",   /* module name as seen from Python */
    NULL,            /* no module docstring */
    -1,              /* no per-module state */
    methods
};
PyMODINIT_FUNC PyInit_program01_c(void) | |
{ | |
PyObject *m = PyModule_Create(&program01_module); | |
if (m == NULL) | |
return NULL; | |
return m; | |
} | |
/* Stand-alone embedding stub; the normal entry point is
 * PyInit_program01_c via `import program01_c`.  Registers the module,
 * starts an interpreter, and exits immediately. */
int main(){
    /* NOTE(review): the module is registered here as "program01" while
     * the module definition names it "program01_c" — confirm which name
     * an embedding caller is expected to import. */
    if (PyImport_AppendInittab("program01", PyInit_program01_c) == -1) {
        fprintf(stderr, "Error: could not extend in-built modules table\n");
        exit(1);
    }
    Py_Initialize();
    /* NOTE(review): no Py_FinalizeEx() before exit — fine for a stub,
     * but worth confirming if this main is ever used for real. */
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# distutils was deprecated by PEP 632 and removed in Python 3.12;
# prefer setuptools and fall back only on very old environments.
try:
    from setuptools import setup, Extension
except ImportError:
    from distutils.core import setup, Extension


def main():
    """Build the `program01_c` extension module from main.c."""
    setup(
        name="program01_c",
        ext_modules=[Extension("program01_c", ["main.c"])],
    )


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment