Created
September 17, 2022 23:20
-
-
Save lichrot/1280b11c44b1b63cb3daa75fd0d33715 to your computer and use it in GitHub Desktop.
Get edit distance between two strings, with or without allocation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <stdbool.h> | |
#include <time.h> | |
char* get_substring(char* string, int start_idx, int end_idx) { | |
int total_len = end_idx - start_idx; | |
char* result = malloc(sizeof(char) * total_len); | |
for (int idx = 0; idx < total_len; idx++) { | |
int offset_idx = start_idx + idx; | |
result[idx] = string[offset_idx]; | |
} | |
return result; | |
} | |
char* get_common_substring( | |
char* short_str, | |
int short_len, | |
char* long_str | |
) { | |
for (int end_idx = short_len; end_idx > 0; end_idx--) { | |
for (int start_idx = 0; start_idx < short_len; start_idx++) { | |
char* substring = get_substring(short_str, start_idx, end_idx); | |
if (strstr(long_str, substring) != NULL) return substring; | |
free(substring); | |
} | |
} | |
return '\0'; | |
} | |
int get_distance_alloc(char* a, char* b) { | |
int a_len = strlen(a); | |
int b_len = strlen(b); | |
int result; | |
if (a_len <= b_len) { | |
char* substring = get_common_substring(a, a_len, b); | |
result = b_len - strlen(substring); | |
} else { | |
char* substring = get_common_substring(b, b_len, a); | |
result = a_len - strlen(substring); | |
} | |
return result; | |
} | |
int get_common_substring_len( | |
char* short_str, | |
int short_len, | |
char* long_str, | |
int long_len | |
) { | |
for (int end_idx = short_len; end_idx > 0; end_idx--) { | |
for (int start_idx = 0; start_idx < short_len; start_idx++) { | |
int substring_len = end_idx - start_idx; | |
for (int longer_idx = 0; longer_idx < long_len; longer_idx++) { | |
bool is_equal = true; | |
for (int offset = 0; offset < substring_len; offset++) { | |
if (long_str[longer_idx + offset] == short_str[start_idx + offset]) continue; | |
is_equal = false; | |
break; | |
} | |
if (is_equal) return substring_len; | |
} | |
} | |
} | |
return 0; | |
} | |
int get_distance(char* a, char* b) { | |
int a_len = strlen(a); | |
int b_len = strlen(b); | |
int result; | |
if (a_len <= b_len) { | |
int substring_len = get_common_substring_len(a, a_len, b, b_len); | |
result = b_len - substring_len; | |
} else { | |
int substring_len = get_common_substring_len(b, b_len, a, a_len); | |
result = a_len - substring_len; | |
} | |
return result; | |
} | |
double get_start_time() { | |
return (double)clock() / CLOCKS_PER_SEC; | |
} | |
double print_end_time(double start_time) { | |
double end_time = (double)clock() / CLOCKS_PER_SEC; | |
double diff = end_time - start_time; | |
printf("CPU time: %f\n", diff); | |
return diff; | |
} | |
double benchmark(char* header, int (*callback)(char*, char*)) { | |
printf(header); | |
double alloc_time = get_start_time(); | |
printf("1) %d\n", callback("abc", "abc")); | |
printf("2) %d\n", callback("abc", "abcd")); | |
printf("3) %d\n", callback("abc", "cba")); | |
printf("4) %d\n", callback("abcdefghijklmnopqrstuvwxyz", "zyxwvutsrqponmlkjihgfedcba")); | |
return print_end_time(alloc_time); | |
} | |
int main() { | |
double dist_alloc_time = benchmark("get_distance_alloc:\n", get_distance_alloc); | |
printf("\n"); | |
double dist_time = benchmark("get_distance:\n", get_distance); | |
printf( | |
"\nget_distance_alloc > get_distance: %s\n", | |
dist_alloc_time > dist_time ? "true" : "false" | |
); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment