original algorithm and code by @zevlg (https://github.com/zevlg): fixpath.c
-
-
Save little-arhat/fd05d60318ba741ed07d3924eb546496 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
normalize path |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
clang -O3 lolpath.c -o lolpath | |
echo -n "c-strlen " | |
cat real.txt| ./lolpath | |
echo -n "c-strlen-jemalloc " | |
cat real.txt| LD_PRELOAD=./jemalloc/lib/libjemalloc.2.dylib ./lolpath | |
clang -O3 fixpath.c -o fixpath | |
echo -n "c-preall " | |
cat real.txt| ./fixpath | |
echo -n "c-preall-jemalloc " | |
cat real.txt| LD_PRELOAD=./jemalloc/lib/libjemalloc.2.dylib ./fixpath | |
clang -O3 walkpath.c -o walkpath | |
echo -n "c-noall " | |
cat real.txt| ./walkpath | |
echo -n "c-noall-jemalloc " | |
cat real.txt| LD_PRELOAD=./jemalloc/lib/libjemalloc.2.dylib ./walkpath | |
clang++ -std=c++14 -fno-exceptions -O2 normpath.cpp -o normpath | |
echo -n "cpp " | |
cat real.txt| ./normpath | |
echo -n "cpp-jemalloc " | |
cat real.txt| LD_PRELOAD=./jemalloc/lib/libjemalloc.2.dylib ./normpath | |
cargo build --quiet --release | |
echo -n "rust " | |
cat real.txt| ./target/release/rspath | |
echo -n "rust-jemalloc " | |
cat real.txt| LD_PRELOAD=./jemalloc/lib/libjemalloc.2.dylib ./target/release/rspath |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package] | |
name = "rspath" | |
version = "0.1.0" | |
[[bin]] | |
name = "rspath" | |
path = "rspath.rs" | |
[dependencies] | |
time = "*" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Running tests: | |
* | |
* $ cat << EOF > fptest.c | |
* #include "fixpath.c" | |
* int | |
* main(void) | |
* { | |
* run_tests(); | |
* return 0; | |
* } | |
* EOF | |
* | |
* $ gcc -O2 fptest.c -o fptest | |
* $ ./fptest | |
*/ | |
#include <stdlib.h> | |
#include <assert.h> | |
#include <string.h> /* strcmp() */ | |
#include <stdio.h> | |
#include <time.h> | |
char* | |
normalize(const char* path) | |
{ | |
size_t ret_size = 512; | |
char* ret = (char*)malloc(ret_size); | |
assert(ret != NULL); | |
unsigned ret_idx = 0; | |
/* Alloc resources */ | |
size_t slash_pos_size = 128; | |
unsigned* slash_pos = (unsigned*)malloc(slash_pos_size * sizeof(unsigned)); | |
assert(slash_pos != NULL); | |
unsigned slash_idx = 0; | |
slash_pos[0] = 0; | |
#define STATE_NONE 0 | |
#define STATE_SLASH 1 | |
#define STATE_SLASH_DOT 2 | |
#define STATE_SLASH_DOTDOT 3 | |
int state = STATE_SLASH; | |
for (const char* p = &path[0]; *p != '\0'; p++) { | |
switch (*p) { | |
case '.': | |
switch (state) { | |
case STATE_SLASH: | |
state = STATE_SLASH_DOT; | |
break; | |
case STATE_SLASH_DOT: | |
state = STATE_SLASH_DOTDOT; | |
break; | |
case STATE_SLASH_DOTDOT: | |
/* NOTE: '...' is perfect name for the directory */ | |
state = STATE_NONE; | |
break; | |
} | |
break; | |
case '/': | |
switch (state) { | |
case STATE_SLASH_DOTDOT: | |
/* Reduce "/../" by removing last path part */ | |
if (slash_idx > 0) | |
slash_idx--; | |
/* FALLTHROUGHT */ | |
case STATE_SLASH: | |
/* Reduce "//" to "/" */ | |
/* FALLTHROUGHT */ | |
case STATE_SLASH_DOT: | |
/* Reduce "/./" to "/" */ | |
state = STATE_SLASH; | |
ret_idx = slash_pos[slash_idx]; | |
break; | |
default: | |
state = STATE_SLASH; | |
slash_idx++; | |
assert(slash_idx <= slash_pos_size); | |
if (slash_idx == slash_pos_size) { | |
slash_pos_size = (slash_pos_size << 1); | |
slash_pos = (unsigned*)realloc(slash_pos, slash_pos_size * sizeof(unsigned)); | |
assert(slash_pos != NULL); | |
} | |
slash_pos[slash_idx] = ret_idx; | |
break; | |
} | |
break; | |
default: | |
state = STATE_NONE; | |
break; | |
} | |
ret[ret_idx++] = *p; | |
/* Ensure '\0' fits into ret string */ | |
assert(ret_idx <= ret_size); | |
if (ret_idx == ret_size) { | |
ret_size = (ret_size << 1); | |
ret = (char*)realloc(ret, ret_size); | |
assert(ret != NULL); | |
} | |
} | |
#undef STATE_NONE | |
#undef STATE_SLASH | |
#undef STATE_SLASH_DOT | |
#undef STATE_SLASH_DOTDOT | |
/* Free used resources */ | |
free(slash_pos); | |
ret[ret_idx] = '\0'; | |
return ret; | |
} | |
const char* test_path[][2] = { | |
{"", ""}, | |
{"foo/./bar", "foo/bar"}, | |
{"../bar", "/bar"}, | |
{"/foo/bar", "/foo/bar"}, | |
{"/foo/bar/../baz", "/foo/baz"}, | |
{"/foo/bar/./baz/", "/foo/bar/baz/"}, | |
{"/foo/../../baz", "/baz"}, | |
{"/...///bar///here/./././/../test", "/.../bar/test"}, | |
/* Long line (> 512) with many slashes (> 128) */ | |
{"/foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb/", | |
"/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/"}, | |
{NULL, NULL} | |
}; | |
void | |
run_tests(void) | |
{ | |
for (int i = 0; test_path[i][0] != NULL; i++) { | |
char* npath = normalize(test_path[i][0]); | |
assert(npath != NULL); | |
assert(strcmp(npath, test_path[i][1]) == 0); | |
free(npath); | |
} | |
} | |
double monoTime() { | |
struct timespec time; | |
clock_gettime(CLOCK_MONOTONIC, &time); | |
return (time.tv_sec + time.tv_nsec / 1e9); | |
} | |
int main(void) { | |
char** testlines = malloc(1000000 * sizeof(char*)); | |
int i = 0; | |
char *line = NULL; | |
while(!feof(stdin)) { | |
size_t cap = 0; | |
ssize_t size = getline(&line, &cap, stdin); | |
if (size < 0) { | |
break; | |
} | |
testlines[i] = malloc(size * sizeof(char)); | |
strncpy(testlines[i], line, size - 1); | |
testlines[i][size-1] = '\0'; | |
line = NULL; | |
i++; | |
} | |
double start = monoTime(); | |
for (int k = 0; k < i; k++) { | |
normalize(testlines[k]); | |
} | |
double end = monoTime(); | |
double diff = end - start; | |
printf("%.10f\n", diff); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
./gen_test.py 1000000 > real.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import random | |
import string | |
random.seed(123456789) | |
PATH_COMP_L = (1, 32) | |
PATH_COMP_N = (6, 24) | |
PATH_COMP_BIG_N = (6, 514) | |
BIG_P = 0.2 | |
DOT_P = 0.15 | |
DOTDOT_P = 0.3 | |
DOUBLE_SLASH = 0.18 | |
s = string.ascii_lowercase + string.digits | |
def rand_string(n): | |
return ''.join(random.choice(s) for _ in range(n)) | |
def gen_comp(): | |
c = None | |
r = random.random() | |
if r < DOT_P: | |
c = "." | |
elif r < DOTDOT_P: | |
c = ".." | |
else: | |
l = random.randint(*PATH_COMP_L) | |
c = rand_string(l) | |
if random.random() < DOUBLE_SLASH: | |
return "/" + c | |
else: | |
return c | |
def gen_path(): | |
if random.random() < BIG_P: | |
n = random.randint(*PATH_COMP_BIG_N) | |
else: | |
n = random.randint(*PATH_COMP_N) | |
return '/' + '/'.join(gen_comp() for _ in range(n)) | |
def main(c): | |
for _ in range(c): | |
print(gen_path()) | |
if __name__ == '__main__': | |
main(int(sys.argv[1])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
*/ | |
#include <stdlib.h> | |
#include <assert.h> | |
#include <string.h> /* strcmp() */ | |
#include <stdio.h> | |
#include <time.h> | |
char* | |
normalize(const char* path) | |
{ | |
size_t ret_size = strlen(path) + 1; | |
char* ret = (char*)malloc(ret_size); | |
memcpy(ret, path, ret_size); | |
assert(ret != NULL); | |
unsigned ret_idx = 0; | |
/* Alloc resources */ | |
size_t slash_pos_size = 128; | |
unsigned* slash_pos = (unsigned*)malloc(slash_pos_size * sizeof(unsigned)); | |
assert(slash_pos != NULL); | |
unsigned slash_idx = 0; | |
slash_pos[0] = 0; | |
#define STATE_NONE 0 | |
#define STATE_SLASH 1 | |
#define STATE_SLASH_DOT 2 | |
#define STATE_SLASH_DOTDOT 3 | |
int state = STATE_SLASH; | |
for (const char* p = &ret[0]; *p != '\0'; p++) { | |
switch (*p) { | |
case '.': | |
switch (state) { | |
case STATE_SLASH: | |
state = STATE_SLASH_DOT; | |
break; | |
case STATE_SLASH_DOT: | |
state = STATE_SLASH_DOTDOT; | |
break; | |
case STATE_SLASH_DOTDOT: | |
/* NOTE: '...' is perfect name for the directory */ | |
state = STATE_NONE; | |
break; | |
} | |
break; | |
case '/': | |
switch (state) { | |
case STATE_SLASH_DOTDOT: | |
/* Reduce "/../" by removing last path part */ | |
if (slash_idx > 0) | |
slash_idx--; | |
/* FALLTHROUGHT */ | |
case STATE_SLASH: | |
/* Reduce "//" to "/" */ | |
/* FALLTHROUGHT */ | |
case STATE_SLASH_DOT: | |
/* Reduce "/./" to "/" */ | |
state = STATE_SLASH; | |
ret_idx = slash_pos[slash_idx]; | |
break; | |
default: | |
state = STATE_SLASH; | |
slash_idx++; | |
assert(slash_idx <= slash_pos_size); | |
if (slash_idx == slash_pos_size) { | |
slash_pos_size = (slash_pos_size << 1); | |
slash_pos = (unsigned*)realloc(slash_pos, slash_pos_size * sizeof(unsigned)); | |
assert(slash_pos != NULL); | |
} | |
slash_pos[slash_idx] = ret_idx; | |
break; | |
} | |
break; | |
default: | |
state = STATE_NONE; | |
break; | |
} | |
ret[ret_idx++] = *p; | |
} | |
#undef STATE_NONE | |
#undef STATE_SLASH | |
#undef STATE_SLASH_DOT | |
#undef STATE_SLASH_DOTDOT | |
/* Free used resources */ | |
free(slash_pos); | |
ret[ret_idx] = '\0'; | |
return ret; | |
} | |
const char* test_path[][2] = { | |
{"", ""}, | |
{"foo/./bar", "foo/bar"}, | |
{"../bar", "/bar"}, | |
{"/foo/bar", "/foo/bar"}, | |
{"/foo/bar/../baz", "/foo/baz"}, | |
{"/foo/bar/./baz/", "/foo/bar/baz/"}, | |
{"/foo/../../baz", "/baz"}, | |
{"/...///bar///here/./././/../test", "/.../bar/test"}, | |
/* Long line (> 512) with many slashes (> 128) */ | |
{"/foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb/", | |
"/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/"}, | |
{NULL, NULL} | |
}; | |
void | |
run_tests(void) | |
{ | |
for (int i = 0; test_path[i][0] != NULL; i++) { | |
char* npath = normalize(test_path[i][0]); | |
assert(npath != NULL); | |
assert(strcmp(npath, test_path[i][1]) == 0); | |
free(npath); | |
} | |
} | |
double monoTime() { | |
struct timespec time; | |
clock_gettime(CLOCK_MONOTONIC, &time); | |
return (time.tv_sec + time.tv_nsec / 1e9); | |
} | |
int main(void) { | |
char** testlines = malloc(1000000 * sizeof(char*)); | |
int i = 0; | |
char *line = NULL; | |
while(!feof(stdin)) { | |
size_t cap = 0; | |
ssize_t size = getline(&line, &cap, stdin); | |
if (size < 0) { | |
break; | |
} | |
testlines[i] = malloc(size * sizeof(char)); | |
strncpy(testlines[i], line, size - 1); | |
testlines[i][size-1] = '\0'; | |
line = NULL; | |
i++; | |
} | |
double start = monoTime(); | |
for (int k = 0; k < i; k++) { | |
normalize(testlines[k]); | |
} | |
double end = monoTime(); | |
double diff = end - start; | |
printf("%.10f\n", diff); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string> | |
#include <iostream> | |
#include <vector> | |
#include <chrono> | |
enum ParsingState {none, slash, slash_dot, slash_dotdot}; | |
std::string normalize(const std::string &path) { | |
std::string result{path}; | |
std::vector<int> slashes {0}; | |
slashes.reserve(path.length() / 4); | |
unsigned slash_idx = 0; | |
unsigned ret_idx = 0; | |
ParsingState state = slash; | |
slashes.push_back(0); | |
for(const auto& c : result) { | |
switch (c) { | |
case '.': | |
switch (state) { | |
case slash: | |
state = slash_dot; | |
break; | |
case slash_dot: | |
state = slash_dotdot; | |
break; | |
case slash_dotdot: | |
state = none; | |
break; | |
default: | |
break; | |
} | |
break; | |
case '/': | |
switch (state) { | |
case slash_dotdot: | |
if (slash_idx > 0) | |
slash_idx--; | |
case slash: | |
case slash_dot: | |
state = slash; | |
ret_idx = slashes[slash_idx]; | |
break; | |
default: | |
state = slash; | |
slash_idx++; | |
if (slash_idx >= slashes.size()) | |
slashes.push_back(ret_idx); | |
else | |
slashes[slash_idx] = ret_idx; | |
break; | |
} | |
break; | |
default: | |
state = none; | |
break; | |
} | |
result[ret_idx++] = c; | |
} | |
result.resize(ret_idx); | |
return result; | |
} | |
double monoTime() { | |
timespec time; | |
clock_gettime(CLOCK_MONOTONIC, &time); | |
return (time.tv_sec + time.tv_nsec / 1e9); | |
} | |
int main(void) { | |
// const vector<pair<string, string>> tests = { | |
// {"", ""}, | |
// {"foo/./bar", "foo/bar"}, | |
// {"../bar", "/bar"}, | |
// {"/foo/bar", "/foo/bar"}, | |
// {"/foo/bar/../baz", "/foo/baz"}, | |
// {"/foo/bar/./baz/", "/foo/bar/baz/"}, | |
// {"/foo/../../baz", "/baz"}, | |
// {"/...///bar///here/./././/../test", "/.../bar/test"}, | |
// /* Long line (> 512) with many slashes (> 128) */ | |
// {"/foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb/", | |
// "/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/"}, | |
// }; | |
// for (const auto& p : tests) { | |
// string r = normalize(p.first); | |
// if (p.second != r) { | |
// cout << "Test failed for: " << p.first << ";\nexpected: " << p.second << "\ngot:" << r << endl; | |
// } | |
// } | |
// cout << normalize("/./..//.//gszk3/83/t/./2wl"); | |
std::vector<std::string> testlines; | |
testlines.reserve(65536); | |
int i = 0; | |
char *line = NULL; | |
while(!feof(stdin)) { | |
size_t cap = 0; | |
ssize_t size = getline(&line, &cap, stdin); | |
if (size < 0) { | |
break; | |
} | |
std::string st(line); | |
testlines.push_back(st); | |
line = NULL; | |
i++; | |
} | |
auto start = monoTime(); | |
for (auto& s : testlines) { | |
normalize(s); | |
} | |
auto end = monoTime(); | |
auto diff = end - start; | |
std::cout.precision(10); | |
std::cout << diff << std::endl; | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate time; | |
enum State { | |
Nothing, | |
Slash, | |
SlashDot, | |
SlashDotDot | |
} | |
fn normalize(path: &str) -> String { | |
unsafe { | |
let l = path.len() / 4 ; | |
let mut slashes:Vec<usize> = Vec::with_capacity(l); | |
slashes.set_len(l); | |
let mut s = String::from(path); | |
let mut ret_idx = 0usize; | |
let mut slash_idx = 0; | |
let mut state = State::Slash; | |
slashes[0] = 0; | |
{ | |
let mut bs = s.as_mut_vec(); | |
for c in path.bytes() { | |
match c as char { | |
'.' => match state { | |
State::Slash => state = State::SlashDot, | |
State::SlashDot => state = State::SlashDotDot, | |
State::SlashDotDot => state = State::Nothing, | |
_ => {} | |
}, | |
'/' => { | |
match state { | |
State::SlashDotDot => { | |
if slash_idx > 0 { | |
slash_idx -= 1; | |
}; | |
ret_idx = slashes[slash_idx]; | |
}, | |
State::Slash => { | |
ret_idx = slashes[slash_idx]; | |
}, | |
State::SlashDot => { | |
ret_idx = slashes[slash_idx]; | |
}, | |
_ => { | |
slash_idx += 1; | |
if slash_idx == slashes.len() { | |
slashes.reserve(slash_idx); | |
slashes.set_len(2 * slash_idx); | |
} | |
slashes[slash_idx] = ret_idx | |
} | |
}; | |
state = State::Slash; | |
}, | |
_ => state = State::Nothing | |
}; | |
bs[ret_idx] = c; | |
ret_idx += 1; | |
}; | |
} | |
s.truncate(ret_idx); | |
s | |
} | |
} | |
use std::io; | |
use std::io::BufReader; | |
use std::io::BufRead; | |
fn main() { | |
// let tests = vec![ | |
// ("foo/./bar", "foo/bar"), | |
// ("../bar", "/bar"), | |
// ("/foo/bar", "/foo/bar"), | |
// ("/foo/bar/../baz", "/foo/baz"), | |
// ("/foo/bar/./baz/", "/foo/bar/baz/"), | |
// ("/foo/../../baz", "/baz"), | |
// ("/...///bar///here/./././/../test", "/.../bar/test"), | |
// ("/foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb/", | |
// "/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/") | |
// ]; | |
// for (i, o) in tests { | |
// let r = normalize(i); | |
// if o != r { | |
// println!("Invalid: {}; wants: {}; got: {}", i, o, r); | |
// } | |
// } | |
let mut testlines = Vec::with_capacity(65536); | |
let stdin = BufReader::new(io::stdin()); | |
for line in stdin.lines() { | |
testlines.push(line.unwrap()); | |
} | |
let start = time::precise_time_s(); | |
for line in testlines { | |
let _ = normalize(&line); | |
} | |
let finish = time::precise_time_s(); | |
let diff = finish - start; | |
println!("{:.10}", diff); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
./all.sh > res.txt | |
cat res.txt | sort -n -k2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rust-jemalloc 1.9398220181 | |
rust 1.9639518170 | |
c-strlen-jemalloc 2.2102649999 | |
c-noall-jemalloc 2.2220930001 | |
c-noall 2.2386530000 | |
c-strlen 2.2561820000 | |
cpp-jemalloc 2.336584 | |
cpp 2.357312 | |
c-preall 2.7188820001 | |
c-preall-jemalloc 2.7222179999 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Running tests: | |
* | |
* $ cat << EOF > fptest.c | |
* #include "fixpath.c" | |
* int | |
* main(void) | |
* { | |
* run_tests(); | |
* return 0; | |
* } | |
* EOF | |
* | |
* $ gcc -O2 fptest.c -o fptest | |
* $ ./fptest | |
*/ | |
#include <stdlib.h> | |
#include <assert.h> | |
#include <string.h> /* strcmp() */ | |
#include <stdio.h> | |
#include <time.h> | |
char* | |
normalize(const char* path) | |
{ | |
size_t ret_size = strlen(path) + 1; | |
char* ret = (char*)malloc(ret_size); | |
memcpy(ret, path, ret_size); | |
assert(ret != NULL); | |
unsigned ret_idx = 0; | |
#define STATE_NONE 0 | |
#define STATE_SLASH 1 | |
#define STATE_SLASH_DOT 2 | |
#define STATE_SLASH_DOTDOT 3 | |
int state = STATE_SLASH; | |
unsigned num_skip = 0; | |
for (const char* p = &path[0]; *p != '\0'; p++) { | |
switch (*p) { | |
case '.': | |
switch (state) { | |
case STATE_SLASH: | |
state = STATE_SLASH_DOT; | |
break; | |
case STATE_SLASH_DOT: | |
state = STATE_SLASH_DOTDOT; | |
break; | |
case STATE_SLASH_DOTDOT: | |
/* NOTE: '...' is perfect name for the directory */ | |
state = STATE_NONE; | |
break; | |
} | |
break; | |
case '/': | |
switch (state) { | |
case STATE_SLASH_DOTDOT: | |
/* Reduce "/../" by removing last path part */ | |
num_skip = 0; | |
for (const char* for_slash = &ret[ret_idx]-1; num_skip < 2 && ret_idx > 0 ; for_slash--){ | |
if (*for_slash == '/') | |
num_skip++; | |
ret_idx--; | |
} | |
state = STATE_SLASH; | |
break; | |
case STATE_SLASH: | |
/* Reduce "//" to "/" */ | |
if (ret_idx > 0) { | |
ret_idx--; | |
} | |
break; | |
case STATE_SLASH_DOT: | |
/* Reduce "/./" to "/" */ | |
state = STATE_SLASH; | |
if (ret_idx > 0) | |
ret_idx--; | |
if (ret_idx > 0) | |
ret_idx--; | |
break; | |
default: | |
state = STATE_SLASH; | |
break; | |
} | |
break; | |
default: | |
state = STATE_NONE; | |
break; | |
} | |
ret[ret_idx++] = *p; | |
} | |
#undef STATE_NONE | |
#undef STATE_SLASH | |
#undef STATE_SLASH_DOT | |
#undef STATE_SLASH_DOTDOT | |
ret[ret_idx] = '\0'; | |
return ret; | |
} | |
const char* test_path[][2] = { | |
{"", ""}, | |
{"foo/./bar", "foo/bar"}, | |
{"../bar", "/bar"}, | |
{"/foo/bar", "/foo/bar"}, | |
{"/foo/bar/../baz", "/foo/baz"}, | |
{"/foo/bar/./baz/", "/foo/bar/baz/"}, | |
{"/foo/../../baz", "/baz"}, | |
{"/...///bar///here/./././/../test", "/.../bar/test"}, | |
/* Long line (> 512) with many slashes (> 128) */ | |
{"/foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb/", | |
"/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/"}, | |
{NULL, NULL} | |
}; | |
void | |
run_tests(void) | |
{ | |
for (int i = 0; test_path[i][0] != NULL; i++) { | |
char* npath = normalize(test_path[i][0]); | |
assert(npath != NULL); | |
assert(strcmp(npath, test_path[i][1]) == 0); | |
free(npath); | |
} | |
} | |
double monoTime() { | |
struct timespec time; | |
clock_gettime(CLOCK_MONOTONIC, &time); | |
return (time.tv_sec + time.tv_nsec / 1e9); | |
} | |
int main(void) { | |
/* run_tests(); */ | |
char** testlines = malloc(1000000 * sizeof(char*)); | |
int i = 0; | |
char *line = NULL; | |
while(!feof(stdin)) { | |
size_t cap = 0; | |
ssize_t size = getline(&line, &cap, stdin); | |
if (size < 0) { | |
break; | |
} | |
testlines[i] = malloc(size * sizeof(char)); | |
strncpy(testlines[i], line, size - 1); | |
testlines[i][size-1] = '\0'; | |
line = NULL; | |
i++; | |
} | |
double start = monoTime(); | |
for (int k = 0; k < i; k++) { | |
normalize(testlines[k]); | |
} | |
double end = monoTime(); | |
double diff = end - start; | |
printf("%.10f\n", diff); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment