Skip to content

Instantly share code, notes, and snippets.

@little-arhat
Last active February 21, 2017 13:24
Show Gist options
  • Save little-arhat/fd05d60318ba741ed07d3924eb546496 to your computer and use it in GitHub Desktop.
Save little-arhat/fd05d60318ba741ed07d3924eb546496 to your computer and use it in GitHub Desktop.
normalize path
#!/usr/bin/env bash
clang -O3 lolpath.c -o lolpath
echo -n "c-strlen "
cat real.txt| ./lolpath
echo -n "c-strlen-jemalloc "
cat real.txt| LD_PRELOAD=./jemalloc/lib/libjemalloc.2.dylib ./lolpath
clang -O3 fixpath.c -o fixpath
echo -n "c-preall "
cat real.txt| ./fixpath
echo -n "c-preall-jemalloc "
cat real.txt| LD_PRELOAD=./jemalloc/lib/libjemalloc.2.dylib ./fixpath
clang -O3 walkpath.c -o walkpath
echo -n "c-noall "
cat real.txt| ./walkpath
echo -n "c-noall-jemalloc "
cat real.txt| LD_PRELOAD=./jemalloc/lib/libjemalloc.2.dylib ./walkpath
clang++ -std=c++14 -fno-exceptions -O2 normpath.cpp -o normpath
echo -n "cpp "
cat real.txt| ./normpath
echo -n "cpp-jemalloc "
cat real.txt| LD_PRELOAD=./jemalloc/lib/libjemalloc.2.dylib ./normpath
cargo build --quiet --release
echo -n "rust "
cat real.txt| ./target/release/rspath
echo -n "rust-jemalloc "
cat real.txt| LD_PRELOAD=./jemalloc/lib/libjemalloc.2.dylib ./target/release/rspath
[package]
name = "rspath"
version = "0.1.0"
[[bin]]
name = "rspath"
path = "rspath.rs"
[dependencies]
time = "*"
/*
* Running tests:
*
* $ cat << EOF > fptest.c
* #include "fixpath.c"
* int
* main(void)
* {
* run_tests();
* return 0;
* }
* EOF
*
* $ gcc -O2 fptest.c -o fptest
* $ ./fptest
*/
#include <stdlib.h>
#include <assert.h>
#include <string.h> /* strcmp() */
#include <stdio.h>
#include <time.h>
char*
normalize(const char* path)
{
size_t ret_size = 512;
char* ret = (char*)malloc(ret_size);
assert(ret != NULL);
unsigned ret_idx = 0;
/* Alloc resources */
size_t slash_pos_size = 128;
unsigned* slash_pos = (unsigned*)malloc(slash_pos_size * sizeof(unsigned));
assert(slash_pos != NULL);
unsigned slash_idx = 0;
slash_pos[0] = 0;
#define STATE_NONE 0
#define STATE_SLASH 1
#define STATE_SLASH_DOT 2
#define STATE_SLASH_DOTDOT 3
int state = STATE_SLASH;
for (const char* p = &path[0]; *p != '\0'; p++) {
switch (*p) {
case '.':
switch (state) {
case STATE_SLASH:
state = STATE_SLASH_DOT;
break;
case STATE_SLASH_DOT:
state = STATE_SLASH_DOTDOT;
break;
case STATE_SLASH_DOTDOT:
/* NOTE: '...' is perfect name for the directory */
state = STATE_NONE;
break;
}
break;
case '/':
switch (state) {
case STATE_SLASH_DOTDOT:
/* Reduce "/../" by removing last path part */
if (slash_idx > 0)
slash_idx--;
/* FALLTHROUGHT */
case STATE_SLASH:
/* Reduce "//" to "/" */
/* FALLTHROUGHT */
case STATE_SLASH_DOT:
/* Reduce "/./" to "/" */
state = STATE_SLASH;
ret_idx = slash_pos[slash_idx];
break;
default:
state = STATE_SLASH;
slash_idx++;
assert(slash_idx <= slash_pos_size);
if (slash_idx == slash_pos_size) {
slash_pos_size = (slash_pos_size << 1);
slash_pos = (unsigned*)realloc(slash_pos, slash_pos_size * sizeof(unsigned));
assert(slash_pos != NULL);
}
slash_pos[slash_idx] = ret_idx;
break;
}
break;
default:
state = STATE_NONE;
break;
}
ret[ret_idx++] = *p;
/* Ensure '\0' fits into ret string */
assert(ret_idx <= ret_size);
if (ret_idx == ret_size) {
ret_size = (ret_size << 1);
ret = (char*)realloc(ret, ret_size);
assert(ret != NULL);
}
}
#undef STATE_NONE
#undef STATE_SLASH
#undef STATE_SLASH_DOT
#undef STATE_SLASH_DOTDOT
/* Free used resources */
free(slash_pos);
ret[ret_idx] = '\0';
return ret;
}
const char* test_path[][2] = {
{"", ""},
{"foo/./bar", "foo/bar"},
{"../bar", "/bar"},
{"/foo/bar", "/foo/bar"},
{"/foo/bar/../baz", "/foo/baz"},
{"/foo/bar/./baz/", "/foo/bar/baz/"},
{"/foo/../../baz", "/baz"},
{"/...///bar///here/./././/../test", "/.../bar/test"},
/* Long line (> 512) with many slashes (> 128) */
{"/foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb/",
"/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/"},
{NULL, NULL}
};
void
run_tests(void)
{
for (int i = 0; test_path[i][0] != NULL; i++) {
char* npath = normalize(test_path[i][0]);
assert(npath != NULL);
assert(strcmp(npath, test_path[i][1]) == 0);
free(npath);
}
}
double monoTime() {
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
return (time.tv_sec + time.tv_nsec / 1e9);
}
int main(void) {
char** testlines = malloc(1000000 * sizeof(char*));
int i = 0;
char *line = NULL;
while(!feof(stdin)) {
size_t cap = 0;
ssize_t size = getline(&line, &cap, stdin);
if (size < 0) {
break;
}
testlines[i] = malloc(size * sizeof(char));
strncpy(testlines[i], line, size - 1);
testlines[i][size-1] = '\0';
line = NULL;
i++;
}
double start = monoTime();
for (int k = 0; k < i; k++) {
normalize(testlines[k]);
}
double end = monoTime();
double diff = end - start;
printf("%.10f\n", diff);
return 0;
}
#!/usr/bin/env bash
./gen_test.py 1000000 > real.txt
#!/usr/bin/env python3
import sys
import random
import string
random.seed(123456789)
PATH_COMP_L = (1, 32)
PATH_COMP_N = (6, 24)
PATH_COMP_BIG_N = (6, 514)
BIG_P = 0.2
DOT_P = 0.15
DOTDOT_P = 0.3
DOUBLE_SLASH = 0.18
s = string.ascii_lowercase + string.digits
def rand_string(n):
return ''.join(random.choice(s) for _ in range(n))
def gen_comp():
c = None
r = random.random()
if r < DOT_P:
c = "."
elif r < DOTDOT_P:
c = ".."
else:
l = random.randint(*PATH_COMP_L)
c = rand_string(l)
if random.random() < DOUBLE_SLASH:
return "/" + c
else:
return c
def gen_path():
if random.random() < BIG_P:
n = random.randint(*PATH_COMP_BIG_N)
else:
n = random.randint(*PATH_COMP_N)
return '/' + '/'.join(gen_comp() for _ in range(n))
def main(c):
for _ in range(c):
print(gen_path())
if __name__ == '__main__':
main(int(sys.argv[1]))
/*
*/
#include <stdlib.h>
#include <assert.h>
#include <string.h> /* strcmp() */
#include <stdio.h>
#include <time.h>
char*
normalize(const char* path)
{
size_t ret_size = strlen(path) + 1;
char* ret = (char*)malloc(ret_size);
memcpy(ret, path, ret_size);
assert(ret != NULL);
unsigned ret_idx = 0;
/* Alloc resources */
size_t slash_pos_size = 128;
unsigned* slash_pos = (unsigned*)malloc(slash_pos_size * sizeof(unsigned));
assert(slash_pos != NULL);
unsigned slash_idx = 0;
slash_pos[0] = 0;
#define STATE_NONE 0
#define STATE_SLASH 1
#define STATE_SLASH_DOT 2
#define STATE_SLASH_DOTDOT 3
int state = STATE_SLASH;
for (const char* p = &ret[0]; *p != '\0'; p++) {
switch (*p) {
case '.':
switch (state) {
case STATE_SLASH:
state = STATE_SLASH_DOT;
break;
case STATE_SLASH_DOT:
state = STATE_SLASH_DOTDOT;
break;
case STATE_SLASH_DOTDOT:
/* NOTE: '...' is perfect name for the directory */
state = STATE_NONE;
break;
}
break;
case '/':
switch (state) {
case STATE_SLASH_DOTDOT:
/* Reduce "/../" by removing last path part */
if (slash_idx > 0)
slash_idx--;
/* FALLTHROUGHT */
case STATE_SLASH:
/* Reduce "//" to "/" */
/* FALLTHROUGHT */
case STATE_SLASH_DOT:
/* Reduce "/./" to "/" */
state = STATE_SLASH;
ret_idx = slash_pos[slash_idx];
break;
default:
state = STATE_SLASH;
slash_idx++;
assert(slash_idx <= slash_pos_size);
if (slash_idx == slash_pos_size) {
slash_pos_size = (slash_pos_size << 1);
slash_pos = (unsigned*)realloc(slash_pos, slash_pos_size * sizeof(unsigned));
assert(slash_pos != NULL);
}
slash_pos[slash_idx] = ret_idx;
break;
}
break;
default:
state = STATE_NONE;
break;
}
ret[ret_idx++] = *p;
}
#undef STATE_NONE
#undef STATE_SLASH
#undef STATE_SLASH_DOT
#undef STATE_SLASH_DOTDOT
/* Free used resources */
free(slash_pos);
ret[ret_idx] = '\0';
return ret;
}
const char* test_path[][2] = {
{"", ""},
{"foo/./bar", "foo/bar"},
{"../bar", "/bar"},
{"/foo/bar", "/foo/bar"},
{"/foo/bar/../baz", "/foo/baz"},
{"/foo/bar/./baz/", "/foo/bar/baz/"},
{"/foo/../../baz", "/baz"},
{"/...///bar///here/./././/../test", "/.../bar/test"},
/* Long line (> 512) with many slashes (> 128) */
{"/foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb/",
"/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/"},
{NULL, NULL}
};
void
run_tests(void)
{
for (int i = 0; test_path[i][0] != NULL; i++) {
char* npath = normalize(test_path[i][0]);
assert(npath != NULL);
assert(strcmp(npath, test_path[i][1]) == 0);
free(npath);
}
}
double monoTime() {
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
return (time.tv_sec + time.tv_nsec / 1e9);
}
int main(void) {
char** testlines = malloc(1000000 * sizeof(char*));
int i = 0;
char *line = NULL;
while(!feof(stdin)) {
size_t cap = 0;
ssize_t size = getline(&line, &cap, stdin);
if (size < 0) {
break;
}
testlines[i] = malloc(size * sizeof(char));
strncpy(testlines[i], line, size - 1);
testlines[i][size-1] = '\0';
line = NULL;
i++;
}
double start = monoTime();
for (int k = 0; k < i; k++) {
normalize(testlines[k]);
}
double end = monoTime();
double diff = end - start;
printf("%.10f\n", diff);
return 0;
}
#include <string>
#include <iostream>
#include <vector>
#include <chrono>
enum ParsingState {none, slash, slash_dot, slash_dotdot};
std::string normalize(const std::string &path) {
std::string result{path};
std::vector<int> slashes {0};
slashes.reserve(path.length() / 4);
unsigned slash_idx = 0;
unsigned ret_idx = 0;
ParsingState state = slash;
slashes.push_back(0);
for(const auto& c : result) {
switch (c) {
case '.':
switch (state) {
case slash:
state = slash_dot;
break;
case slash_dot:
state = slash_dotdot;
break;
case slash_dotdot:
state = none;
break;
default:
break;
}
break;
case '/':
switch (state) {
case slash_dotdot:
if (slash_idx > 0)
slash_idx--;
case slash:
case slash_dot:
state = slash;
ret_idx = slashes[slash_idx];
break;
default:
state = slash;
slash_idx++;
if (slash_idx >= slashes.size())
slashes.push_back(ret_idx);
else
slashes[slash_idx] = ret_idx;
break;
}
break;
default:
state = none;
break;
}
result[ret_idx++] = c;
}
result.resize(ret_idx);
return result;
}
double monoTime() {
timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
return (time.tv_sec + time.tv_nsec / 1e9);
}
int main(void) {
// const vector<pair<string, string>> tests = {
// {"", ""},
// {"foo/./bar", "foo/bar"},
// {"../bar", "/bar"},
// {"/foo/bar", "/foo/bar"},
// {"/foo/bar/../baz", "/foo/baz"},
// {"/foo/bar/./baz/", "/foo/bar/baz/"},
// {"/foo/../../baz", "/baz"},
// {"/...///bar///here/./././/../test", "/.../bar/test"},
// /* Long line (> 512) with many slashes (> 128) */
// {"/foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb/",
// "/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/"},
// };
// for (const auto& p : tests) {
// string r = normalize(p.first);
// if (p.second != r) {
// cout << "Test failed for: " << p.first << ";\nexpected: " << p.second << "\ngot:" << r << endl;
// }
// }
// cout << normalize("/./..//.//gszk3/83/t/./2wl");
std::vector<std::string> testlines;
testlines.reserve(65536);
int i = 0;
char *line = NULL;
while(!feof(stdin)) {
size_t cap = 0;
ssize_t size = getline(&line, &cap, stdin);
if (size < 0) {
break;
}
std::string st(line);
testlines.push_back(st);
line = NULL;
i++;
}
auto start = monoTime();
for (auto& s : testlines) {
normalize(s);
}
auto end = monoTime();
auto diff = end - start;
std::cout.precision(10);
std::cout << diff << std::endl;
return 0;
}
extern crate time;
enum State {
Nothing,
Slash,
SlashDot,
SlashDotDot
}
fn normalize(path: &str) -> String {
unsafe {
let l = path.len() / 4 ;
let mut slashes:Vec<usize> = Vec::with_capacity(l);
slashes.set_len(l);
let mut s = String::from(path);
let mut ret_idx = 0usize;
let mut slash_idx = 0;
let mut state = State::Slash;
slashes[0] = 0;
{
let mut bs = s.as_mut_vec();
for c in path.bytes() {
match c as char {
'.' => match state {
State::Slash => state = State::SlashDot,
State::SlashDot => state = State::SlashDotDot,
State::SlashDotDot => state = State::Nothing,
_ => {}
},
'/' => {
match state {
State::SlashDotDot => {
if slash_idx > 0 {
slash_idx -= 1;
};
ret_idx = slashes[slash_idx];
},
State::Slash => {
ret_idx = slashes[slash_idx];
},
State::SlashDot => {
ret_idx = slashes[slash_idx];
},
_ => {
slash_idx += 1;
if slash_idx == slashes.len() {
slashes.reserve(slash_idx);
slashes.set_len(2 * slash_idx);
}
slashes[slash_idx] = ret_idx
}
};
state = State::Slash;
},
_ => state = State::Nothing
};
bs[ret_idx] = c;
ret_idx += 1;
};
}
s.truncate(ret_idx);
s
}
}
use std::io;
use std::io::BufReader;
use std::io::BufRead;
fn main() {
// let tests = vec![
// ("foo/./bar", "foo/bar"),
// ("../bar", "/bar"),
// ("/foo/bar", "/foo/bar"),
// ("/foo/bar/../baz", "/foo/baz"),
// ("/foo/bar/./baz/", "/foo/bar/baz/"),
// ("/foo/../../baz", "/baz"),
// ("/...///bar///here/./././/../test", "/.../bar/test"),
// ("/foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb/",
// "/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/")
// ];
// for (i, o) in tests {
// let r = normalize(i);
// if o != r {
// println!("Invalid: {}; wants: {}; got: {}", i, o, r);
// }
// }
let mut testlines = Vec::with_capacity(65536);
let stdin = BufReader::new(io::stdin());
for line in stdin.lines() {
testlines.push(line.unwrap());
}
let start = time::precise_time_s();
for line in testlines {
let _ = normalize(&line);
}
let finish = time::precise_time_s();
let diff = finish - start;
println!("{:.10}", diff);
}
#!/usr/bin/env bash
./all.sh > res.txt
cat res.txt | sort -n -k2
rust-jemalloc 1.9398220181
rust 1.9639518170
c-strlen-jemalloc 2.2102649999
c-noall-jemalloc 2.2220930001
c-noall 2.2386530000
c-strlen 2.2561820000
cpp-jemalloc 2.336584
cpp 2.357312
c-preall 2.7188820001
c-preall-jemalloc 2.7222179999
/*
* Running tests:
*
* $ cat << EOF > fptest.c
* #include "fixpath.c"
* int
* main(void)
* {
* run_tests();
* return 0;
* }
* EOF
*
* $ gcc -O2 fptest.c -o fptest
* $ ./fptest
*/
#include <stdlib.h>
#include <assert.h>
#include <string.h> /* strcmp() */
#include <stdio.h>
#include <time.h>
char*
normalize(const char* path)
{
size_t ret_size = strlen(path) + 1;
char* ret = (char*)malloc(ret_size);
memcpy(ret, path, ret_size);
assert(ret != NULL);
unsigned ret_idx = 0;
#define STATE_NONE 0
#define STATE_SLASH 1
#define STATE_SLASH_DOT 2
#define STATE_SLASH_DOTDOT 3
int state = STATE_SLASH;
unsigned num_skip = 0;
for (const char* p = &path[0]; *p != '\0'; p++) {
switch (*p) {
case '.':
switch (state) {
case STATE_SLASH:
state = STATE_SLASH_DOT;
break;
case STATE_SLASH_DOT:
state = STATE_SLASH_DOTDOT;
break;
case STATE_SLASH_DOTDOT:
/* NOTE: '...' is perfect name for the directory */
state = STATE_NONE;
break;
}
break;
case '/':
switch (state) {
case STATE_SLASH_DOTDOT:
/* Reduce "/../" by removing last path part */
num_skip = 0;
for (const char* for_slash = &ret[ret_idx]-1; num_skip < 2 && ret_idx > 0 ; for_slash--){
if (*for_slash == '/')
num_skip++;
ret_idx--;
}
state = STATE_SLASH;
break;
case STATE_SLASH:
/* Reduce "//" to "/" */
if (ret_idx > 0) {
ret_idx--;
}
break;
case STATE_SLASH_DOT:
/* Reduce "/./" to "/" */
state = STATE_SLASH;
if (ret_idx > 0)
ret_idx--;
if (ret_idx > 0)
ret_idx--;
break;
default:
state = STATE_SLASH;
break;
}
break;
default:
state = STATE_NONE;
break;
}
ret[ret_idx++] = *p;
}
#undef STATE_NONE
#undef STATE_SLASH
#undef STATE_SLASH_DOT
#undef STATE_SLASH_DOTDOT
ret[ret_idx] = '\0';
return ret;
}
const char* test_path[][2] = {
{"", ""},
{"foo/./bar", "foo/bar"},
{"../bar", "/bar"},
{"/foo/bar", "/foo/bar"},
{"/foo/bar/../baz", "/foo/baz"},
{"/foo/bar/./baz/", "/foo/bar/baz/"},
{"/foo/../../baz", "/baz"},
{"/...///bar///here/./././/../test", "/.../bar/test"},
/* Long line (> 512) with many slashes (> 128) */
{"/foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb//foo/bar/test/hree/baz/../log/../../many/s/l/a/s/h///e//s/ab/abb/",
"/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/foo/bar/test/many/s/l/a/s/h/e/s/ab/abb/"},
{NULL, NULL}
};
void
run_tests(void)
{
for (int i = 0; test_path[i][0] != NULL; i++) {
char* npath = normalize(test_path[i][0]);
assert(npath != NULL);
assert(strcmp(npath, test_path[i][1]) == 0);
free(npath);
}
}
double monoTime() {
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
return (time.tv_sec + time.tv_nsec / 1e9);
}
int main(void) {
/* run_tests(); */
char** testlines = malloc(1000000 * sizeof(char*));
int i = 0;
char *line = NULL;
while(!feof(stdin)) {
size_t cap = 0;
ssize_t size = getline(&line, &cap, stdin);
if (size < 0) {
break;
}
testlines[i] = malloc(size * sizeof(char));
strncpy(testlines[i], line, size - 1);
testlines[i][size-1] = '\0';
line = NULL;
i++;
}
double start = monoTime();
for (int k = 0; k < i; k++) {
normalize(testlines[k]);
}
double end = monoTime();
double diff = end - start;
printf("%.10f\n", diff);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment