nascheme/Py_normpath rewrite

## Py_normpath rewrite
/* In-place path normalisation. Returns the start of the normalized
   path, which will be within the original buffer. Guaranteed to not
   make the path longer, and will not fail. 'size' is the length of
   the path, if known. If -1, the first null character will be assumed
   to be the end of the path. */
wchar_t *
_Py_normpath(wchar_t *path, Py_ssize_t size)
{
    if (!path[0] || size == 0) {
        return path;
    }
    if (size < 0) {
        size = wcslen(path);
    }

#define CHAR(idx) ((idx >= 0 && idx < size) ? path[idx] : 0)
#ifdef ALTSEP
#define IS_SEP(idx) (CHAR(idx) == SEP || CHAR(idx) == ALTSEP)
#else
#define IS_SEP(idx) (CHAR(idx) == SEP)
#endif
#define SEP_OR_END(idx) (IS_SEP(idx) || CHAR(idx) == 0)

    Py_ssize_t i = 0; // path read index
    Py_ssize_t j = 0; // path write index

    // POSIX allows one or two initial slashes, but treats three or more as
    // single slash.
    //( see http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13)
    bool initial_slashes = IS_SEP(i);
    if (initial_slashes) {
        path[j++] = path[i++];
        if (IS_SEP(i) && !IS_SEP(i+1)) {
            // double slash case, keep both of them
            path[j++] = path[i++];
        }
    }

    int num_comps = 0; // num of components in output path

    // loop through each component of path
    while (i < size) {
        if (IS_SEP(i)) {
            if (IS_SEP(j-1)) {
                // slash after another slash, drop it
                i++;
            }
            else {
                // slash after path component, add canonicalized slash
                if (j > 0) {
                    path[j++] = SEP;
                    num_comps++;
                }
                i++;
            }
            continue;
        }
        // check if component is "."
        bool dot = CHAR(i) == L'.';
        if (dot) {
            if (SEP_OR_END(i+1)) {
                // discard single dot path component
                i += 1;
                continue;
            }
        }
        // check if component is ".."
        bool remove_last = false;
        bool last_dotdot = false;
        bool dotdot = (dot && CHAR(i+1) == L'.' && SEP_OR_END(i+2));
        if (dotdot) {
            // complex case, we can't remove parent component if it is ..
            last_dotdot = CHAR(j-2) == L'.' && CHAR(j-3) == L'.' && SEP_OR_END(j-4);
            remove_last = (initial_slashes || num_comps) && (!num_comps || !last_dotdot);
        }
        if (remove_last) {
            // remove last component of path
            if (num_comps > 0) {
                j--; // remove trailing slash
                while (j > 0 && !IS_SEP(j-1)) {
                    // remove characters of last component
                    j--;
                }
                num_comps--;
            }
            // skip the ..
            i += 2;
            continue;
        }
        else {
            // component is not .. or we can't remove parent, add component
            while (true) {
                path[j++] = path[i++];
                if (SEP_OR_END(i)) {
                    break;
                }
            }
            continue;
        }
    }

    if (num_comps > 0) {
        // remove trailing slashes
        while (IS_SEP(j-1)) {
            --j;
        }
    }

    if (j < size) {
        path[j] = L'\0';
    }

#undef CHAR
#undef SEP_OR_END
#undef IS_SEP
    return path;
}
	/* In-place path normalisation. Returns the start of the normalized
	path, which will be within the original buffer. Guaranteed to not
	make the path longer, and will not fail. 'size' is the length of
	the path, if known. If -1, the first null character will be assumed
	to be the end of the path. */
	wchar_t *
	_Py_normpath(wchar_t *path, Py_ssize_t size)
	{
	if (!path[0] \|\| size == 0) {
	return path;
	}
	if (size < 0) {
	size = wcslen(path);
	}

	#define CHAR(idx) ((idx >= 0 && idx < size) ? path[idx] : 0)
	#ifdef ALTSEP
	#define IS_SEP(idx) (CHAR(idx) == SEP \|\| CHAR(idx) == ALTSEP)
	#else
	#define IS_SEP(idx) (CHAR(idx) == SEP)
	#endif
	#define SEP_OR_END(idx) (IS_SEP(idx) \|\| CHAR(idx) == 0)

	Py_ssize_t i = 0; // path read index
	Py_ssize_t j = 0; // path write index

	// POSIX allows one or two initial slashes, but treats three or more as
	// single slash.
	//( see http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13)
	bool initial_slashes = IS_SEP(i);
	if (initial_slashes) {
	path[j++] = path[i++];
	if (IS_SEP(i) && !IS_SEP(i+1)) {
	// double slash case, keep both of them
	path[j++] = path[i++];
	}
	}

	int num_comps = 0; // num of components in output path

	// loop through each component of path
	while (i < size) {
	if (IS_SEP(i)) {
	if (IS_SEP(j-1)) {
	// slash after another slash, drop it
	i++;
	}
	else {
	// slash after path component, add canonicalized slash
	if (j > 0) {
	path[j++] = SEP;
	num_comps++;
	}
	i++;
	}
	continue;
	}
	// check if component is "."
	bool dot = CHAR(i) == L'.';
	if (dot) {
	if (SEP_OR_END(i+1)) {
	// discard single dot path component
	i += 1;
	continue;
	}
	}
	// check if component is ".."
	bool remove_last = false;
	bool last_dotdot = false;
	bool dotdot = (dot && CHAR(i+1) == L'.' && SEP_OR_END(i+2));
	if (dotdot) {
	// complex case, we can't remove parent component if it is ..
	last_dotdot = CHAR(j-2) == L'.' && CHAR(j-3) == L'.' && SEP_OR_END(j-4);
	remove_last = (initial_slashes \|\| num_comps) && (!num_comps \|\| !last_dotdot);
	}
	if (remove_last) {
	// remove last component of path
	if (num_comps > 0) {
	j--; // remove trailing slash
	while (j > 0 && !IS_SEP(j-1)) {
	// remove characters of last component
	j--;
	}
	num_comps--;
	}
	// skip the ..
	i += 2;
	continue;
	}
	else {
	// component is not .. or we can't remove parent, add component
	while (true) {
	path[j++] = path[i++];
	if (SEP_OR_END(i)) {
	break;
	}
	}
	continue;
	}
	}

	if (num_comps > 0) {
	// remove trailing slashes
	while (IS_SEP(j-1)) {
	--j;
	}
	}

	if (j < size) {
	path[j] = L'\0';
	}

	#undef CHAR
	#undef SEP_OR_END
	#undef IS_SEP
	return path;
	}