Skip to content

Instantly share code, notes, and snippets.

@denilsonsa
Last active February 27, 2024 07:59
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save denilsonsa/9b433ee8fe548d0388fec4de19e7d8d6 to your computer and use it in GitHub Desktop.
Save denilsonsa/9b433ee8fe548d0388fec4de19e7d8d6 to your computer and use it in GitHub Desktop.
How to normalize a path in Perl? (without checking the filesystem)
#!/usr/bin/env perl
use strict;
use warnings;
use Test::More;
# https://stackoverflow.com/questions/45631519/how-to-normalize-a-path-in-perl-without-checking-the-filesystem
my %solutions = (
denilson_answer => sub {
# This is the only solution that passes all the tests listed at the end of this file.
# https://stackoverflow.com/a/45635706
require File::Spec;
my $path = shift;
# canonpath does string manipulation, but does not remove "..".
my $ret = File::Spec->canonpath($path);
# Let's remove ".." by using a regex.
while ($ret =~ s{
(^|/) # Either the beginning of the string, or a slash, save as $1
( # Followed by one of these:
[^/]| # * Any one character (except slash, obviously)
[^./][^/]| # * Two characters where
[^/][^./]| # they are not ".."
[^/][^/][^/]+ # * Three or more characters
) # Followed by:
/\.\./ # "/", followed by "../"
}{$1}x
) {
# Repeat this substitution until not possible anymore.
}
# Re-adding the trailing slash, if needed.
if ($path =~ m!/$! && $ret !~ m!/$!) {
$ret .= '/';
}
return $ret;
},
file_spec_canonpath => sub {
require File::Spec;
my $path = shift;
my $ret = File::Spec->canonpath($path);
if ($path =~ m!^/! && $ret !~ m!^/!) { $ret = '/' . $ret; }
if ($path =~ m!/$! && $ret !~ m!/$!) { $ret .= '/'; }
return $ret;
},
matt_answer_path_tiny => sub {
# https://stackoverflow.com/a/45640854
require Path::Tiny;
my $path = shift;
if ($path eq '') {
# Path::Tiny paths require defined, positive-length parts
return $path;
}
my $ret = '' . Path::Tiny::path($path);
if ($path =~ m!^/! && $ret !~ m!^/!) { $ret = '/' . $ret; }
if ($path =~ m!/$! && $ret !~ m!/$!) { $ret .= '/'; }
return $ret;
},
tom_answer => sub {
# https://stackoverflow.com/a/45631808
my $path = shift;
my @c= reverse split m@/@, $path;
my @c_new;
while (@c) {
my $component= shift @c;
next unless length($component);
if ($component eq ".") { next; }
if ($component eq "..") { shift @c; next }
push @c_new, $component;
}
my $ret = join("/", reverse @c_new);
if ($path =~ m!^/! && $ret !~ m!^/!) { $ret = '/' . $ret; }
if ($path =~ m!/$! && $ret !~ m!/$!) { $ret .= '/'; }
return $ret;
},
georg_answer => sub {
# https://stackoverflow.com/a/70081898
my $path = shift;
my $absolute = $path =~ m!^/!;
my @c = reverse split m@/@, $path;
my @c_new;
while (@c) {
my $component= shift @c;
next unless length($component);
if ($component eq ".") { next; }
if ($component eq "..") {
my $i=0;
while ($c[$i] && $c[$i] =~ m/^\.{1,2}$/) {
$i++
}
if ($i > $#c) {
push @c_new, $component unless $absolute;
} else {
splice(@c, $i, 1);
}
next
}
push @c_new, $component;
}
my $ret = join("/", reverse @c_new);
if ($path =~ m!^/! && $ret !~ m!^/!) { $ret = '/' . $ret; }
if ($path =~ m!/$! && $ret !~ m!/$!) { $ret .= '/'; }
return $ret;
},
j_l_answer => sub {
# https://stackoverflow.com/a/76568741/124946
require File::Spec::Win32;
my $path = shift;
$path =~ s[\\][\0]g; # Converts backslashes to null bytes.
$path = File::Spec::Win32->canonpath($path);
$path =~ s[\\][/]g; # Converts \ to / characters.
$path =~ s[\0][\\]g; # Converts null bytes back to backslashes.
# $path is now set to: /b/c/d
return $path;
},
j_l_answer_modified => sub {
# https://stackoverflow.com/a/76568741/124946
require File::Spec::Win32;
my $path = shift;
my $ret = $path;
$ret =~ s[\\][\0]g; # Converts backslashes to null bytes.
$ret = File::Spec::Win32->canonpath($ret);
$ret =~ s[\\][/]g; # Converts \ to / characters.
$ret =~ s[\0][\\]g; # Converts null bytes back to backslashes.
# Re-adding the trailing slash, if needed.
if ($path =~ m!/$! && $ret !~ m!/$!) {
$ret .= '/';
}
return $ret;
},
);
# Which one you want to test?
my $func;
my @options = sort keys %solutions;
my $selected_index;
if (defined $ARGV[0] && $options[$ARGV[0]]) {
$selected_index = 0 + $ARGV[0];
}
if (!defined $selected_index) {
while (1) {
print "Which solution do you want to test?\n";
for my $i (0..$#options) {
print " $i $options[$i]\n";
}
my $input = <STDIN>;
my $number = 0 + $input;
if ($options[$number]) {
$selected_index = $number;
last;
}
}
}
print "Testing $options[$selected_index]\n";
$func = $solutions{$options[$selected_index]};
# To quickly format these lines in vim: :'<,'>Tabularize /), \|,/l0
is( $func->('' ), '' ,'empty');
is( $func->('/' ), '/' ,'root');
is( $func->('foo' ), 'foo' ,'simple');
is( $func->('foo/' ), 'foo/' ,'simple dir');
is( $func->('/foo' ), '/foo' ,'root simple');
is( $func->('/foo/'), '/foo/','root dir');
is( $func->('foo/bar' ), 'foo/bar' ,'two components');
is( $func->('foo/bar/' ), 'foo/bar/' ,'two components');
is( $func->('/foo/bar' ), '/foo/bar' ,'two components');
is( $func->('/foo/bar/'), '/foo/bar/','two components');
is( $func->('foo//bar' ), 'foo/bar' ,'double slash');
is( $func->('foo//bar/' ), 'foo/bar/' ,'double slash');
is( $func->('/foo//bar' ), '/foo/bar' ,'double slash');
is( $func->('/foo//bar/' ), '/foo/bar/','double slash');
is( $func->('foo///bar' ), 'foo/bar' ,'triple slash');
is( $func->('foo///bar/' ), 'foo/bar/' ,'triple slash');
is( $func->('/foo///bar' ), '/foo/bar' ,'triple slash');
is( $func->('/foo///bar/' ), '/foo/bar/','triple slash');
is( $func->('foo////bar' ), 'foo/bar' ,'four slashes');
is( $func->('foo////bar/' ), 'foo/bar/' ,'four slashes');
is( $func->('/foo////bar' ), '/foo/bar' ,'four slashes');
is( $func->('/foo////bar/'), '/foo/bar/','four slashes');
is( $func->('./foo' ), 'foo' ,'leading dot');
is( $func->('./foo/' ), 'foo/' ,'leading dot');
is( $func->('/./foo' ), '/foo' ,'leading dot');
is( $func->('/./foo/'), '/foo/','leading dot');
is( $func->('foo/./bar' ), 'foo/bar' ,'dot');
is( $func->('foo/./bar/' ), 'foo/bar/' ,'dot');
is( $func->('/foo/./bar' ), '/foo/bar' ,'dot');
is( $func->('/foo/./bar/' ), '/foo/bar/','dot');
is( $func->('foo/././bar' ), 'foo/bar' ,'dot dot');
is( $func->('foo/././bar/' ), 'foo/bar/' ,'dot dot');
is( $func->('/foo/././bar' ), '/foo/bar' ,'dot dot');
is( $func->('/foo/././bar/' ), '/foo/bar/','dot dot');
is( $func->('foo/./././bar' ), 'foo/bar' ,'dot dot dot');
is( $func->('foo/./././bar/' ), 'foo/bar/' ,'dot dot dot');
is( $func->('/foo/./././bar' ), '/foo/bar' ,'dot dot dot');
is( $func->('/foo/./././bar/'), '/foo/bar/','dot dot dot');
is( $func->('a/b/c/d' ), 'a/b/c/d' ,'four components');
is( $func->('a/b/c/d/' ), 'a/b/c/d/' ,'four components');
is( $func->('/a/b/c/d' ), '/a/b/c/d' ,'four components');
is( $func->('/a/b/c/d/'), '/a/b/c/d/','four components');
is( $func->('a/b/../c/d' ), 'a/c/d' ,'parent');
is( $func->('a/b/../c/d/' ), 'a/c/d/' ,'parent');
is( $func->('/a/b/../c/d' ), '/a/c/d' ,'parent');
is( $func->('/a/b/../c/d/'), '/a/c/d/','parent');
is( $func->('a/b/../../c/d' ), 'c/d' ,'parent parent');
is( $func->('a/b/../../c/d/' ), 'c/d/' ,'parent parent');
is( $func->('/a/b/../../c/d' ), '/c/d' ,'parent parent');
is( $func->('/a/b/../../c/d/'), '/c/d/','parent parent');
is( $func->('a/../b/../c/../d' ), 'd' ,'several parents');
is( $func->('a/../b/../c/../d/' ), 'd/' ,'several parents');
is( $func->('/a/../b/../c/../d' ), '/d' ,'several parents');
is( $func->('/a/../b/../c/../d/'), '/d/','several parents');
is( $func->('/../a/b' ), '/a/b' ,'root parent');
is( $func->('/../a/b/' ), '/a/b/','root parent');
is( $func->('/../../a/b' ), '/a/b' ,'root parent parent');
is( $func->('/../../a/b/'), '/a/b/','root parent parent');
is( $func->('../foo' ), '../foo' ,'leading parent');
is( $func->('../foo/' ), '../foo/' ,'leading parent');
is( $func->('../../foo' ), '../../foo' ,'leading parent parent');
is( $func->('../../foo/' ), '../../foo/','leading parent parent');
is( $func->('a/../../foo' ), '../foo' ,'too many parents');
is( $func->('a/../../foo/'), '../foo/' ,'too many parents');
done_testing();
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment