Last active
February 27, 2024 07:59
-
-
Save denilsonsa/9b433ee8fe548d0388fec4de19e7d8d6 to your computer and use it in GitHub Desktop.
How to normalize a path in Perl? (without checking the filesystem)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use Test::More; | |
# https://stackoverflow.com/questions/45631519/how-to-normalize-a-path-in-perl-without-checking-the-filesystem | |
my %solutions = ( | |
denilson_answer => sub { | |
# This is the only solution that passes all the tests listed at the end of this file. | |
# https://stackoverflow.com/a/45635706 | |
require File::Spec; | |
my $path = shift; | |
# canonpath does string manipulation, but does not remove "..". | |
my $ret = File::Spec->canonpath($path); | |
# Let's remove ".." by using a regex. | |
while ($ret =~ s{ | |
(^|/) # Either the beginning of the string, or a slash, save as $1 | |
( # Followed by one of these: | |
[^/]| # * Any one character (except slash, obviously) | |
[^./][^/]| # * Two characters where | |
[^/][^./]| # they are not ".." | |
[^/][^/][^/]+ # * Three or more characters | |
) # Followed by: | |
/\.\./ # "/", followed by "../" | |
}{$1}x | |
) { | |
# Repeat this substitution until not possible anymore. | |
} | |
# Re-adding the trailing slash, if needed. | |
if ($path =~ m!/$! && $ret !~ m!/$!) { | |
$ret .= '/'; | |
} | |
return $ret; | |
}, | |
file_spec_canonpath => sub { | |
require File::Spec; | |
my $path = shift; | |
my $ret = File::Spec->canonpath($path); | |
if ($path =~ m!^/! && $ret !~ m!^/!) { $ret = '/' . $ret; } | |
if ($path =~ m!/$! && $ret !~ m!/$!) { $ret .= '/'; } | |
return $ret; | |
}, | |
matt_answer_path_tiny => sub { | |
# https://stackoverflow.com/a/45640854 | |
require Path::Tiny; | |
my $path = shift; | |
if ($path eq '') { | |
# Path::Tiny paths require defined, positive-length parts | |
return $path; | |
} | |
my $ret = '' . Path::Tiny::path($path); | |
if ($path =~ m!^/! && $ret !~ m!^/!) { $ret = '/' . $ret; } | |
if ($path =~ m!/$! && $ret !~ m!/$!) { $ret .= '/'; } | |
return $ret; | |
}, | |
tom_answer => sub { | |
# https://stackoverflow.com/a/45631808 | |
my $path = shift; | |
my @c= reverse split m@/@, $path; | |
my @c_new; | |
while (@c) { | |
my $component= shift @c; | |
next unless length($component); | |
if ($component eq ".") { next; } | |
if ($component eq "..") { shift @c; next } | |
push @c_new, $component; | |
} | |
my $ret = join("/", reverse @c_new); | |
if ($path =~ m!^/! && $ret !~ m!^/!) { $ret = '/' . $ret; } | |
if ($path =~ m!/$! && $ret !~ m!/$!) { $ret .= '/'; } | |
return $ret; | |
}, | |
georg_answer => sub { | |
# https://stackoverflow.com/a/70081898 | |
my $path = shift; | |
my $absolute = $path =~ m!^/!; | |
my @c = reverse split m@/@, $path; | |
my @c_new; | |
while (@c) { | |
my $component= shift @c; | |
next unless length($component); | |
if ($component eq ".") { next; } | |
if ($component eq "..") { | |
my $i=0; | |
while ($c[$i] && $c[$i] =~ m/^\.{1,2}$/) { | |
$i++ | |
} | |
if ($i > $#c) { | |
push @c_new, $component unless $absolute; | |
} else { | |
splice(@c, $i, 1); | |
} | |
next | |
} | |
push @c_new, $component; | |
} | |
my $ret = join("/", reverse @c_new); | |
if ($path =~ m!^/! && $ret !~ m!^/!) { $ret = '/' . $ret; } | |
if ($path =~ m!/$! && $ret !~ m!/$!) { $ret .= '/'; } | |
return $ret; | |
}, | |
j_l_answer => sub { | |
# https://stackoverflow.com/a/76568741/124946 | |
require File::Spec::Win32; | |
my $path = shift; | |
$path =~ s[\\][\0]g; # Converts backslashes to null bytes. | |
$path = File::Spec::Win32->canonpath($path); | |
$path =~ s[\\][/]g; # Converts \ to / characters. | |
$path =~ s[\0][\\]g; # Converts null bytes back to backslashes. | |
# $path is now set to: /b/c/d | |
return $path; | |
}, | |
j_l_answer_modified => sub { | |
# https://stackoverflow.com/a/76568741/124946 | |
require File::Spec::Win32; | |
my $path = shift; | |
my $ret = $path; | |
$ret =~ s[\\][\0]g; # Converts backslashes to null bytes. | |
$ret = File::Spec::Win32->canonpath($ret); | |
$ret =~ s[\\][/]g; # Converts \ to / characters. | |
$ret =~ s[\0][\\]g; # Converts null bytes back to backslashes. | |
# Re-adding the trailing slash, if needed. | |
if ($path =~ m!/$! && $ret !~ m!/$!) { | |
$ret .= '/'; | |
} | |
return $ret; | |
}, | |
); | |
# Which one you want to test? | |
my $func; | |
my @options = sort keys %solutions; | |
my $selected_index; | |
if (defined $ARGV[0] && $options[$ARGV[0]]) { | |
$selected_index = 0 + $ARGV[0]; | |
} | |
if (!defined $selected_index) { | |
while (1) { | |
print "Which solution do you want to test?\n"; | |
for my $i (0..$#options) { | |
print " $i $options[$i]\n"; | |
} | |
my $input = <STDIN>; | |
my $number = 0 + $input; | |
if ($options[$number]) { | |
$selected_index = $number; | |
last; | |
} | |
} | |
} | |
print "Testing $options[$selected_index]\n"; | |
$func = $solutions{$options[$selected_index]}; | |
# To quickly format these lines in vim: :'<,'>Tabularize /), \|,/l0 | |
is( $func->('' ), '' ,'empty'); | |
is( $func->('/' ), '/' ,'root'); | |
is( $func->('foo' ), 'foo' ,'simple'); | |
is( $func->('foo/' ), 'foo/' ,'simple dir'); | |
is( $func->('/foo' ), '/foo' ,'root simple'); | |
is( $func->('/foo/'), '/foo/','root dir'); | |
is( $func->('foo/bar' ), 'foo/bar' ,'two components'); | |
is( $func->('foo/bar/' ), 'foo/bar/' ,'two components'); | |
is( $func->('/foo/bar' ), '/foo/bar' ,'two components'); | |
is( $func->('/foo/bar/'), '/foo/bar/','two components'); | |
is( $func->('foo//bar' ), 'foo/bar' ,'double slash'); | |
is( $func->('foo//bar/' ), 'foo/bar/' ,'double slash'); | |
is( $func->('/foo//bar' ), '/foo/bar' ,'double slash'); | |
is( $func->('/foo//bar/' ), '/foo/bar/','double slash'); | |
is( $func->('foo///bar' ), 'foo/bar' ,'triple slash'); | |
is( $func->('foo///bar/' ), 'foo/bar/' ,'triple slash'); | |
is( $func->('/foo///bar' ), '/foo/bar' ,'triple slash'); | |
is( $func->('/foo///bar/' ), '/foo/bar/','triple slash'); | |
is( $func->('foo////bar' ), 'foo/bar' ,'four slashes'); | |
is( $func->('foo////bar/' ), 'foo/bar/' ,'four slashes'); | |
is( $func->('/foo////bar' ), '/foo/bar' ,'four slashes'); | |
is( $func->('/foo////bar/'), '/foo/bar/','four slashes'); | |
is( $func->('./foo' ), 'foo' ,'leading dot'); | |
is( $func->('./foo/' ), 'foo/' ,'leading dot'); | |
is( $func->('/./foo' ), '/foo' ,'leading dot'); | |
is( $func->('/./foo/'), '/foo/','leading dot'); | |
is( $func->('foo/./bar' ), 'foo/bar' ,'dot'); | |
is( $func->('foo/./bar/' ), 'foo/bar/' ,'dot'); | |
is( $func->('/foo/./bar' ), '/foo/bar' ,'dot'); | |
is( $func->('/foo/./bar/' ), '/foo/bar/','dot'); | |
is( $func->('foo/././bar' ), 'foo/bar' ,'dot dot'); | |
is( $func->('foo/././bar/' ), 'foo/bar/' ,'dot dot'); | |
is( $func->('/foo/././bar' ), '/foo/bar' ,'dot dot'); | |
is( $func->('/foo/././bar/' ), '/foo/bar/','dot dot'); | |
is( $func->('foo/./././bar' ), 'foo/bar' ,'dot dot dot'); | |
is( $func->('foo/./././bar/' ), 'foo/bar/' ,'dot dot dot'); | |
is( $func->('/foo/./././bar' ), '/foo/bar' ,'dot dot dot'); | |
is( $func->('/foo/./././bar/'), '/foo/bar/','dot dot dot'); | |
is( $func->('a/b/c/d' ), 'a/b/c/d' ,'four components'); | |
is( $func->('a/b/c/d/' ), 'a/b/c/d/' ,'four components'); | |
is( $func->('/a/b/c/d' ), '/a/b/c/d' ,'four components'); | |
is( $func->('/a/b/c/d/'), '/a/b/c/d/','four components'); | |
is( $func->('a/b/../c/d' ), 'a/c/d' ,'parent'); | |
is( $func->('a/b/../c/d/' ), 'a/c/d/' ,'parent'); | |
is( $func->('/a/b/../c/d' ), '/a/c/d' ,'parent'); | |
is( $func->('/a/b/../c/d/'), '/a/c/d/','parent'); | |
is( $func->('a/b/../../c/d' ), 'c/d' ,'parent parent'); | |
is( $func->('a/b/../../c/d/' ), 'c/d/' ,'parent parent'); | |
is( $func->('/a/b/../../c/d' ), '/c/d' ,'parent parent'); | |
is( $func->('/a/b/../../c/d/'), '/c/d/','parent parent'); | |
is( $func->('a/../b/../c/../d' ), 'd' ,'several parents'); | |
is( $func->('a/../b/../c/../d/' ), 'd/' ,'several parents'); | |
is( $func->('/a/../b/../c/../d' ), '/d' ,'several parents'); | |
is( $func->('/a/../b/../c/../d/'), '/d/','several parents'); | |
is( $func->('/../a/b' ), '/a/b' ,'root parent'); | |
is( $func->('/../a/b/' ), '/a/b/','root parent'); | |
is( $func->('/../../a/b' ), '/a/b' ,'root parent parent'); | |
is( $func->('/../../a/b/'), '/a/b/','root parent parent'); | |
is( $func->('../foo' ), '../foo' ,'leading parent'); | |
is( $func->('../foo/' ), '../foo/' ,'leading parent'); | |
is( $func->('../../foo' ), '../../foo' ,'leading parent parent'); | |
is( $func->('../../foo/' ), '../../foo/','leading parent parent'); | |
is( $func->('a/../../foo' ), '../foo' ,'too many parents'); | |
is( $func->('a/../../foo/'), '../foo/' ,'too many parents'); | |
done_testing(); | |
1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment