-
-
Save ugexe/cadebd4e3e47180f8f15e54f288c06ac to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# see: https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats
# Grammar that splits an absolute Windows path into a volume and a list of
# path parts. Three path flavours are recognised: device paths, UNC paths
# and DOS paths. Whichever one matches is exposed as $<spec>, which carries
# the sub-captures $<volume> and $<parts>.
grammar IO::Spec::Win32::Parser {
    # The whole input must be exactly one path. The alternation is grouped so
    # that both anchors apply to every branch, and this is a `token` rather
    # than a `rule` so that no implicit <.ws> sneaks in around the anchors.
    token TOP {
        ^
        [
        || $<spec>=<.device-path>
        || $<spec>=<.unc-path>
        || $<spec>=<.dos-path>
        ]
        $
    }

    token guid-character { <[0..9 a..f A..F]> }
    # Hex digits grouped 8-4-4-4-12.
    token guid { [<.guid-character> ** 8] '-' [<.guid-character> ** 4 '-'] ** 3 [<.guid-character> ** 12] }

    token not-backslash { <-[\\]> }
    token not-slash     { <-[\\ /]> }
    token backslash     { <[\\]> }
    token slash         { <[\\ /]> }

    token drive        { <drive-letter> || <drive-volume> }
    token drive-letter { <[A..Z a..z]> ':' }
    token drive-volume { 'Volume{' <.guid> '}' }

    # Win32 APIs normalize paths (such as changing forward slashes to backslashes) when using non-device paths.
    # A run of separators counts as one; `<(` keeps the separators out of the
    # matched text, so a part stringifies to just its name (possibly empty).
    token path-part        { <.slash>+ <( <.not-slash>* }
    # Exactly one backslash, then everything up to the next backslash.
    token strict-path-part { <.backslash> <( <.not-backslash>* }

    # DOS paths - example: C:\foo\bar.txt
    # DOS paths are the types of paths most people are familiar with on Windows. They do have limitations
    # though -- such as which characters can be used in the path name, or how many characters a given path
    # part can contain -- so we can't just normalize all paths to this format.
    # DOS paths can use both / and \ to separate the various parts of the path.
    token dos-path { $<volume>=<.drive-letter> $<parts>=<.path-part>+ }

    # UNC paths - example: \\MyServer\C:\foo\bar.txt
    # Universal naming convention (UNC) paths are used to access network resources.
    # UNC paths can use / for separating the path parts after the share/volume, but the share/volume itself must use \.
    # The server name may not start with '.' or '?' (those introduce device
    # paths) nor with a backslash (which would allow a third leading
    # backslash); a single-character server name is accepted.
    # NOTE(review): only a drive letter is accepted as the share here --
    # confirm whether plain share names (\\server\share\...) should parse too.
    token unc-path-prefix { [<.backslash> ** 2] <-[.?\\]> <.not-backslash>* <.backslash> <.drive-letter> }
    token unc-path { $<volume>=<.unc-path-prefix> $<parts>=<.path-part>+ }

    # Device paths - example: \\?\C:\foo\bar.txt
    # Device paths are like UNC paths, but use the special namespace '?' or '.'. They are particularly
    # useful for enabling long path support when e.g. sending paths to libuv/MoarVM.
    # Device paths can only use \ for separating the various parts of the path.
    token device-path-identifier { <[.?]> }
    token device-path-prefix { [<.backslash> ** 2] <.device-path-identifier> <.backslash> <.drive> }
    token device-path { $<volume>=<.device-path-prefix> $<parts>=<.strict-path-part>+ }
}
# Actions for a grammar whose TOP captures the matched path as $<spec>, with
# the sub-captures $<spec><volume> and $<spec><parts>. Turns the parse into a
# hash describing the path.
class IO::Spec::Win32::Parser::Actions {
    # Separator used to re-join the directory parts. This is always the Win32
    # separator: $*SPEC describes the host the code runs on, which would put
    # '/' into Windows paths when run on a non-Windows system.
    my $dir-sep = IO::Spec::Win32.dir-sep;

    # Attaches (via `make`) a hash with these keys to the match:
    #   original - the full matched path text
    #   volume   - the text of the volume capture
    #   dirname  - every part except the last, joined with the Win32
    #              separator (no leading separator; '' when there is only
    #              one part)
    #   basename - the last part ('' when the path ends in a separator)
    method TOP($/) {
        my @parts = $<spec><parts>.map(*.Str);

        # The last part is the basename; whatever is left is the directory.
        my $basename = @parts ?? @parts.pop !! '';

        make %(
            original => $<spec>.Str,
            volume   => $<spec><volume>.Str,
            dirname  => @parts.join($dir-sep),
            basename => $basename // '',
        )
    }
}
# Demo: parse one sample of each supported path flavour and print the hash
# produced by the actions class.
my $parser  = IO::Spec::Win32::Parser.new;
my $actions = IO::Spec::Win32::Parser::Actions.new;

my @sample-paths =
    q|C:\\foo\\\\bar\\baz.txt|,                                                  # DOS path, doubled separator
    q|\\\\MyServer\\C:\\foo\\bar\\baz.txt|,                                      # UNC path
    q|\\\\?\\C:\\foo\\bar\\baz.txt|,                                             # device path, drive letter
    q|\\\\?\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\foo\\bar\\baz.txt|,  # device path, volume GUID
    q|\\\\?\\C:\\..\\foo\\bar\\baz.txt|;                                         # device path containing '..'

for @sample-paths -> $path {
    say $parser.parse($path, :$actions).made;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment