Skip to content

Instantly share code, notes, and snippets.

@ugexe
Created March 17, 2023 22:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ugexe/cadebd4e3e47180f8f15e54f288c06ac to your computer and use it in GitHub Desktop.
Save ugexe/cadebd4e3e47180f8f15e54f288c06ac to your computer and use it in GitHub Desktop.
# see: https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats
grammar IO::Spec::Win32::Parser {
    # Entry point: the whole input must be exactly one device, UNC or DOS path.
    # The alternatives are grouped so both anchors apply to every branch, and
    # this is a token (not a rule) so no implicit <.ws> is inserted that would
    # silently accept whitespace around the path.
    # The matched path is exposed as $<spec>, with $<spec><volume> and
    # $<spec><parts> coming from whichever branch matched.
    token TOP {
        ^
        [
        || $<spec>=<.device-path>
        || $<spec>=<.unc-path>
        || $<spec>=<.dos-path>
        ]
        $
    }

    # A volume GUID in 8-4-4-4-12 hexadecimal form.
    token guid-character { <[0..9 a..f A..F]> }
    token guid { [<.guid-character> ** 8] '-' [<.guid-character> ** 4 '-'] ** 3 [<.guid-character> ** 12] }

    # Separator helpers. "slash" means either separator; "backslash" is strict.
    token not-backslash { <-[\\]> }
    token not-slash { <-[\\ /]> }
    token backslash { <[\\]> }
    token slash { <[\\ /]> }

    # A drive is either a letter form (C:) or a volume GUID form (Volume{...}).
    token drive { <drive-letter> || <drive-volume> }
    token drive-letter { <[A..Z a..z]> ':' }
    token drive-volume { 'Volume{' <.guid> '}' }

    # Win32 APIs normalize paths (such as changing forward slashes to backslashes) when using non-device paths.
    # path-part therefore accepts either separator and collapses runs of them;
    # the <( marker keeps the separators out of the captured text.
    token path-part { <.slash>+ <( <.not-slash>* }
    # strict-path-part accepts exactly one backslash; a forward slash is part of the name.
    token strict-path-part { <.backslash> <( <.not-backslash>* }

    # DOS paths - example: C:\foo\bar.txt
    # DOS paths are the types of paths most people are familiar with on Windows. They do have limitations
    # though -- such as which characters can be used in the path name, or how many characters a given path
    # part can contain -- so we can't just normalize all paths to this format.
    # DOS paths can use both / and \ to separate the various parts of the path.
    token dos-path { $<volume>=<.drive-letter> $<parts>=<.path-part>+ }

    # UNC paths - example: \\MyServer\C:\foo\bar.txt
    # Universal naming convention (UNC) paths are used to access network resources.
    # UNC paths can use / for separating the path parts after the share/volume, but the share/volume itself must use \.
    # The first character of the server name may not be '.' or '?' (those
    # introduce a device path) and may not be another backslash; a server name
    # of a single character is allowed.
    # NOTE(review): the share is required to be a drive letter here, so paths
    # such as \\server\share\file do not parse -- confirm whether that is intended.
    token unc-path-prefix { [<.backslash> ** 2] <-[.?\\]> <.not-backslash>* <.backslash> <.drive-letter> }
    token unc-path { $<volume>=<.unc-path-prefix> $<parts>=<.path-part>+ }

    # Device paths - example: \\?\C:\foo\bar.txt
    # Device paths are like UNC paths, but use the special namespace '?' or '.'. They are particularly
    # useful for enabling long path support when e.g. sending paths to libuv/MoarVM.
    # Device paths can only use \ for separating the various parts of the path.
    token device-path-identifier { <[.?]> }
    token device-path-prefix { [<.backslash> ** 2] <.device-path-identifier> <.backslash> <.drive> }
    token device-path { $<volume>=<.device-path-prefix> $<parts>=<.strict-path-part>+ }
}
# Actions for IO::Spec::Win32::Parser: turns the parsed path into a hash.
class IO::Spec::Win32::Parser::Actions {
    # Win32 paths are always re-joined with a backslash. This must not depend
    # on the platform the code is compiled or run on.
    my constant DIR-SEP = '\\';

    # Attaches a hash to the top-level match with these keys:
    #   original - the full text matched as $<spec>
    #   volume   - the text captured as $<spec><volume>
    #   dirname  - every path part except the last, joined with a backslash
    #   basename - the last path part ('' when there are no parts)
    method TOP($/) {
        my @parts = $<spec><parts>.map(*.Str);
        # Peel the final part off as the basename; what remains is the directory.
        my $basename = @parts ?? @parts.pop !! '';
        make %(
            original => $<spec>.Str,
            volume   => $<spec><volume>.Str,
            dirname  => @parts.join(DIR-SEP),
            basename => $basename,
        )
    }
}
# Demo: parse one sample of each supported path flavour and print the result.
my $parser  = IO::Spec::Win32::Parser.new;
my $actions = IO::Spec::Win32::Parser::Actions.new;

my @samples =
    q|C:\\foo\\\\bar\\baz.txt|,
    q|\\\\MyServer\\C:\\foo\\bar\\baz.txt|,
    q|\\\\?\\C:\\foo\\bar\\baz.txt|,
    q|\\\\?\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\foo\\bar\\baz.txt|,
    q|\\\\?\\C:\\..\\foo\\bar\\baz.txt|;

for @samples -> $path {
    my $result = $parser.parse($path, :$actions);
    say $result.made;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment