Skip to content

Instantly share code, notes, and snippets.

@Xliff
Last active September 29, 2019 07:53
Show Gist options
  • Save Xliff/236bca60acd97cdb74fddde2abd2ba54 to your computer and use it in GitHub Desktop.
Save Xliff/236bca60acd97cdb74fddde2abd2ba54 to your computer and use it in GitHub Desktop.
This is why I love Perl6....

This was a fun little script to write.

Note, for this to work, you curerntly need my forks of:

What it does:

  • Opens an XLXS file
  • Extracts all XML files into memory
  • Parses the sharedStrings.xml file and stores the data in an array.
  • Parses the first worksheet and stores the data in a hash, with the cell address as the key.

Less than 100 lines!

use NativeCall;
use LibZip::NativeCall;
use XML::Actions;

my $filename = "/path/to/file.xlsx";

my $err = Pointer[int32].new;
die "Could not open Zip file: { $err ?? $err.deref !! 'UNKNOWN'}" 
    unless ( my $z = zip_open($filename, 0, $err) );

my %zip-entries;
for ^zip_get_num_entries($z, 0) {
  my $sb = zip_stat.new;
  unless zip_stat_index($z, $_, 0, $sb)  {
      next unless $sb.name.ends-with('.xml');
      
      my $zf = zip_fopen_index($z, $_, 0);
      my $b = Buf.allocate($sb.size);
      zip_fread($zf, $b, $b.elems);
      %zip-entries{$sb.name} = $b;
  }
}
zip_close($z);

my @strings;
my class SharedStrings is XML::Actions::Work {
    has $!text;
    
    method t:start (Array $parent-path) {
        $!text = '';
    }
    
    method t:end (Array $parent-path) {
        @strings.push: $!text;
    }
    
    method xml:text (Array $parent-path, Str $text) {
        $!text ~= $text;
    }

}

my %cellData;
my class XLSX is XML::Actions::Work {
    has $!text;
    
    method c:start (Array $parent-path, :$r, :$s, :$t) {
        $!text = '';
    }
    
    method c:end (Array $parent-path, :$r, :$s, :$t) {
        %cellData{$r} = ($t // '') eq 's' ??
            @strings[$!text]
            !!
            $!text;
            
    }
    
    method xml:text (Array $parent-path, Str $text) {
        $!text ~= $text;
    }
    
}

my $x = XML::Actions.new( xml => %zip-entries<xl/sharedStrings.xml>.decode );
$x.process( actions => SharedStrings.new );

$x = XML::Actions.new( xml => %zip-entries<xl/worksheets/sheet1.xml>.decode );
$x.process( actions => XLSX.new() );
my regex cell { (\w+) (\d+) }
.gist.say for %cellData.pairs.sort({ 
    my $am = $^a.key ~~ &cell; 
    my $bm = $^b.key ~~ &cell; 
    $am[0] cmp $bm[0] || $am[1] <=> $bm[1]
});

This is why I love perl6.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment