Last active
September 15, 2021 06:25
Star
You must be signed in to star a gist
self-contained version of git-xlsx-textconv.pl for use in Git under Windows
This file has been truncated, but you can view the full file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# Self-contained version of git-xlsx-textconv.pl from | |
# https://github.com/yappo/p5-git-xlsx-textconv.pl | |
# for use in Git under Windows. | |
# created by [fatpacker](https://www.perladvent.org/2012/2012-12-14.html). | |
# In contrast xls2csv from https://github.com/xevo/xls2csv cannot be made | |
# self-contained by fatpacker because of binary dependencies (such as Iconv). | |
# This chunk of stuff was generated by App::FatPacker. To find the original | |
# file's code, look for the end of this BEGIN block or the string 'FATPACK' | |
BEGIN { | |
my %fatpacked; | |
$fatpacked{"Archive/Zip.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP'; | |
package Archive::Zip; | |
use 5.006; | |
use strict; | |
use Carp (); | |
use Cwd (); | |
use IO::File (); | |
use IO::Seekable (); | |
use Compress::Raw::Zlib (); | |
use File::Spec (); | |
use File::Temp (); | |
use FileHandle (); | |
use vars qw( $VERSION @ISA ); | |
BEGIN { | |
$VERSION = '1.68'; | |
require Exporter; | |
@ISA = qw( Exporter ); | |
} | |
use vars qw( $ChunkSize $ErrorHandler ); | |
BEGIN { | |
# This is the size we'll try to read, write, and (de)compress. | |
# You could set it to something different if you had lots of memory | |
# and needed more speed. | |
$ChunkSize ||= 32768; | |
$ErrorHandler = \&Carp::carp; | |
} | |
# BEGIN block is necessary here so that other modules can use the constants. | |
use vars qw( @EXPORT_OK %EXPORT_TAGS ); | |
BEGIN { | |
@EXPORT_OK = ('computeCRC32'); | |
%EXPORT_TAGS = ( | |
CONSTANTS => [ | |
qw( | |
ZIP64_SUPPORTED | |
FA_MSDOS | |
FA_UNIX | |
GPBF_ENCRYPTED_MASK | |
GPBF_DEFLATING_COMPRESSION_MASK | |
GPBF_HAS_DATA_DESCRIPTOR_MASK | |
COMPRESSION_STORED | |
COMPRESSION_DEFLATED | |
COMPRESSION_LEVEL_NONE | |
COMPRESSION_LEVEL_DEFAULT | |
COMPRESSION_LEVEL_FASTEST | |
COMPRESSION_LEVEL_BEST_COMPRESSION | |
IFA_TEXT_FILE_MASK | |
IFA_TEXT_FILE | |
IFA_BINARY_FILE | |
ZIP64_AS_NEEDED | |
ZIP64_EOCD | |
ZIP64_HEADERS | |
) | |
], | |
MISC_CONSTANTS => [ | |
qw( | |
FA_AMIGA | |
FA_VAX_VMS | |
FA_VM_CMS | |
FA_ATARI_ST | |
FA_OS2_HPFS | |
FA_MACINTOSH | |
FA_Z_SYSTEM | |
FA_CPM | |
FA_TOPS20 | |
FA_WINDOWS_NTFS | |
FA_QDOS | |
FA_ACORN | |
FA_VFAT | |
FA_MVS | |
FA_BEOS | |
FA_TANDEM | |
FA_THEOS | |
GPBF_IMPLODING_8K_SLIDING_DICTIONARY_MASK | |
GPBF_IMPLODING_3_SHANNON_FANO_TREES_MASK | |
GPBF_IS_COMPRESSED_PATCHED_DATA_MASK | |
COMPRESSION_SHRUNK | |
DEFLATING_COMPRESSION_NORMAL | |
DEFLATING_COMPRESSION_MAXIMUM | |
DEFLATING_COMPRESSION_FAST | |
DEFLATING_COMPRESSION_SUPER_FAST | |
COMPRESSION_REDUCED_1 | |
COMPRESSION_REDUCED_2 | |
COMPRESSION_REDUCED_3 | |
COMPRESSION_REDUCED_4 | |
COMPRESSION_IMPLODED | |
COMPRESSION_TOKENIZED | |
COMPRESSION_DEFLATED_ENHANCED | |
COMPRESSION_PKWARE_DATA_COMPRESSION_LIBRARY_IMPLODED | |
) | |
], | |
ERROR_CODES => [ | |
qw( | |
AZ_OK | |
AZ_STREAM_END | |
AZ_ERROR | |
AZ_FORMAT_ERROR | |
AZ_IO_ERROR | |
) | |
], | |
# For Internal Use Only | |
PKZIP_CONSTANTS => [ | |
qw( | |
SIGNATURE_FORMAT | |
SIGNATURE_LENGTH | |
LOCAL_FILE_HEADER_SIGNATURE | |
LOCAL_FILE_HEADER_FORMAT | |
LOCAL_FILE_HEADER_LENGTH | |
DATA_DESCRIPTOR_SIGNATURE | |
DATA_DESCRIPTOR_FORMAT | |
DATA_DESCRIPTOR_LENGTH | |
DATA_DESCRIPTOR_ZIP64_FORMAT | |
DATA_DESCRIPTOR_ZIP64_LENGTH | |
DATA_DESCRIPTOR_FORMAT_NO_SIG | |
DATA_DESCRIPTOR_LENGTH_NO_SIG | |
DATA_DESCRIPTOR_ZIP64_FORMAT_NO_SIG | |
DATA_DESCRIPTOR_ZIP64_LENGTH_NO_SIG | |
CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE | |
CENTRAL_DIRECTORY_FILE_HEADER_FORMAT | |
CENTRAL_DIRECTORY_FILE_HEADER_LENGTH | |
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE | |
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_FORMAT | |
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH | |
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE | |
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_FORMAT | |
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH | |
END_OF_CENTRAL_DIRECTORY_SIGNATURE | |
END_OF_CENTRAL_DIRECTORY_FORMAT | |
END_OF_CENTRAL_DIRECTORY_LENGTH | |
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE_STRING | |
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE_STRING | |
END_OF_CENTRAL_DIRECTORY_SIGNATURE_STRING | |
) | |
], | |
# For Internal Use Only | |
UTILITY_METHODS => [ | |
qw( | |
_error | |
_printError | |
_ioError | |
_formatError | |
_zip64NotSupported | |
_subclassResponsibility | |
_binmode | |
_isSeekable | |
_newFileHandle | |
_readSignature | |
_asZipDirName | |
) | |
], | |
); | |
# Add all the constant names and error code names to @EXPORT_OK | |
Exporter::export_ok_tags( | |
qw( | |
CONSTANTS | |
ERROR_CODES | |
PKZIP_CONSTANTS | |
UTILITY_METHODS | |
MISC_CONSTANTS | |
)); | |
} | |
# Zip64 format support status | |
use constant ZIP64_SUPPORTED => !! eval { pack("Q<", 1) }; | |
# Error codes | |
use constant AZ_OK => 0; | |
use constant AZ_STREAM_END => 1; | |
use constant AZ_ERROR => 2; | |
use constant AZ_FORMAT_ERROR => 3; | |
use constant AZ_IO_ERROR => 4; | |
# File types | |
# Values of Archive::Zip::Member->fileAttributeFormat() | |
use constant FA_MSDOS => 0; | |
use constant FA_AMIGA => 1; | |
use constant FA_VAX_VMS => 2; | |
use constant FA_UNIX => 3; | |
use constant FA_VM_CMS => 4; | |
use constant FA_ATARI_ST => 5; | |
use constant FA_OS2_HPFS => 6; | |
use constant FA_MACINTOSH => 7; | |
use constant FA_Z_SYSTEM => 8; | |
use constant FA_CPM => 9; | |
use constant FA_TOPS20 => 10; | |
use constant FA_WINDOWS_NTFS => 11; | |
use constant FA_QDOS => 12; | |
use constant FA_ACORN => 13; | |
use constant FA_VFAT => 14; | |
use constant FA_MVS => 15; | |
use constant FA_BEOS => 16; | |
use constant FA_TANDEM => 17; | |
use constant FA_THEOS => 18; | |
# general-purpose bit flag masks | |
# Found in Archive::Zip::Member->bitFlag() | |
use constant GPBF_ENCRYPTED_MASK => 1 << 0; | |
use constant GPBF_DEFLATING_COMPRESSION_MASK => 3 << 1; | |
use constant GPBF_HAS_DATA_DESCRIPTOR_MASK => 1 << 3; | |
# deflating compression types, if compressionMethod == COMPRESSION_DEFLATED | |
# ( Archive::Zip::Member->bitFlag() & GPBF_DEFLATING_COMPRESSION_MASK ) | |
use constant DEFLATING_COMPRESSION_NORMAL => 0 << 1; | |
use constant DEFLATING_COMPRESSION_MAXIMUM => 1 << 1; | |
use constant DEFLATING_COMPRESSION_FAST => 2 << 1; | |
use constant DEFLATING_COMPRESSION_SUPER_FAST => 3 << 1; | |
# compression method | |
# these two are the only ones supported in this module | |
use constant COMPRESSION_STORED => 0; # file is stored (no compression) | |
use constant COMPRESSION_DEFLATED => 8; # file is Deflated | |
use constant COMPRESSION_LEVEL_NONE => 0; | |
use constant COMPRESSION_LEVEL_DEFAULT => -1; | |
use constant COMPRESSION_LEVEL_FASTEST => 1; | |
use constant COMPRESSION_LEVEL_BEST_COMPRESSION => 9; | |
# internal file attribute bits | |
# Found in Archive::Zip::Member::internalFileAttributes() | |
use constant IFA_TEXT_FILE_MASK => 1; | |
use constant IFA_TEXT_FILE => 1; | |
use constant IFA_BINARY_FILE => 0; | |
# desired zip64 structures for archive creation | |
use constant ZIP64_AS_NEEDED => 0; | |
use constant ZIP64_EOCD => 1; | |
use constant ZIP64_HEADERS => 2; | |
# PKZIP file format miscellaneous constants (for internal use only) | |
use constant SIGNATURE_FORMAT => "V"; | |
use constant SIGNATURE_LENGTH => 4; | |
# these lengths are without the signature. | |
use constant LOCAL_FILE_HEADER_SIGNATURE => 0x04034b50; | |
use constant LOCAL_FILE_HEADER_FORMAT => "v3 V4 v2"; | |
use constant LOCAL_FILE_HEADER_LENGTH => 26; | |
# PKZIP docs don't mention the signature, but Info-Zip writes it. | |
use constant DATA_DESCRIPTOR_SIGNATURE => 0x08074b50; | |
use constant DATA_DESCRIPTOR_FORMAT => "V3"; | |
use constant DATA_DESCRIPTOR_LENGTH => 12; | |
use constant DATA_DESCRIPTOR_ZIP64_FORMAT => "L< Q<2"; | |
use constant DATA_DESCRIPTOR_ZIP64_LENGTH => 20; | |
# but the signature is apparently optional. | |
use constant DATA_DESCRIPTOR_FORMAT_NO_SIG => "V2"; | |
use constant DATA_DESCRIPTOR_LENGTH_NO_SIG => 8; | |
use constant DATA_DESCRIPTOR_ZIP64_FORMAT_NO_SIG => "Q<2"; | |
use constant DATA_DESCRIPTOR_ZIP64_LENGTH_NO_SIG => 16; | |
use constant CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE => 0x02014b50; | |
use constant CENTRAL_DIRECTORY_FILE_HEADER_FORMAT => "C2 v3 V4 v5 V2"; | |
use constant CENTRAL_DIRECTORY_FILE_HEADER_LENGTH => 42; | |
# zip64 support | |
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE => 0x06064b50; | |
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE_STRING => | |
pack("V", ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE); | |
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_FORMAT => "Q< S<2 L<2 Q<4"; | |
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH => 52; | |
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE => 0x07064b50; | |
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE_STRING => | |
pack("V", ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE); | |
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_FORMAT => "L< Q< L<"; | |
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH => 16; | |
use constant END_OF_CENTRAL_DIRECTORY_SIGNATURE => 0x06054b50; | |
use constant END_OF_CENTRAL_DIRECTORY_SIGNATURE_STRING => | |
pack("V", END_OF_CENTRAL_DIRECTORY_SIGNATURE); | |
use constant END_OF_CENTRAL_DIRECTORY_FORMAT => "v4 V2 v"; | |
use constant END_OF_CENTRAL_DIRECTORY_LENGTH => 18; | |
use constant GPBF_IMPLODING_8K_SLIDING_DICTIONARY_MASK => 1 << 1; | |
use constant GPBF_IMPLODING_3_SHANNON_FANO_TREES_MASK => 1 << 2; | |
use constant GPBF_IS_COMPRESSED_PATCHED_DATA_MASK => 1 << 5; | |
# the rest of these are not supported in this module | |
use constant COMPRESSION_SHRUNK => 1; # file is Shrunk | |
use constant COMPRESSION_REDUCED_1 => 2; # file is Reduced CF=1 | |
use constant COMPRESSION_REDUCED_2 => 3; # file is Reduced CF=2 | |
use constant COMPRESSION_REDUCED_3 => 4; # file is Reduced CF=3 | |
use constant COMPRESSION_REDUCED_4 => 5; # file is Reduced CF=4 | |
use constant COMPRESSION_IMPLODED => 6; # file is Imploded | |
use constant COMPRESSION_TOKENIZED => 7; # reserved for Tokenizing compr. | |
use constant COMPRESSION_DEFLATED_ENHANCED => 9; # reserved for enh. Deflating | |
use constant COMPRESSION_PKWARE_DATA_COMPRESSION_LIBRARY_IMPLODED => 10; | |
# Load the various required classes | |
require Archive::Zip::Archive; | |
require Archive::Zip::Member; | |
require Archive::Zip::FileMember; | |
require Archive::Zip::DirectoryMember; | |
require Archive::Zip::ZipFileMember; | |
require Archive::Zip::NewFileMember; | |
require Archive::Zip::StringMember; | |
# Convenience functions | |
sub _ISA ($$) { | |
# Can't rely on Scalar::Util, so use the next best way | |
local $@; | |
!!eval { ref $_[0] and $_[0]->isa($_[1]) }; | |
} | |
sub _CAN ($$) { | |
local $@; | |
!!eval { ref $_[0] and $_[0]->can($_[1]) }; | |
} | |
##################################################################### | |
# Methods | |
sub new { | |
my $class = shift; | |
return Archive::Zip::Archive->new(@_); | |
} | |
sub computeCRC32 { | |
my ($data, $crc); | |
if (ref($_[0]) eq 'HASH') { | |
$data = $_[0]->{string}; | |
$crc = $_[0]->{checksum}; | |
} else { | |
$data = shift; | |
$data = shift if ref($data); | |
$crc = shift; | |
} | |
return Compress::Raw::Zlib::crc32($data, $crc); | |
} | |
# Report or change chunk size used for reading and writing. | |
# Also sets Zlib's default buffer size (eventually). | |
sub setChunkSize { | |
shift if ref($_[0]) eq 'Archive::Zip::Archive'; | |
my $chunkSize = (ref($_[0]) eq 'HASH') ? shift->{chunkSize} : shift; | |
my $oldChunkSize = $Archive::Zip::ChunkSize; | |
$Archive::Zip::ChunkSize = $chunkSize if ($chunkSize); | |
return $oldChunkSize; | |
} | |
sub chunkSize { | |
return $Archive::Zip::ChunkSize; | |
} | |
sub setErrorHandler { | |
my $errorHandler = (ref($_[0]) eq 'HASH') ? shift->{subroutine} : shift; | |
$errorHandler = \&Carp::carp unless defined($errorHandler); | |
my $oldErrorHandler = $Archive::Zip::ErrorHandler; | |
$Archive::Zip::ErrorHandler = $errorHandler; | |
return $oldErrorHandler; | |
} | |
###################################################################### | |
# Private utility functions (not methods). | |
sub _printError { | |
my $string = join(' ', @_, "\n"); | |
my $oldCarpLevel = $Carp::CarpLevel; | |
$Carp::CarpLevel += 2; | |
&{$ErrorHandler}($string); | |
$Carp::CarpLevel = $oldCarpLevel; | |
} | |
# This is called on format errors. | |
sub _formatError { | |
shift if ref($_[0]); | |
_printError('format error:', @_); | |
return AZ_FORMAT_ERROR; | |
} | |
# This is called on IO errors. | |
sub _ioError { | |
shift if ref($_[0]); | |
_printError('IO error:', @_, ':', $!); | |
return AZ_IO_ERROR; | |
} | |
# This is called on generic errors. | |
sub _error { | |
shift if ref($_[0]); | |
_printError('error:', @_); | |
return AZ_ERROR; | |
} | |
# This is called if zip64 format is not supported but would be | |
# required. | |
sub _zip64NotSupported { | |
shift if ref($_[0]); | |
_printError('zip64 format not supported on this Perl interpreter'); | |
return AZ_ERROR; | |
} | |
# Called when a subclass should have implemented | |
# something but didn't | |
sub _subclassResponsibility { | |
Carp::croak("subclass Responsibility\n"); | |
} | |
# Try to set the given file handle or object into binary mode. | |
sub _binmode { | |
my $fh = shift; | |
return _CAN($fh, 'binmode') ? $fh->binmode() : binmode($fh); | |
} | |
# Attempt to guess whether file handle is seekable. | |
# Because of problems with Windows, this only returns true when | |
# the file handle is a real file. | |
sub _isSeekable { | |
my $fh = shift; | |
return 0 unless ref $fh; | |
_ISA($fh, "IO::Scalar") # IO::Scalar objects are brokenly-seekable | |
and return 0; | |
_ISA($fh, "IO::String") | |
and return 1; | |
if (_ISA($fh, "IO::Seekable")) { | |
# Unfortunately, some things like FileHandle objects | |
# return true for Seekable, but AREN'T!!!!! | |
_ISA($fh, "FileHandle") | |
and return 0; | |
return 1; | |
} | |
# open my $fh, "+<", \$data; | |
ref $fh eq "GLOB" && eval { seek $fh, 0, 1 } and return 1; | |
_CAN($fh, "stat") | |
and return -f $fh; | |
return (_CAN($fh, "seek") and _CAN($fh, "tell")) ? 1 : 0; | |
} | |
# Print to the filehandle, while making sure the pesky Perl special global | |
# variables don't interfere. | |
sub _print { | |
my ($self, $fh, @data) = @_; | |
local $\; | |
return $fh->print(@data); | |
} | |
# Return an opened IO::Handle | |
# my ( $status, fh ) = _newFileHandle( 'fileName', 'w' ); | |
# Can take a filename, file handle, or ref to GLOB | |
# Or, if given something that is a ref but not an IO::Handle, | |
# passes back the same thing. | |
sub _newFileHandle { | |
my $fd = shift; | |
my $status = 1; | |
my $handle; | |
if (ref($fd)) { | |
if (_ISA($fd, 'IO::Scalar') or _ISA($fd, 'IO::String')) { | |
$handle = $fd; | |
} elsif (_ISA($fd, 'IO::Handle') or ref($fd) eq 'GLOB') { | |
$handle = IO::File->new; | |
$status = $handle->fdopen($fd, @_); | |
} else { | |
$handle = $fd; | |
} | |
} else { | |
$handle = IO::File->new; | |
$status = $handle->open($fd, @_); | |
} | |
return ($status, $handle); | |
} | |
# Returns next signature from given file handle, leaves | |
# file handle positioned afterwards. | |
# | |
# In list context, returns ($status, $signature) | |
# ( $status, $signature ) = _readSignature( $fh, $fileName ); | |
# | |
# This function returns one of AZ_OK, AZ_IO_ERROR, or | |
# AZ_FORMAT_ERROR and calls the respective error handlers in the | |
# latter two cases. If optional $noFormatError is true, it does | |
# not call the error handler on format error, but only returns | |
# AZ_FORMAT_ERROR. | |
sub _readSignature { | |
my $fh = shift; | |
my $fileName = shift; | |
my $expectedSignature = shift; # optional | |
my $noFormatError = shift; # optional | |
my $signatureData; | |
my $bytesRead = $fh->read($signatureData, SIGNATURE_LENGTH); | |
if ($bytesRead != SIGNATURE_LENGTH) { | |
return _ioError("reading header signature"); | |
} | |
my $signature = unpack(SIGNATURE_FORMAT, $signatureData); | |
my $status = AZ_OK; | |
# compare with expected signature, if any, or any known signature. | |
if ( | |
(defined($expectedSignature) && $signature != $expectedSignature) | |
|| ( !defined($expectedSignature) | |
&& $signature != CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE | |
&& $signature != LOCAL_FILE_HEADER_SIGNATURE | |
&& $signature != END_OF_CENTRAL_DIRECTORY_SIGNATURE | |
&& $signature != DATA_DESCRIPTOR_SIGNATURE | |
&& $signature != ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE | |
&& $signature != ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE | |
) | |
) { | |
if (! $noFormatError ) { | |
my $errmsg = sprintf("bad signature: 0x%08x", $signature); | |
if (_isSeekable($fh)) { | |
$errmsg .= sprintf(" at offset %d", $fh->tell() - SIGNATURE_LENGTH); | |
} | |
$status = _formatError("$errmsg in file $fileName"); | |
} | |
else { | |
$status = AZ_FORMAT_ERROR; | |
} | |
} | |
return ($status, $signature); | |
} | |
# Utility method to make and open a temp file. | |
# Will create $temp_dir if it does not exist. | |
# Returns file handle and name: | |
# | |
# my ($fh, $name) = Archive::Zip::tempFile(); | |
# my ($fh, $name) = Archive::Zip::tempFile('mytempdir'); | |
# | |
sub tempFile { | |
my $dir = (ref($_[0]) eq 'HASH') ? shift->{tempDir} : shift; | |
my ($fh, $filename) = File::Temp::tempfile( | |
SUFFIX => '.zip', | |
UNLINK => 1, | |
$dir ? (DIR => $dir) : ()); | |
return (undef, undef) unless $fh; | |
my ($status, $newfh) = _newFileHandle($fh, 'w+'); | |
$fh->close(); | |
return ($newfh, $filename); | |
} | |
# Return the normalized directory name as used in a zip file (path | |
# separators become slashes, etc.). | |
# Will translate internal slashes in path components (i.e. on Macs) to | |
# underscores. Discards volume names. | |
# When $forceDir is set, returns paths with trailing slashes (or arrays | |
# with trailing blank members). | |
# | |
# If third argument is a reference, returns volume information there. | |
# | |
# input output | |
# . ('.') '.' | |
# ./a ('a') a | |
# ./a/b ('a','b') a/b | |
# ./a/b/ ('a','b') a/b | |
# a/b/ ('a','b') a/b | |
# /a/b/ ('','a','b') a/b | |
# c:\a\b\c.doc ('','a','b','c.doc') a/b/c.doc # on Windows | |
# "i/o maps:whatever" ('i_o maps', 'whatever') "i_o maps/whatever" # on Macs | |
sub _asZipDirName { | |
my $name = shift; | |
my $forceDir = shift; | |
my $volReturn = shift; | |
my ($volume, $directories, $file) = | |
File::Spec->splitpath(File::Spec->canonpath($name), $forceDir); | |
$$volReturn = $volume if (ref($volReturn)); | |
my @dirs = map { $_ =~ y{/}{_}; $_ } File::Spec->splitdir($directories); | |
if (@dirs > 0) { pop(@dirs) unless $dirs[-1] } # remove empty component | |
push(@dirs, defined($file) ? $file : ''); | |
#return wantarray ? @dirs : join ( '/', @dirs ); | |
my $normalised_path = join '/', @dirs; | |
# Leading directory separators should not be stored in zip archives. | |
# Example: | |
# C:\a\b\c\ a/b/c | |
# C:\a\b\c.txt a/b/c.txt | |
# /a/b/c/ a/b/c | |
# /a/b/c.txt a/b/c.txt | |
$normalised_path =~ s{^/}{}; # remove leading separator | |
return $normalised_path; | |
} | |
# Return an absolute local name for a zip name. | |
# Assume a directory if zip name has trailing slash. | |
# Takes an optional volume name in FS format (like 'a:'). | |
# | |
sub _asLocalName { | |
my $name = shift; # zip format | |
my $volume = shift; | |
$volume = '' unless defined($volume); # local FS format | |
my @paths = split(/\//, $name); | |
my $filename = pop(@paths); | |
$filename = '' unless defined($filename); | |
my $localDirs = @paths ? File::Spec->catdir(@paths) : ''; | |
my $localName = File::Spec->catpath($volume, $localDirs, $filename); | |
unless ($volume) { | |
$localName = File::Spec->rel2abs($localName, Cwd::getcwd()); | |
} | |
return $localName; | |
} | |
1; | |
__END__ | |
=pod | |
=encoding utf8 | |
=head1 NAME | |
Archive::Zip - Provide an interface to ZIP archive files. | |
=head1 SYNOPSIS | |
# Create a Zip file | |
use Archive::Zip qw( :ERROR_CODES :CONSTANTS ); | |
my $zip = Archive::Zip->new(); | |
# Add a directory | |
my $dir_member = $zip->addDirectory( 'dirname/' ); | |
# Add a file from a string with compression | |
my $string_member = $zip->addString( 'This is a test', 'stringMember.txt' ); | |
$string_member->desiredCompressionMethod( COMPRESSION_DEFLATED ); | |
# Add a file from disk | |
my $file_member = $zip->addFile( 'xyz.pl', 'AnotherName.pl' ); | |
# Save the Zip file | |
unless ( $zip->writeToFileNamed('someZip.zip') == AZ_OK ) { | |
die 'write error'; | |
} | |
# Read a Zip file | |
my $somezip = Archive::Zip->new(); | |
unless ( $somezip->read( 'someZip.zip' ) == AZ_OK ) { | |
die 'read error'; | |
} | |
# Change the compression type for a file in the Zip | |
my $member = $somezip->memberNamed( 'stringMember.txt' ); | |
$member->desiredCompressionMethod( COMPRESSION_STORED ); | |
unless ( $zip->writeToFileNamed( 'someOtherZip.zip' ) == AZ_OK ) { | |
die 'write error'; | |
} | |
=head1 DESCRIPTION | |
The Archive::Zip module allows a Perl program to create, manipulate, read, | |
and write Zip archive files. | |
Zip archives can be created, or you can read from existing zip files. | |
Once created, they can be written to files, streams, or strings. Members | |
can be added, removed, extracted, replaced, rearranged, and enumerated. | |
They can also be renamed or have their dates, comments, or other attributes | |
queried or modified. Their data can be compressed or uncompressed as needed. | |
Members can be created from members in existing Zip files, or from existing | |
directories, files, or strings. | |
This module uses the L<Compress::Raw::Zlib> library to read and write the | |
compressed streams inside the files. | |
One can use L<Archive::Zip::MemberRead> to read the zip file archive members | |
as if they were files. | |
=head2 File Naming | |
Regardless of what your local file system uses for file naming, names in a | |
Zip file are in Unix format (I<forward> slashes (/) separating directory | |
names, etc.). | |
C<Archive::Zip> tries to be consistent with file naming conventions, and will | |
translate back and forth between native and Zip file names. | |
However, it can't guess which format names are in. So two rules control what | |
kind of file name you must pass various routines: | |
=over 4 | |
=item Names of files are in local format. | |
C<File::Spec> and C<File::Basename> are used for various file | |
operations. When you're referring to a file on your system, use its | |
file naming conventions. | |
=item Names of archive members are in Unix format. | |
This applies to every method that refers to an archive member, or | |
provides a name for new archive members. The C<extract()> methods | |
that can take one or two names will convert from local to zip names | |
if you call them with a single name. | |
=back | |
=head2 Archive::Zip Object Model | |
=head3 Overview | |
Archive::Zip::Archive objects are what you ordinarily deal with. | |
These maintain the structure of a zip file, without necessarily | |
holding data. When a zip is read from a disk file, the (possibly | |
compressed) data still lives in the file, not in memory. Archive | |
members hold information about the individual members, but not | |
(usually) the actual member data. When the zip is written to a | |
(different) file, the member data is compressed or copied as needed. | |
It is possible to make archive members whose data is held in a string | |
in memory, but this is not done when a zip file is read. Directory | |
members don't have any data. | |
=head2 Inheritance | |
Exporter | |
Archive::Zip Common base class, has defs. | |
Archive::Zip::Archive A Zip archive. | |
Archive::Zip::Member Abstract superclass for all members. | |
Archive::Zip::StringMember Member made from a string | |
Archive::Zip::FileMember Member made from an external file | |
Archive::Zip::ZipFileMember Member that lives in a zip file | |
Archive::Zip::NewFileMember Member whose data is in a file | |
Archive::Zip::DirectoryMember Member that is a directory | |
=head1 EXPORTS | |
=over 4 | |
=item :CONSTANTS | |
Exports the following constants: | |
FA_MSDOS FA_UNIX GPBF_ENCRYPTED_MASK | |
GPBF_DEFLATING_COMPRESSION_MASK GPBF_HAS_DATA_DESCRIPTOR_MASK | |
COMPRESSION_STORED COMPRESSION_DEFLATED IFA_TEXT_FILE_MASK | |
IFA_TEXT_FILE IFA_BINARY_FILE COMPRESSION_LEVEL_NONE | |
COMPRESSION_LEVEL_DEFAULT COMPRESSION_LEVEL_FASTEST | |
COMPRESSION_LEVEL_BEST_COMPRESSION | |
ZIP64_SUPPORTED ZIP64_AS_NEEDED ZIP64_EOCD ZIP64_HEADERS | |
=item :MISC_CONSTANTS | |
Exports the following constants (only necessary for extending the | |
module): | |
FA_AMIGA FA_VAX_VMS FA_VM_CMS FA_ATARI_ST FA_OS2_HPFS | |
FA_MACINTOSH FA_Z_SYSTEM FA_CPM FA_WINDOWS_NTFS | |
GPBF_IMPLODING_8K_SLIDING_DICTIONARY_MASK | |
GPBF_IMPLODING_3_SHANNON_FANO_TREES_MASK | |
GPBF_IS_COMPRESSED_PATCHED_DATA_MASK COMPRESSION_SHRUNK | |
DEFLATING_COMPRESSION_NORMAL DEFLATING_COMPRESSION_MAXIMUM | |
DEFLATING_COMPRESSION_FAST DEFLATING_COMPRESSION_SUPER_FAST | |
COMPRESSION_REDUCED_1 COMPRESSION_REDUCED_2 COMPRESSION_REDUCED_3 | |
COMPRESSION_REDUCED_4 COMPRESSION_IMPLODED COMPRESSION_TOKENIZED | |
COMPRESSION_DEFLATED_ENHANCED | |
COMPRESSION_PKWARE_DATA_COMPRESSION_LIBRARY_IMPLODED | |
=item :ERROR_CODES | |
Explained below. Returned from most methods. | |
AZ_OK AZ_STREAM_END AZ_ERROR AZ_FORMAT_ERROR AZ_IO_ERROR | |
=back | |
=head1 ERROR CODES | |
Many of the methods in Archive::Zip return error codes. These are implemented | |
as inline subroutines, using the C<use constant> pragma. They can be imported | |
into your namespace using the C<:ERROR_CODES> tag: | |
use Archive::Zip qw( :ERROR_CODES ); | |
... | |
unless ( $zip->read( 'myfile.zip' ) == AZ_OK ) { | |
die "whoops!"; | |
} | |
=over 4 | |
=item AZ_OK (0) | |
Everything is fine. | |
=item AZ_STREAM_END (1) | |
The read stream (or central directory) ended normally. | |
=item AZ_ERROR (2) | |
There was some generic kind of error. | |
=item AZ_FORMAT_ERROR (3) | |
There is a format error in a ZIP file being read. | |
=item AZ_IO_ERROR (4) | |
There was an IO error. | |
=back | |
=head2 Compression | |
Archive::Zip allows each member of a ZIP file to be compressed (using the | |
Deflate algorithm) or uncompressed. | |
Other compression algorithms that some versions of ZIP have been able to | |
produce are not supported. Each member has two compression methods: the | |
one it's stored as (this is always COMPRESSION_STORED for string and external | |
file members), and the one you desire for the member in the zip file. | |
These can be different, of course, so you can make a zip member that is not | |
compressed out of one that is, and vice versa. | |
You can inquire about the current compression and set the desired | |
compression method: | |
my $member = $zip->memberNamed( 'xyz.txt' ); | |
$member->compressionMethod(); # return current compression | |
# set to read uncompressed | |
$member->desiredCompressionMethod( COMPRESSION_STORED ); | |
# set to read compressed | |
$member->desiredCompressionMethod( COMPRESSION_DEFLATED ); | |
There are two different compression methods: | |
=over 4 | |
=item COMPRESSION_STORED | |
File is stored (no compression) | |
=item COMPRESSION_DEFLATED | |
File is Deflated | |
=back | |
=head2 Compression Levels | |
If a member's desiredCompressionMethod is COMPRESSION_DEFLATED, you | |
can choose different compression levels. This choice may affect the | |
speed of compression and decompression, as well as the size of the | |
compressed member data. | |
$member->desiredCompressionLevel( 9 ); | |
The levels given can be: | |
=over 4 | |
=item * 0 or COMPRESSION_LEVEL_NONE | |
This is the same as saying | |
$member->desiredCompressionMethod( COMPRESSION_STORED ); | |
=item * 1 .. 9 | |
1 gives the best speed and worst compression, and 9 gives the | |
best compression and worst speed. | |
=item * COMPRESSION_LEVEL_FASTEST | |
This is a synonym for level 1. | |
=item * COMPRESSION_LEVEL_BEST_COMPRESSION | |
This is a synonym for level 9. | |
=item * COMPRESSION_LEVEL_DEFAULT | |
This gives a good compromise between speed and compression, | |
and is currently equivalent to 6 (this is in the zlib code). | |
This is the level that will be used if not specified. | |
=back | |
=head1 Archive::Zip Methods | |
The Archive::Zip class (and its invisible subclass Archive::Zip::Archive) | |
implement generic zip file functionality. Creating a new Archive::Zip object | |
actually makes an Archive::Zip::Archive object, but you don't have to worry | |
about this unless you're subclassing. | |
=head2 Constructor | |
=over 4 | |
=item new( [$fileName] ) | |
=item new( { filename => $fileName } ) | |
Make a new, empty zip archive. | |
my $zip = Archive::Zip->new(); | |
If an additional argument is passed, new() will call read() | |
to read the contents of an archive: | |
my $zip = Archive::Zip->new( 'xyz.zip' ); | |
If a filename argument is passed and the read fails for any | |
reason, new will return undef. For this reason, it may be | |
better to call read separately. | |
=back | |
=head2 Zip Archive Utility Methods | |
These Archive::Zip methods may be called as functions or as object | |
methods. Do not call them as class methods: | |
$zip = Archive::Zip->new(); | |
$crc = Archive::Zip::computeCRC32( 'ghijkl' ); # OK | |
$crc = $zip->computeCRC32( 'ghijkl' ); # also OK | |
$crc = Archive::Zip->computeCRC32( 'ghijkl' ); # NOT OK | |
=over 4 | |
=item Archive::Zip::computeCRC32( $string [, $crc] ) | |
=item Archive::Zip::computeCRC32( { string => $string [, checksum => $crc ] } ) | |
This is a utility function that uses the Compress::Raw::Zlib CRC | |
routine to compute a CRC-32. You can get the CRC of a string: | |
$crc = Archive::Zip::computeCRC32( $string ); | |
Or you can compute the running CRC: | |
$crc = 0; | |
$crc = Archive::Zip::computeCRC32( 'abcdef', $crc ); | |
$crc = Archive::Zip::computeCRC32( 'ghijkl', $crc ); | |
=item Archive::Zip::setChunkSize( $number ) | |
=item Archive::Zip::setChunkSize( { chunkSize => $number } ) | |
Report or change chunk size used for reading and writing. | |
This can make big differences in dealing with large files. | |
Currently, this defaults to 32K. This also changes the chunk | |
size used for Compress::Raw::Zlib. You must call setChunkSize() | |
before reading or writing. This is not exportable, so you | |
must call it like: | |
Archive::Zip::setChunkSize( 4096 ); | |
or as a method on a zip (though this is a global setting). | |
Returns old chunk size. | |
=item Archive::Zip::chunkSize() | |
Returns the current chunk size: | |
my $chunkSize = Archive::Zip::chunkSize(); | |
=item Archive::Zip::setErrorHandler( \&subroutine ) | |
=item Archive::Zip::setErrorHandler( { subroutine => \&subroutine } ) | |
Change the subroutine called with error strings. This | |
defaults to \&Carp::carp, but you may want to change it to | |
get the error strings. This is not exportable, so you must | |
call it like: | |
Archive::Zip::setErrorHandler( \&myErrorHandler ); | |
If myErrorHandler is undef, resets handler to default. | |
Returns old error handler. Note that if you call Carp::carp | |
or a similar routine or if you're chaining to the default | |
error handler from your error handler, you may want to | |
increment the number of caller levels that are skipped (do | |
not just set it to a number): | |
$Carp::CarpLevel++; | |
=item Archive::Zip::tempFile( [ $tmpdir ] ) | |
=item Archive::Zip::tempFile( { tempDir => $tmpdir } ) | |
Create a uniquely named temp file. It will be returned open | |
for read/write. If C<$tmpdir> is given, it is used as the | |
name of a directory to create the file in. If not given, | |
creates the file using C<File::Spec::tmpdir()>. Generally, you can | |
override this choice using the | |
$ENV{TMPDIR} | |
environment variable. But see the L<File::Spec|File::Spec> | |
documentation for your system. Note that on many systems, if you're | |
running in taint mode, then you must make sure that C<$ENV{TMPDIR}> is | |
untainted for it to be used. | |
Will I<NOT> create C<$tmpdir> if it does not exist (this is a change | |
from prior versions!). Returns file handle and name: | |
my ($fh, $name) = Archive::Zip::tempFile(); | |
my ($fh, $name) = Archive::Zip::tempFile('myTempDir'); | |
my $fh = Archive::Zip::tempFile(); # if you don't need the name | |
=back | |
=head2 Zip Archive Accessors | |
=over 4 | |
=item members() | |
Return a copy of the members array | |
my @members = $zip->members(); | |
=item numberOfMembers() | |
Return the number of members I have | |
=item memberNames() | |
Return a list of the (internal) file names of the zip members | |
=item memberNamed( $string ) | |
=item memberNamed( { zipName => $string } ) | |
Return ref to member whose filename equals given filename or | |
undef. C<$string> must be in Zip (Unix) filename format. | |
=item membersMatching( $regex ) | |
=item membersMatching( { regex => $regex } ) | |
Return array of members whose filenames match given regular | |
expression in list context. Returns number of matching | |
members in scalar context. | |
my @textFileMembers = $zip->membersMatching( '.*\.txt' ); | |
# or | |
my $numberOfTextFiles = $zip->membersMatching( '.*\.txt' ); | |
=item zip64() | |
Returns whether the previous read or write of the archive has | |
been done in zip64 format. | |
=item desiredZip64Mode() | |
Gets or sets which parts of the archive should be written in | |
zip64 format: All parts as needed (ZIP64_AS_NEEDED), the default, | |
force writing the zip64 end of central directory record | |
(ZIP64_EOCD), force writing the zip64 EOCD record and all headers | |
in zip64 format (ZIP64_HEADERS). | |
=item versionMadeBy() | |
=item versionNeededToExtract() | |
Gets the fields from the zip64 end of central directory | |
record. These are always 0 if the archive is not in zip64 format. | |
=item diskNumber() | |
Return the disk that I start on. Not used for writing zips, | |
but might be interesting if you read a zip in. This should be | |
0, as Archive::Zip does not handle multi-volume archives. | |
=item diskNumberWithStartOfCentralDirectory() | |
Return the disk number that holds the beginning of the | |
central directory. Not used for writing zips, but might be | |
interesting if you read a zip in. This should be 0, as | |
Archive::Zip does not handle multi-volume archives. | |
=item numberOfCentralDirectoriesOnThisDisk() | |
Return the number of CD structures in the zipfile last read in. | |
Not used for writing zips, but might be interesting if you read a zip | |
in. | |
=item numberOfCentralDirectories() | |
Return the number of CD structures in the zipfile last read in. | |
Not used for writing zips, but might be interesting if you read a zip | |
in. | |
=item centralDirectorySize() | |
Returns central directory size, as read from an external zip | |
file. Not used for writing zips, but might be interesting if | |
you read a zip in. | |
=item centralDirectoryOffsetWRTStartingDiskNumber() | |
Returns the offset into the zip file where the CD begins. Not | |
used for writing zips, but might be interesting if you read a | |
zip in. | |
=item zipfileComment( [ $string ] ) | |
=item zipfileComment( [ { comment => $string } ] ) | |
Get or set the zipfile comment. Returns the old comment. | |
print $zip->zipfileComment(); | |
$zip->zipfileComment( 'New Comment' ); | |
=item eocdOffset() | |
Returns the (unexpected) number of bytes between where the | |
EOCD was found and where it expected to be. This is normally | |
0, but would be positive if something (a virus, perhaps) had | |
added bytes somewhere before the EOCD. Not used for writing | |
zips, but might be interesting if you read a zip in. Here is | |
an example of how you can diagnose this: | |
my $zip = Archive::Zip->new('somefile.zip'); | |
if ($zip->eocdOffset()) | |
{ | |
warn "A virus has added ", $zip->eocdOffset, " bytes of garbage\n"; | |
} | |
The C<eocdOffset()> is used to adjust the starting position of member | |
headers, if necessary. | |
=item fileName() | |
Returns the name of the file last read from. If nothing has | |
been read yet, returns an empty string; if read from a file | |
handle, returns the handle in string form. | |
=back | |
=head2 Zip Archive Member Operations | |
Various operations on a zip file modify members. When a member is | |
passed as an argument, you can either use a reference to the member | |
itself, or the name of a member. Of course, using the name requires | |
that names be unique within a zip (this is not enforced). | |
=over 4 | |
=item removeMember( $memberOrName ) | |
=item removeMember( { memberOrZipName => $memberOrName } ) | |
Remove and return the given member, or match its name and | |
remove it. Returns undef if member or name does not exist in this | |
Zip. No-op if member does not belong to this zip. | |
=item replaceMember( $memberOrName, $newMember ) | |
=item replaceMember( { memberOrZipName => $memberOrName, | |
newMember => $newMember } ) | |
Remove and return the given member, or match its name and | |
remove it. Replace with new member. Returns undef if member or | |
name does not exist in this Zip, or if C<$newMember> is undefined. | |
It is an (undiagnosed) error to provide a C<$newMember> that is a | |
member of the zip being modified. | |
my $member1 = $zip->removeMember( 'xyz' ); | |
my $member2 = $zip->replaceMember( 'abc', $member1 ); | |
# now, $member2 (named 'abc') is not in $zip, | |
# and $member1 (named 'xyz') is, having taken $member2's place. | |
=item extractMember( $memberOrName [, $extractedName ] ) | |
=item extractMember( { memberOrZipName => $memberOrName | |
[, name => $extractedName ] } ) | |
Extract the given member, or match its name and extract it. | |
Returns undef if member does not exist in this Zip. If | |
optional second arg is given, use it as the name of the | |
extracted member. Otherwise, the internal filename of the | |
member is used as the name of the extracted file or | |
directory. | |
If you pass C<$extractedName>, it should be in the local file | |
system's format. | |
If you do not pass C<$extractedName> and the internal filename traverses | |
a parent directory or a symbolic link, the extraction will be aborted with | |
C<AC_ERROR> for security reason. | |
All necessary directories will be created. Returns C<AZ_OK> | |
on success. | |
=item extractMemberWithoutPaths( $memberOrName [, $extractedName ] ) | |
=item extractMemberWithoutPaths( { memberOrZipName => $memberOrName | |
[, name => $extractedName ] } ) | |
Extract the given member, or match its name and extract it. | |
Does not use path information (extracts into the current | |
directory). Returns undef if member does not exist in this | |
Zip. | |
If optional second arg is given, use it as the name of the | |
extracted member (its paths will be deleted too). Otherwise, | |
the internal filename of the member (minus paths) is used as | |
the name of the extracted file or directory. Returns C<AZ_OK> | |
on success. | |
If you do not pass C<$extractedName> and the internal filename is equalled | |
to a local symbolic link, the extraction will be aborted with C<AC_ERROR> for | |
security reason. | |
=item addMember( $member ) | |
=item addMember( { member => $member } ) | |
Append a member (possibly from another zip file) to the zip | |
file. Returns the new member. Generally, you will use | |
addFile(), addDirectory(), addFileOrDirectory(), addString(), | |
or read() to add members. | |
# Move member named 'abc' to end of zip: | |
my $member = $zip->removeMember( 'abc' ); | |
$zip->addMember( $member ); | |
=item updateMember( $memberOrName, $fileName ) | |
=item updateMember( { memberOrZipName => $memberOrName, name => $fileName } ) | |
Update a single member from the file or directory named C<$fileName>. | |
Returns the (possibly added or updated) member, if any; C<undef> on | |
errors. | |
The comparison is based on C<lastModTime()> and (in the case of a | |
non-directory) the size of the file. | |
=item addFile( $fileName [, $newName, $compressionLevel ] ) | |
=item addFile( { filename => $fileName | |
[, zipName => $newName, compressionLevel => $compressionLevel } ] ) | |
Append a member whose data comes from an external file, | |
returning the member or undef. The member will have its file | |
name set to the name of the external file, and its | |
desiredCompressionMethod set to COMPRESSION_DEFLATED. The | |
file attributes and last modification time will be set from | |
the file. | |
If the name given does not represent a readable plain file or | |
symbolic link, undef will be returned. C<$fileName> must be | |
in the format required for the local file system. | |
The optional C<$newName> argument sets the internal file name | |
to something different than the given $fileName. C<$newName>, | |
if given, must be in Zip name format (i.e. Unix). | |
The text mode bit will be set if the contents appears to be | |
text (as returned by the C<-T> perl operator). | |
I<NOTE> that you should not (generally) use absolute path names | |
in zip member names, as this will cause problems with some zip | |
tools as well as introduce a security hole and make the zip | |
harder to use. | |
=item addDirectory( $directoryName [, $fileName ] ) | |
=item addDirectory( { directoryName => $directoryName | |
[, zipName => $fileName ] } ) | |
Append a member created from the given directory name. The | |
directory name does not have to name an existing directory. | |
If the named directory exists, the file modification time and | |
permissions are set from the existing directory, otherwise | |
they are set to now and permissive default permissions. | |
C<$directoryName> must be in local file system format. | |
The optional second argument sets the name of the archive | |
member (which defaults to C<$directoryName>). If given, it | |
must be in Zip (Unix) format. | |
Returns the new member. | |
=item addFileOrDirectory( $name [, $newName, $compressionLevel ] ) | |
=item addFileOrDirectory( { name => $name [, zipName => $newName, | |
compressionLevel => $compressionLevel ] } ) | |
Append a member from the file or directory named $name. If | |
$newName is given, use it for the name of the new member. | |
Will add or remove trailing slashes from $newName as needed. | |
C<$name> must be in local file system format. | |
The optional second argument sets the name of the archive | |
member (which defaults to C<$name>). If given, it must be in | |
Zip (Unix) format. | |
=item addString( $stringOrStringRef, $name, [$compressionLevel] ) | |
=item addString( { string => $stringOrStringRef [, zipName => $name, | |
compressionLevel => $compressionLevel ] } ) | |
Append a member created from the given string or string | |
reference. The name is given by the second argument. | |
Returns the new member. The last modification time will be | |
set to now, and the file attributes will be set to permissive | |
defaults. | |
my $member = $zip->addString( 'This is a test', 'test.txt' ); | |
=item contents( $memberOrMemberName [, $newContents ] ) | |
=item contents( { memberOrZipName => $memberOrMemberName | |
[, contents => $newContents ] } ) | |
Returns the uncompressed data for a particular member, or | |
undef. | |
print "xyz.txt contains " . $zip->contents( 'xyz.txt' ); | |
Also can change the contents of a member: | |
$zip->contents( 'xyz.txt', 'This is the new contents' ); | |
If called expecting an array as the return value, it will include | |
the status as the second value in the array. | |
($content, $status) = $zip->contents( 'xyz.txt'); | |
=back | |
=head2 Zip Archive I/O operations | |
A Zip archive can be written to a file or file handle, or read from | |
one. | |
=over 4 | |
=item writeToFileNamed( $fileName ) | |
=item writeToFileNamed( { fileName => $fileName } ) | |
Write a zip archive to named file. Returns C<AZ_OK> on | |
success. | |
my $status = $zip->writeToFileNamed( 'xx.zip' ); | |
die "error somewhere" if $status != AZ_OK; | |
Note that if you use the same name as an existing zip file | |
that you read in, you will clobber ZipFileMembers. So | |
instead, write to a different file name, then delete the | |
original. | |
If you use the C<overwrite()> or C<overwriteAs()> methods, you can | |
re-write the original zip in this way. | |
C<$fileName> should be a valid file name on your system. | |
=item writeToFileHandle( $fileHandle [, $seekable] ) | |
Write a zip archive to a file handle. Return AZ_OK on | |
success. The optional second arg tells whether or not to try | |
to seek backwards to re-write headers. If not provided, it is | |
set if the Perl C<-f> test returns true. This could fail on | |
some operating systems, though. | |
my $fh = IO::File->new( 'someFile.zip', 'w' ); | |
unless ( $zip->writeToFileHandle( $fh ) == AZ_OK ) { | |
# error handling | |
} | |
If you pass a file handle that is not seekable (like if | |
you're writing to a pipe or a socket), pass a false second | |
argument: | |
my $fh = IO::File->new( '| cat > somefile.zip', 'w' ); | |
$zip->writeToFileHandle( $fh, 0 ); # fh is not seekable | |
If this method fails during the write of a member, that | |
member and all following it will return false from | |
C<wasWritten()>. See writeCentralDirectory() for a way to | |
deal with this. | |
If you want, you can write data to the file handle before | |
passing it to writeToFileHandle(); this could be used (for | |
instance) for making self-extracting archives. However, this | |
only works reliably when writing to a real file (as opposed | |
to STDOUT or some other possible non-file). | |
See examples/selfex.pl for how to write a self-extracting | |
archive. | |
=item writeCentralDirectory( $fileHandle [, $offset ] ) | |
=item writeCentralDirectory( { fileHandle => $fileHandle | |
[, offset => $offset ] } ) | |
Writes the central directory structure to the given file | |
handle. | |
Returns AZ_OK on success. If given an $offset, will | |
seek to that point before writing. This can be used for | |
recovery in cases where writeToFileHandle or writeToFileNamed | |
returns an IO error because of running out of space on the | |
destination file. | |
You can truncate the zip by seeking backwards and then writing the | |
directory: | |
my $fh = IO::File->new( 'someFile.zip', 'w' ); | |
my $retval = $zip->writeToFileHandle( $fh ); | |
if ( $retval == AZ_IO_ERROR ) { | |
my @unwritten = grep { not $_->wasWritten() } $zip->members(); | |
if (@unwritten) { | |
$zip->removeMember( $member ) foreach my $member ( @unwritten ); | |
$zip->writeCentralDirectory( $fh, | |
$unwritten[0]->writeLocalHeaderRelativeOffset()); | |
} | |
} | |
=item overwriteAs( $newName ) | |
=item overwriteAs( { filename => $newName } ) | |
Write the zip to the specified file, as safely as possible. | |
This is done by first writing to a temp file, then renaming | |
the original if it exists, then renaming the temp file, then | |
deleting the renamed original if it exists. Returns AZ_OK if | |
successful. | |
=item overwrite() | |
Write back to the original zip file. See overwriteAs() above. | |
If the zip was not ever read from a file, this generates an | |
error. | |
=item read( $fileName ) | |
=item read( { filename => $fileName } ) | |
Read zipfile headers from a zip file, appending new members. | |
Returns C<AZ_OK> or error code. | |
my $zipFile = Archive::Zip->new(); | |
my $status = $zipFile->read( '/some/FileName.zip' ); | |
=item readFromFileHandle( $fileHandle, $filename ) | |
=item readFromFileHandle( { fileHandle => $fileHandle, filename => $filename } ) | |
Read zipfile headers from an already-opened file handle, | |
appending new members. Does not close the file handle. | |
Returns C<AZ_OK> or error code. Note that this requires a | |
seekable file handle; reading from a stream is not yet | |
supported, but using in-memory data is. | |
my $fh = IO::File->new( '/some/FileName.zip', 'r' ); | |
my $zip1 = Archive::Zip->new(); | |
my $status = $zip1->readFromFileHandle( $fh ); | |
my $zip2 = Archive::Zip->new(); | |
$status = $zip2->readFromFileHandle( $fh ); | |
Read zip using in-memory data (recursable): | |
open my $fh, "<", "archive.zip" or die $!; | |
my $zip_data = do { local $.; <$fh> }; | |
my $zip = Archive::Zip->new; | |
open my $dh, "+<", \$zip_data; | |
$zip->readFromFileHandle ($dh); | |
=back | |
=head2 Zip Archive Tree operations | |
These used to be in Archive::Zip::Tree but got moved into | |
Archive::Zip. They enable operation on an entire tree of members or | |
files. | |
A usage example: | |
use Archive::Zip; | |
my $zip = Archive::Zip->new(); | |
# add all readable files and directories below . as xyz/* | |
$zip->addTree( '.', 'xyz' ); | |
# add all readable plain files below /abc as def/* | |
$zip->addTree( '/abc', 'def', sub { -f && -r } ); | |
# add all .c files below /tmp as stuff/* | |
$zip->addTreeMatching( '/tmp', 'stuff', '\.c$' ); | |
# add all .o files below /tmp as stuff/* if they aren't writable | |
$zip->addTreeMatching( '/tmp', 'stuff', '\.o$', sub { ! -w } ); | |
# add all .so files below /tmp that are smaller than 200 bytes as stuff/* | |
$zip->addTreeMatching( '/tmp', 'stuff', '\.o$', sub { -s < 200 } ); | |
# and write them into a file | |
$zip->writeToFileNamed('xxx.zip'); | |
# now extract the same files into /tmpx | |
$zip->extractTree( 'stuff', '/tmpx' ); | |
=over 4 | |
=item $zip->addTree( $root, $dest [, $pred, $compressionLevel ] ) -- Add tree of files to a zip | |
=item $zip->addTree( { root => $root, zipName => $dest [, select => $pred, | |
compressionLevel => $compressionLevel ] ) | |
C<$root> is the root of the tree of files and directories to be | |
added. It is a valid directory name on your system. C<$dest> is | |
the name for the root in the zip file (undef or blank means | |
to use relative pathnames). It is a valid ZIP directory name | |
(that is, it uses forward slashes (/) for separating | |
directory components). C<$pred> is an optional subroutine | |
reference to select files: it is passed the name of the | |
prospective file or directory using C<$_>, and if it returns | |
true, the file or directory will be included. The default is | |
to add all readable files and directories. For instance, | |
using | |
my $pred = sub { /\.txt/ }; | |
$zip->addTree( '.', '', $pred ); | |
will add all the .txt files in and below the current | |
directory, using relative names, and making the names | |
identical in the zipfile: | |
original name zip member name | |
./xyz xyz | |
./a/ a/ | |
./a/b a/b | |
To translate absolute to relative pathnames, just pass them | |
in: $zip->addTree( '/c/d', 'a' ); | |
original name zip member name | |
/c/d/xyz a/xyz | |
/c/d/a/ a/a/ | |
/c/d/a/b a/a/b | |
Returns AZ_OK on success. Note that this will not follow | |
symbolic links to directories. Note also that this does not | |
check for the validity of filenames. | |
Note that you generally I<don't> want to make zip archive member names | |
absolute. | |
=item $zip->addTreeMatching( $root, $dest, $pattern [, $pred, $compressionLevel ] ) | |
=item $zip->addTreeMatching( { root => $root, zipName => $dest, pattern => | |
$pattern [, select => $pred, compressionLevel => $compressionLevel ] } ) | |
$root is the root of the tree of files and directories to be | |
added $dest is the name for the root in the zip file (undef | |
means to use relative pathnames) $pattern is a (non-anchored) | |
regular expression for filenames to match $pred is an | |
optional subroutine reference to select files: it is passed | |
the name of the prospective file or directory in C<$_>, and | |
if it returns true, the file or directory will be included. | |
The default is to add all readable files and directories. To | |
add all files in and below the current directory whose names | |
end in C<.pl>, and make them extract into a subdirectory | |
named C<xyz>, do this: | |
$zip->addTreeMatching( '.', 'xyz', '\.pl$' ) | |
To add all I<writable> files in and below the directory named | |
C</abc> whose names end in C<.pl>, and make them extract into | |
a subdirectory named C<xyz>, do this: | |
$zip->addTreeMatching( '/abc', 'xyz', '\.pl$', sub { -w } ) | |
Returns AZ_OK on success. Note that this will not follow | |
symbolic links to directories. | |
=item $zip->updateTree( $root [, $dest , $pred , $mirror, $compressionLevel ] ); | |
=item $zip->updateTree( { root => $root [, zipName => $dest, select => $pred, | |
mirror => $mirror, compressionLevel => $compressionLevel ] } ); | |
Update a zip file from a directory tree. | |
C<updateTree()> takes the same arguments as C<addTree()>, but first | |
checks to see whether the file or directory already exists in the zip | |
file, and whether it has been changed. | |
If the fourth argument C<$mirror> is true, then delete all my members | |
if corresponding files were not found. | |
Returns an error code or AZ_OK if all is well. | |
=item $zip->extractTree( [ $root, $dest, $volume } ] ) | |
=item $zip->extractTree( [ { root => $root, zipName => $dest, volume => $volume } ] ) | |
If you don't give any arguments at all, will extract all the | |
files in the zip with their original names. | |
If you supply one argument for C<$root>, C<extractTree> will extract | |
all the members whose names start with C<$root> into the current | |
directory, stripping off C<$root> first. | |
C<$root> is in Zip (Unix) format. | |
For instance, | |
$zip->extractTree( 'a' ); | |
when applied to a zip containing the files: | |
a/x a/b/c ax/d/e d/e will extract: | |
a/x as ./x | |
a/b/c as ./b/c | |
If you give two arguments, C<extractTree> extracts all the members | |
whose names start with C<$root>. It will translate C<$root> into | |
C<$dest> to construct the destination file name. | |
C<$root> and C<$dest> are in Zip (Unix) format. | |
For instance, | |
$zip->extractTree( 'a', 'd/e' ); | |
when applied to a zip containing the files: | |
a/x a/b/c ax/d/e d/e will extract: | |
a/x to d/e/x | |
a/b/c to d/e/b/c and ignore ax/d/e and d/e | |
If you give three arguments, C<extractTree> extracts all the members | |
whose names start with C<$root>. It will translate C<$root> into | |
C<$dest> to construct the destination file name, and then it will | |
convert to local file system format, using C<$volume> as the name of | |
the destination volume. | |
C<$root> and C<$dest> are in Zip (Unix) format. | |
C<$volume> is in local file system format. | |
For instance, under Windows, | |
$zip->extractTree( 'a', 'd/e', 'f:' ); | |
when applied to a zip containing the files: | |
a/x a/b/c ax/d/e d/e will extract: | |
a/x to f:d/e/x | |
a/b/c to f:d/e/b/c and ignore ax/d/e and d/e | |
If you want absolute paths (the prior example used paths relative to | |
the current directory on the destination volume, you can specify these | |
in C<$dest>: | |
$zip->extractTree( 'a', '/d/e', 'f:' ); | |
when applied to a zip containing the files: | |
a/x a/b/c ax/d/e d/e will extract: | |
a/x to f:\d\e\x | |
a/b/c to f:\d\e\b\c and ignore ax/d/e and d/e | |
If the path to the extracted file traverses a parent directory or a symbolic | |
link, the extraction will be aborted with C<AC_ERROR> for security reason. | |
Returns an error code or AZ_OK if everything worked OK. | |
=back | |
=head1 Archive::Zip Global Variables | |
=over 4 | |
=item $Archive::Zip::UNICODE | |
This variable governs how Unicode file and directory names are added | |
to or extracted from an archive. If set, file and directory names are considered | |
to be UTF-8 encoded. This is I<EXPERIMENTAL AND BUGGY (there are some edge cases | |
on Win32)>. Please report problems. | |
{ | |
local $Archive::Zip::UNICODE = 1; | |
$zip->addFile('Déjà vu.txt'); | |
} | |
=back | |
=head1 MEMBER OPERATIONS | |
=head2 Member Class Methods | |
Several constructors allow you to construct members without adding | |
them to a zip archive. These work the same as the addFile(), | |
addDirectory(), and addString() zip instance methods described above, | |
but they don't add the new members to a zip. | |
=over 4 | |
=item Archive::Zip::Member->newFromString( $stringOrStringRef [, $fileName ] ) | |
=item Archive::Zip::Member->newFromString( { string => $stringOrStringRef | |
[, zipName => $fileName ] ) | |
Construct a new member from the given string. Returns undef | |
on error. | |
my $member = Archive::Zip::Member->newFromString( 'This is a test' ); | |
my $member = Archive::Zip::Member->newFromString( 'This is a test', 'test.txt' ); | |
my $member = Archive::Zip::Member->newFromString( { string => 'This is a test', zipName => 'test.txt' } ); | |
=item newFromFile( $fileName [, $zipName ] ) | |
=item newFromFile( { filename => $fileName [, zipName => $zipName ] } ) | |
Construct a new member from the given file. Returns undef on | |
error. | |
my $member = Archive::Zip::Member->newFromFile( 'xyz.txt' ); | |
=item newDirectoryNamed( $directoryName [, $zipname ] ) | |
=item newDirectoryNamed( { directoryName => $directoryName | |
[, zipName => $zipname ] } ) | |
Construct a new member from the given directory. | |
C<$directoryName> must be a valid name on your file system; it does not | |
have to exist. | |
If given, C<$zipname> will be the name of the zip member; it must be a | |
valid Zip (Unix) name. If not given, it will be converted from | |
C<$directoryName>. | |
Returns undef on error. | |
my $member = Archive::Zip::Member->newDirectoryNamed( 'CVS/' ); | |
=back | |
=head2 Member Simple Accessors | |
These methods get (and/or set) member attribute values. | |
The zip64 format requires parts of the member data to be stored | |
in the so-called extra fields. You cannot get nor set this zip64 | |
data through the extra field accessors described in this section. | |
In fact, the low-level member methods ensure that the zip64 data | |
in the extra fields is handled completely transparently and | |
invisibly to the user when members are read or written. | |
=over 4 | |
=item zip64() | |
Returns whether the previous read or write of the member has been | |
done in zip64 format. | |
=item desiredZip64Mode() | |
Gets or sets whether the member's headers should be written in | |
zip64 format: As needed (ZIP64_AS_NEEDED), the default, or always | |
(ZIP64_HEADERS). | |
=item versionMadeBy() | |
Gets the field from the member header. | |
=item fileAttributeFormat( [ $format ] ) | |
=item fileAttributeFormat( [ { format => $format ] } ) | |
Gets or sets the field from the member header. These are | |
C<FA_*> values. | |
=item versionNeededToExtract() | |
Gets the field from the member header. | |
=item bitFlag() | |
Gets the general purpose bit field from the member header. | |
This is where the C<GPBF_*> bits live. | |
=item compressionMethod() | |
Returns the member compression method. This is the method | |
that is currently being used to compress the member data. | |
This will be COMPRESSION_STORED for added string or file | |
members, or any of the C<COMPRESSION_*> values for members | |
from a zip file. However, this module can only handle members | |
whose data is in COMPRESSION_STORED or COMPRESSION_DEFLATED | |
format. | |
=item desiredCompressionMethod( [ $method ] ) | |
=item desiredCompressionMethod( [ { compressionMethod => $method } ] ) | |
Get or set the member's C<desiredCompressionMethod>. This is | |
the compression method that will be used when the member is | |
written. Returns prior desiredCompressionMethod. Only | |
COMPRESSION_DEFLATED or COMPRESSION_STORED are valid | |
arguments. Changing to COMPRESSION_STORED will change the | |
member desiredCompressionLevel to 0; changing to | |
COMPRESSION_DEFLATED will change the member | |
desiredCompressionLevel to COMPRESSION_LEVEL_DEFAULT. | |
=item desiredCompressionLevel( [ $level ] ) | |
=item desiredCompressionLevel( [ { compressionLevel => $level } ] ) | |
Get or set the member's desiredCompressionLevel This is the | |
method that will be used to write. Returns prior | |
desiredCompressionLevel. Valid arguments are 0 through 9, | |
COMPRESSION_LEVEL_NONE, COMPRESSION_LEVEL_DEFAULT, | |
COMPRESSION_LEVEL_BEST_COMPRESSION, and | |
COMPRESSION_LEVEL_FASTEST. 0 or COMPRESSION_LEVEL_NONE will | |
change the desiredCompressionMethod to COMPRESSION_STORED. | |
All other arguments will change the desiredCompressionMethod | |
to COMPRESSION_DEFLATED. | |
=item externalFileName() | |
Return the member's external file name, if any, or undef. | |
=item fileName() | |
Get or set the member's internal filename. Returns the | |
(possibly new) filename. Names will have backslashes | |
converted to forward slashes, and will have multiple | |
consecutive slashes converted to single ones. | |
=item lastModFileDateTime() | |
Return the member's last modification date/time stamp in | |
MS-DOS format. | |
=item lastModTime() | |
Return the member's last modification date/time stamp, | |
converted to unix localtime format. | |
print "Mod Time: " . scalar( localtime( $member->lastModTime() ) ); | |
=item setLastModFileDateTimeFromUnix() | |
Set the member's lastModFileDateTime from the given unix | |
time. | |
$member->setLastModFileDateTimeFromUnix( time() ); | |
=item internalFileAttributes() | |
Return the internal file attributes field from the zip | |
header. This is only set for members read from a zip file. | |
=item externalFileAttributes() | |
Return member attributes as read from the ZIP file. Note that | |
these are NOT UNIX! | |
=item unixFileAttributes( [ $newAttributes ] ) | |
=item unixFileAttributes( [ { attributes => $newAttributes } ] ) | |
Get or set the member's file attributes using UNIX file | |
attributes. Returns old attributes. | |
my $oldAttribs = $member->unixFileAttributes( 0666 ); | |
Note that the return value has more than just the file | |
permissions, so you will have to mask off the lowest bits for | |
comparisons. | |
=item localExtraField( [ $newField ] ) | |
=item localExtraField( [ { field => $newField } ] ) | |
Gets or sets the extra field that was read from the local | |
header. The extra field must be in the proper format. If it is | |
not or if the new field contains data related to the zip64 | |
format, this method does not modify the extra field and returns | |
AZ_FORMAT_ERROR, otherwise it returns AZ_OK. | |
=item cdExtraField( [ $newField ] ) | |
=item cdExtraField( [ { field => $newField } ] ) | |
Gets or sets the extra field that was read from the central | |
directory header. The extra field must be in the proper format. | |
If it is not or if the new field contains data related to the | |
zip64 format, this method does not modify the extra field and | |
returns AZ_FORMAT_ERROR, otherwise it returns AZ_OK. | |
=item extraFields() | |
Return both local and CD extra fields, concatenated. | |
=item fileComment( [ $newComment ] ) | |
=item fileComment( [ { comment => $newComment } ] ) | |
Get or set the member's file comment. | |
=item hasDataDescriptor() | |
Get or set the data descriptor flag. If this is set, the | |
local header will not necessarily have the correct data | |
sizes. Instead, a small structure will be stored at the end | |
of the member data with these values. This should be | |
transparent in normal operation. | |
=item crc32() | |
Return the CRC-32 value for this member. This will not be set | |
for members that were constructed from strings or external | |
files until after the member has been written. | |
=item crc32String() | |
Return the CRC-32 value for this member as an 8 character | |
printable hex string. This will not be set for members that | |
were constructed from strings or external files until after | |
the member has been written. | |
=item compressedSize() | |
Return the compressed size for this member. This will not be | |
set for members that were constructed from strings or | |
external files until after the member has been written. | |
=item uncompressedSize() | |
Return the uncompressed size for this member. | |
=item password( [ $password ] ) | |
Returns the password for this member to be used on decryption. | |
If $password is given, it will set the password for the decryption. | |
=item isEncrypted() | |
Return true if this member is encrypted. The Archive::Zip | |
module does not currently support creation of encrypted | |
members. Decryption works more or less like this: | |
my $zip = Archive::Zip->new; | |
$zip->read ("encrypted.zip"); | |
for my $m (map { $zip->memberNamed ($_) } $zip->memberNames) { | |
$m->password ("secret"); | |
$m->contents; # is "" when password was wrong | |
That shows that the password has to be set per member, and not per | |
archive. This might change in the future. | |
=item isTextFile( [ $flag ] ) | |
=item isTextFile( [ { flag => $flag } ] ) | |
Returns true if I am a text file. Also can set the status if | |
given an argument (then returns old state). Note that this | |
module does not currently do anything with this flag upon | |
extraction or storage. That is, bytes are stored in native | |
format whether or not they came from a text file. | |
=item isBinaryFile() | |
Returns true if I am a binary file. Also can set the status | |
if given an argument (then returns old state). Note that this | |
module does not currently do anything with this flag upon | |
extraction or storage. That is, bytes are stored in native | |
format whether or not they came from a text file. | |
=item extractToFileNamed( $fileName ) | |
=item extractToFileNamed( { name => $fileName } ) | |
Extract me to a file with the given name. The file will be | |
created with default modes. Directories will be created as | |
needed. | |
The C<$fileName> argument should be a valid file name on your | |
file system. | |
Returns AZ_OK on success. | |
=item isDirectory() | |
Returns true if I am a directory. | |
=item isSymbolicLink() | |
Returns true if I am a symbolic link. | |
=item writeLocalHeaderRelativeOffset() | |
Returns the file offset in bytes the last time I was written. | |
=item wasWritten() | |
Returns true if I was successfully written. Reset at the | |
beginning of a write attempt. | |
=back | |
=head2 Low-level member data reading | |
It is possible to use lower-level routines to access member data | |
streams, rather than the extract* methods and contents(). For | |
instance, here is how to print the uncompressed contents of a member | |
in chunks using these methods: | |
my ( $member, $status, $bufferRef ); | |
$member = $zip->memberNamed( 'xyz.txt' ); | |
$member->desiredCompressionMethod( COMPRESSION_STORED ); | |
$status = $member->rewindData(); | |
die "error $status" unless $status == AZ_OK; | |
while ( ! $member->readIsDone() ) | |
{ | |
( $bufferRef, $status ) = $member->readChunk(); | |
die "error $status" | |
if $status != AZ_OK && $status != AZ_STREAM_END; | |
# do something with $bufferRef: | |
print $$bufferRef; | |
} | |
$member->endRead(); | |
=over 4 | |
=item readChunk( [ $chunkSize ] ) | |
=item readChunk( [ { chunkSize => $chunkSize } ] ) | |
This reads the next chunk of given size from the member's | |
data stream and compresses or uncompresses it as necessary, | |
returning a reference to the bytes read and a status. If size | |
argument is not given, defaults to global set by | |
Archive::Zip::setChunkSize. Status is AZ_OK on success until | |
the last chunk, where it returns AZ_STREAM_END. Returns C<( | |
\$bytes, $status)>. | |
my ( $outRef, $status ) = $self->readChunk(); | |
print $$outRef if $status != AZ_OK && $status != AZ_STREAM_END; | |
=item rewindData() | |
Rewind data and set up for reading data streams or writing | |
zip files. Can take options for C<inflateInit()> or | |
C<deflateInit()>, but this is not likely to be necessary. | |
Subclass overrides should call this method. Returns C<AZ_OK> | |
on success. | |
=item endRead() | |
Reset the read variables and free the inflater or deflater. | |
Must be called to close files, etc. Returns AZ_OK on success. | |
=item readIsDone() | |
Return true if the read has run out of data or encountered an error. | |
=item contents() | |
Return the entire uncompressed member data or undef in scalar | |
context. When called in array context, returns C<( $string, | |
$status )>; status will be AZ_OK on success: | |
my $string = $member->contents(); | |
# or | |
my ( $string, $status ) = $member->contents(); | |
die "error $status" unless $status == AZ_OK; | |
Can also be used to set the contents of a member (this may | |
change the class of the member): | |
$member->contents( "this is my new contents" ); | |
=item extractToFileHandle( $fh ) | |
=item extractToFileHandle( { fileHandle => $fh } ) | |
Extract (and uncompress, if necessary) the member's contents | |
to the given file handle. Return AZ_OK on success. | |
For members representing symbolic links, pass the name of the | |
symbolic link as file handle. Ensure that all directories in the | |
path to the symbolic link already exist. | |
=back | |
=head1 Archive::Zip::FileMember methods | |
The Archive::Zip::FileMember class extends Archive::Zip::Member. It is the | |
base class for both ZipFileMember and NewFileMember classes. This class adds | |
an C<externalFileName> and an C<fh> member to keep track of the external | |
file. | |
=over 4 | |
=item externalFileName() | |
Return the member's external filename. | |
=item fh() | |
Return the member's read file handle. Automatically opens file if | |
necessary. | |
=back | |
=head1 Archive::Zip::ZipFileMember methods | |
The Archive::Zip::ZipFileMember class represents members that have been read | |
from external zip files. | |
=over 4 | |
=item diskNumberStart() | |
Returns the disk number that the member's local header resides in. | |
Should be 0. | |
=item localHeaderRelativeOffset() | |
Returns the offset into the zip file where the member's local header | |
is. | |
=item dataOffset() | |
Returns the offset from the beginning of the zip file to the member's | |
data. | |
=back | |
=head1 REQUIRED MODULES | |
L<Archive::Zip> requires several other modules: | |
L<Carp> | |
L<Compress::Raw::Zlib> | |
L<Cwd> | |
L<File::Basename> | |
L<File::Copy> | |
L<File::Find> | |
L<File::Path> | |
L<File::Spec> | |
L<IO::File> | |
L<IO::Seekable> | |
L<Time::Local> | |
=head1 BUGS AND CAVEATS | |
=head2 When not to use Archive::Zip | |
If you are just going to be extracting zips (and/or other archives) you | |
are recommended to look at using L<Archive::Extract> instead, as it is much | |
easier to use and factors out archive-specific functionality. | |
=head2 Zip64 Format Support | |
Since version 1.66 Archive::Zip supports the so-called zip64 | |
format, which overcomes various limitations in the original zip | |
file format. On some Perl interpreters, however, even version | |
1.66 and newer of Archive::Zip cannot support the zip64 format. | |
Among these are all Perl interpreters that lack 64-bit support | |
and those older than version 5.10.0. | |
Constant C<ZIP64_SUPPORTED>, exported with tag L<:CONSTANTS>, | |
equals true if Archive::Zip on the current Perl interpreter | |
supports the zip64 format. If it does not and you try to read or | |
write an archive in zip64 format, anyway, Archive::Zip returns an | |
error C<AZ_ERROR> and reports an error message along the lines of | |
"zip64 format not supported on this Perl interpreter". | |
=head2 C<versionMadeBy> and C<versionNeededToExtract> | |
The zip64 format and the zip file format in general specify what | |
values to use for the C<versionMadeBy> and | |
C<versionNeededToExtract> fields in the local file header, | |
central directory file header, and zip64 EOCD record. In | |
practice however, these fields seem to be more or less randomly | |
used by various archiver implementations. | |
To achieve a compromise between backward compatibility and | |
(whatever) standard compliance, Archive::Zip handles them as | |
follows: | |
=over 4 | |
=item | |
For field C<versionMadeBy>, Archive::Zip uses default value 20 | |
(45 for the zip64 EOCD record) or any previously read value. It | |
never changes that value when writing a header, even if it is | |
written in zip64 format, or when writing the zip64 EOCD record. | |
=item | |
Likewise for field C<versionNeededToExtract>, but here | |
Archive::Zip forces a minimum value of 45 when writing a header | |
in zip64 format or the zip64 EOCD record. | |
=item | |
Finally, Archive::Zip never depends on the values of these fields | |
in any way when reading an archive from a file or file handle. | |
=back | |
=head2 Try to avoid IO::Scalar | |
One of the most common ways to use Archive::Zip is to generate Zip files | |
in-memory. Most people use L<IO::Scalar> for this purpose. | |
Unfortunately, as of 1.11 this module no longer works with L<IO::Scalar> | |
as it incorrectly implements seeking. | |
Anybody using L<IO::Scalar> should consider porting to L<IO::String>, | |
which is smaller, lighter, and is implemented to be perfectly compatible | |
with regular seekable filehandles. | |
Support for L<IO::Scalar> most likely will B<not> be restored in the | |
future, as L<IO::Scalar> itself cannot change the way it is implemented | |
due to back-compatibility issues. | |
=head2 Wrong password for encrypted members | |
When an encrypted member is read using the wrong password, you currently | |
have to re-read the entire archive to try again with the correct password. | |
=head1 TO DO | |
* auto-choosing storing vs compression | |
* extra field hooks (see notes.txt) | |
* check for duplicates on addition/renaming? | |
* Text file extraction (line end translation) | |
* Reading zip files from non-seekable inputs | |
(Perhaps by proxying through IO::String?) | |
* separate unused constants into separate module | |
* cookbook style docs | |
* Handle tainted paths correctly | |
* Work on better compatibility with other IO:: modules | |
* Support encryption | |
* More user-friendly decryption | |
=head1 SUPPORT | |
Bugs should be reported on GitHub | |
L<https://github.com/redhotpenguin/perl-Archive-Zip/issues> | |
For other issues contact the maintainer. | |
=head1 AUTHOR | |
Currently maintained by Fred Moyer <fred@redhotpenguin.com> | |
Previously maintained by Adam Kennedy <adamk@cpan.org> | |
Previously maintained by Steve Peters E<lt>steve@fisharerojo.orgE<gt>. | |
File attributes code by Maurice Aubrey E<lt>maurice@lovelyfilth.comE<gt>. | |
Originally by Ned Konz E<lt>nedkonz@cpan.orgE<gt>. | |
=head1 COPYRIGHT | |
Some parts copyright 2006 - 2012 Adam Kennedy. | |
Some parts copyright 2005 Steve Peters. | |
Original work copyright 2000 - 2004 Ned Konz. | |
This program is free software; you can redistribute it and/or modify | |
it under the same terms as Perl itself. | |
=head1 SEE ALSO | |
Look at L<Archive::Zip::MemberRead> which is a wrapper that allows one to | |
read Zip archive members as if they were files. | |
L<Compress::Raw::Zlib>, L<Archive::Tar>, L<Archive::Extract> | |
=cut | |
ARCHIVE_ZIP | |
$fatpacked{"Archive/Zip/Archive.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_ARCHIVE'; | |
package Archive::Zip::Archive; | |
# Represents a generic ZIP archive | |
use strict; | |
use File::Path; | |
use File::Find (); | |
use File::Spec (); | |
use File::Copy (); | |
use File::Basename; | |
use Cwd; | |
use Encode qw(encode_utf8 decode_utf8); | |
use vars qw( $VERSION @ISA ); | |
BEGIN { | |
$VERSION = '1.68'; | |
@ISA = qw( Archive::Zip ); | |
} | |
use Archive::Zip qw( | |
:CONSTANTS | |
:ERROR_CODES | |
:PKZIP_CONSTANTS | |
:UTILITY_METHODS | |
); | |
our $UNICODE; | |
our $UNTAINT = qr/\A(.+)\z/; | |
# Note that this returns undef on read errors, else new zip object. | |
sub new { | |
my $class = shift; | |
# Info-Zip 3.0 (I guess) seems to use the following values | |
# for the version fields in the zip64 EOCD record: | |
# | |
# version made by: | |
# 30 (plus upper byte indicating host system) | |
# | |
# version needed to extract: | |
# 45 | |
my $self = bless( | |
{ | |
'zip64' => 0, | |
'desiredZip64Mode' => ZIP64_AS_NEEDED, | |
'versionMadeBy' => 0, | |
'versionNeededToExtract' => 0, | |
'diskNumber' => 0, | |
'diskNumberWithStartOfCentralDirectory' => | |
0, | |
'numberOfCentralDirectoriesOnThisDisk' => | |
0, # should be # of members | |
'numberOfCentralDirectories' => 0, # should be # of members | |
'centralDirectorySize' => 0, # must re-compute on write | |
'centralDirectoryOffsetWRTStartingDiskNumber' => | |
0, # must re-compute | |
'writeEOCDOffset' => 0, | |
'writeCentralDirectoryOffset' => 0, | |
'zipfileComment' => '', | |
'eocdOffset' => 0, | |
'fileName' => '' | |
}, | |
$class | |
); | |
$self->{'members'} = []; | |
my $fileName = (ref($_[0]) eq 'HASH') ? shift->{filename} : shift; | |
if ($fileName) { | |
my $status = $self->read($fileName); | |
return $status == AZ_OK ? $self : undef; | |
} | |
return $self; | |
} | |
sub storeSymbolicLink { | |
my $self = shift; | |
$self->{'storeSymbolicLink'} = shift; | |
} | |
sub members { | |
@{shift->{'members'}}; | |
} | |
sub numberOfMembers { | |
scalar(shift->members()); | |
} | |
sub memberNames { | |
my $self = shift; | |
return map { $_->fileName() } $self->members(); | |
} | |
# return ref to member with given name or undef | |
sub memberNamed { | |
my $self = shift; | |
my $fileName = (ref($_[0]) eq 'HASH') ? shift->{zipName} : shift; | |
foreach my $member ($self->members()) { | |
return $member if $member->fileName() eq $fileName; | |
} | |
return undef; | |
} | |
sub membersMatching { | |
my $self = shift; | |
my $pattern = (ref($_[0]) eq 'HASH') ? shift->{regex} : shift; | |
return grep { $_->fileName() =~ /$pattern/ } $self->members(); | |
} | |
sub zip64 { | |
shift->{'zip64'}; | |
} | |
sub desiredZip64Mode { | |
my $self = shift; | |
my $desiredZip64Mode = $self->{'desiredZip64Mode'}; | |
if (@_) { | |
$self->{'desiredZip64Mode'} = | |
ref($_[0]) eq 'HASH' ? shift->{desiredZip64Mode} : shift; | |
} | |
return $desiredZip64Mode; | |
} | |
sub versionMadeBy { | |
shift->{'versionMadeBy'}; | |
} | |
sub versionNeededToExtract { | |
shift->{'versionNeededToExtract'}; | |
} | |
sub diskNumber { | |
shift->{'diskNumber'}; | |
} | |
sub diskNumberWithStartOfCentralDirectory { | |
shift->{'diskNumberWithStartOfCentralDirectory'}; | |
} | |
sub numberOfCentralDirectoriesOnThisDisk { | |
shift->{'numberOfCentralDirectoriesOnThisDisk'}; | |
} | |
sub numberOfCentralDirectories { | |
shift->{'numberOfCentralDirectories'}; | |
} | |
sub centralDirectorySize { | |
shift->{'centralDirectorySize'}; | |
} | |
sub centralDirectoryOffsetWRTStartingDiskNumber { | |
shift->{'centralDirectoryOffsetWRTStartingDiskNumber'}; | |
} | |
sub zipfileComment { | |
my $self = shift; | |
my $comment = $self->{'zipfileComment'}; | |
if (@_) { | |
my $new_comment = (ref($_[0]) eq 'HASH') ? shift->{comment} : shift; | |
$self->{'zipfileComment'} = pack('C0a*', $new_comment); # avoid Unicode | |
} | |
return $comment; | |
} | |
sub eocdOffset { | |
shift->{'eocdOffset'}; | |
} | |
# Return the name of the file last read. | |
sub fileName { | |
shift->{'fileName'}; | |
} | |
sub removeMember { | |
my $self = shift; | |
my $member = (ref($_[0]) eq 'HASH') ? shift->{memberOrZipName} : shift; | |
$member = $self->memberNamed($member) unless ref($member); | |
return undef unless $member; | |
my @newMembers = grep { $_ != $member } $self->members(); | |
$self->{'members'} = \@newMembers; | |
return $member; | |
} | |
sub replaceMember { | |
my $self = shift; | |
my ($oldMember, $newMember); | |
if (ref($_[0]) eq 'HASH') { | |
$oldMember = $_[0]->{memberOrZipName}; | |
$newMember = $_[0]->{newMember}; | |
} else { | |
($oldMember, $newMember) = @_; | |
} | |
$oldMember = $self->memberNamed($oldMember) unless ref($oldMember); | |
return undef unless $oldMember; | |
return undef unless $newMember; | |
my @newMembers = | |
map { ($_ == $oldMember) ? $newMember : $_ } $self->members(); | |
$self->{'members'} = \@newMembers; | |
return $oldMember; | |
} | |
sub extractMember { | |
my $self = shift; | |
my ($member, $name); | |
if (ref($_[0]) eq 'HASH') { | |
$member = $_[0]->{memberOrZipName}; | |
$name = $_[0]->{name}; | |
} else { | |
($member, $name) = @_; | |
} | |
$member = $self->memberNamed($member) unless ref($member); | |
return _error('member not found') unless $member; | |
my $originalSize = $member->compressedSize(); | |
my ($volumeName, $dirName, $fileName); | |
if (defined($name)) { | |
($volumeName, $dirName, $fileName) = File::Spec->splitpath($name); | |
$dirName = File::Spec->catpath($volumeName, $dirName, ''); | |
} else { | |
$name = $member->fileName(); | |
if ((my $ret = _extractionNameIsSafe($name)) | |
!= AZ_OK) { return $ret; } | |
($dirName = $name) =~ s{[^/]*$}{}; | |
$dirName = Archive::Zip::_asLocalName($dirName); | |
$name = Archive::Zip::_asLocalName($name); | |
} | |
if ($dirName && !-d $dirName) { | |
mkpath($dirName); | |
return _ioError("can't create dir $dirName") if (!-d $dirName); | |
} | |
my $rc = $member->extractToFileNamed($name, @_); | |
# TODO refactor this fix into extractToFileNamed() | |
$member->{'compressedSize'} = $originalSize; | |
return $rc; | |
} | |
sub extractMemberWithoutPaths { | |
my $self = shift; | |
my ($member, $name); | |
if (ref($_[0]) eq 'HASH') { | |
$member = $_[0]->{memberOrZipName}; | |
$name = $_[0]->{name}; | |
} else { | |
($member, $name) = @_; | |
} | |
$member = $self->memberNamed($member) unless ref($member); | |
return _error('member not found') unless $member; | |
my $originalSize = $member->compressedSize(); | |
return AZ_OK if $member->isDirectory(); | |
unless ($name) { | |
$name = $member->fileName(); | |
$name =~ s{.*/}{}; # strip off directories, if any | |
if ((my $ret = _extractionNameIsSafe($name)) | |
!= AZ_OK) { return $ret; } | |
$name = Archive::Zip::_asLocalName($name); | |
} | |
my $rc = $member->extractToFileNamed($name, @_); | |
$member->{'compressedSize'} = $originalSize; | |
return $rc; | |
} | |
sub addMember { | |
my $self = shift; | |
my $newMember = (ref($_[0]) eq 'HASH') ? shift->{member} : shift; | |
push(@{$self->{'members'}}, $newMember) if $newMember; | |
if($newMember && ($newMember->{bitFlag} & 0x800) | |
&& !utf8::is_utf8($newMember->{fileName})){ | |
$newMember->{fileName} = Encode::decode_utf8($newMember->{fileName}); | |
} | |
return $newMember; | |
} | |
sub addFile { | |
my $self = shift; | |
my ($fileName, $newName, $compressionLevel); | |
if (ref($_[0]) eq 'HASH') { | |
$fileName = $_[0]->{filename}; | |
$newName = $_[0]->{zipName}; | |
$compressionLevel = $_[0]->{compressionLevel}; | |
} else { | |
($fileName, $newName, $compressionLevel) = @_; | |
} | |
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) { | |
$fileName = Win32::GetANSIPathName($fileName); | |
} | |
my $newMember = Archive::Zip::Member->newFromFile($fileName, $newName); | |
$newMember->desiredCompressionLevel($compressionLevel); | |
if ($self->{'storeSymbolicLink'} && -l $fileName) { | |
my $newMember = | |
Archive::Zip::Member->newFromString(readlink $fileName, $newName); | |
# For symbolic links, External File Attribute is set to 0xA1FF0000 by Info-ZIP | |
$newMember->{'externalFileAttributes'} = 0xA1FF0000; | |
$self->addMember($newMember); | |
} else { | |
$self->addMember($newMember); | |
} | |
return $newMember; | |
} | |
sub addString { | |
my $self = shift; | |
my ($stringOrStringRef, $name, $compressionLevel); | |
if (ref($_[0]) eq 'HASH') { | |
$stringOrStringRef = $_[0]->{string}; | |
$name = $_[0]->{zipName}; | |
$compressionLevel = $_[0]->{compressionLevel}; | |
} else { | |
($stringOrStringRef, $name, $compressionLevel) = @_; | |
} | |
my $newMember = | |
Archive::Zip::Member->newFromString($stringOrStringRef, $name); | |
$newMember->desiredCompressionLevel($compressionLevel); | |
return $self->addMember($newMember); | |
} | |
sub addDirectory { | |
my $self = shift; | |
my ($name, $newName); | |
if (ref($_[0]) eq 'HASH') { | |
$name = $_[0]->{directoryName}; | |
$newName = $_[0]->{zipName}; | |
} else { | |
($name, $newName) = @_; | |
} | |
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) { | |
$name = Win32::GetANSIPathName($name); | |
} | |
my $newMember = Archive::Zip::Member->newDirectoryNamed($name, $newName); | |
if ($self->{'storeSymbolicLink'} && -l $name) { | |
my $link = readlink $name; | |
($newName =~ s{/$}{}) if $newName; # Strip trailing / | |
my $newMember = Archive::Zip::Member->newFromString($link, $newName); | |
# For symbolic links, External File Attribute is set to 0xA1FF0000 by Info-ZIP | |
$newMember->{'externalFileAttributes'} = 0xA1FF0000; | |
$self->addMember($newMember); | |
} else { | |
$self->addMember($newMember); | |
} | |
return $newMember; | |
} | |
# add either a file or a directory. | |
sub addFileOrDirectory { | |
my $self = shift; | |
my ($name, $newName, $compressionLevel); | |
if (ref($_[0]) eq 'HASH') { | |
$name = $_[0]->{name}; | |
$newName = $_[0]->{zipName}; | |
$compressionLevel = $_[0]->{compressionLevel}; | |
} else { | |
($name, $newName, $compressionLevel) = @_; | |
} | |
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) { | |
$name = Win32::GetANSIPathName($name); | |
} | |
$name =~ s{/$}{}; | |
if ($newName) { | |
$newName =~ s{/$}{}; | |
} else { | |
$newName = $name; | |
} | |
if (-f $name) { | |
return $self->addFile($name, $newName, $compressionLevel); | |
} elsif (-d $name) { | |
return $self->addDirectory($name, $newName); | |
} else { | |
return _error("$name is neither a file nor a directory"); | |
} | |
} | |
sub contents { | |
my $self = shift; | |
my ($member, $newContents); | |
if (ref($_[0]) eq 'HASH') { | |
$member = $_[0]->{memberOrZipName}; | |
$newContents = $_[0]->{contents}; | |
} else { | |
($member, $newContents) = @_; | |
} | |
my ($contents, $status) = (undef, AZ_OK); | |
if ($status == AZ_OK) { | |
$status = _error('No member name given') unless defined($member); | |
} | |
if ($status == AZ_OK && ! ref($member)) { | |
my $memberName = $member; | |
$member = $self->memberNamed($memberName); | |
$status = _error('No member named $memberName') unless defined($member); | |
} | |
if ($status == AZ_OK) { | |
($contents, $status) = $member->contents($newContents); | |
} | |
return | |
wantarray | |
? ($contents, $status) | |
: $contents; | |
} | |
sub writeToFileNamed { | |
my $self = shift; | |
my $fileName = | |
(ref($_[0]) eq 'HASH') ? shift->{filename} : shift; # local FS format | |
foreach my $member ($self->members()) { | |
if ($member->_usesFileNamed($fileName)) { | |
return _error("$fileName is needed by member " | |
. $member->fileName() | |
. "; consider using overwrite() or overwriteAs() instead."); | |
} | |
} | |
my ($status, $fh) = _newFileHandle($fileName, 'w'); | |
return _ioError("Can't open $fileName for write") unless $status; | |
$status = $self->writeToFileHandle($fh, 1); | |
$fh->close(); | |
$fh = undef; | |
return $status; | |
} | |
# It is possible to write data to the FH before calling this, | |
# perhaps to make a self-extracting archive. | |
sub writeToFileHandle { | |
my $self = shift; | |
my ($fh, $fhIsSeekable); | |
if (ref($_[0]) eq 'HASH') { | |
$fh = $_[0]->{fileHandle}; | |
$fhIsSeekable = | |
exists($_[0]->{seek}) ? $_[0]->{seek} : _isSeekable($fh); | |
} else { | |
$fh = shift; | |
$fhIsSeekable = @_ ? shift : _isSeekable($fh); | |
} | |
return _error('No filehandle given') unless $fh; | |
return _ioError('filehandle not open') unless $fh->opened(); | |
_binmode($fh); | |
# Find out where the current position is. | |
my $offset = $fhIsSeekable ? $fh->tell() : 0; | |
$offset = 0 if $offset < 0; | |
# (Re-)set the "was-successfully-written" flag so that the | |
# contract advertised in the documentation ("that member and | |
# *all following it* will return false from wasWritten()") | |
# also holds for members written more than once. | |
# | |
# Not sure whether that mechanism works, anyway. If method | |
# $member->_writeToFileHandle fails with an error below and | |
# user continues with calling $zip->writeCentralDirectory | |
# manually, we should end up with the following picture | |
# unless the user seeks back to writeCentralDirectoryOffset: | |
# | |
# ... | |
# [last successfully written member] | |
# <- writeCentralDirectoryOffset points here | |
# [half-written member junk with unknown size] | |
# [central directory entry 0] | |
# ... | |
foreach my $member ($self->members()) { | |
$member->{'wasWritten'} = 0; | |
} | |
foreach my $member ($self->members()) { | |
# (Re-)set object member zip64 flag. Here is what | |
# happens next to that flag: | |
# | |
# $member->_writeToFileHandle | |
# Determines a local flag value depending on | |
# necessity and user desire and ors it to | |
# the object member | |
# $member->_writeLocalFileHeader | |
# Queries the object member to write appropriate | |
# local header | |
# $member->_writeDataDescriptor | |
# Queries the object member to write appropriate | |
# data descriptor | |
# $member->_writeCentralDirectoryFileHeader | |
# Determines a local flag value depending on | |
# necessity and user desire. Writes a central | |
# directory header appropriate to the local flag. | |
# Ors the local flag to the object member. | |
$member->{'zip64'} = 0; | |
my ($status, $memberSize) = | |
$member->_writeToFileHandle($fh, $fhIsSeekable, $offset, | |
$self->desiredZip64Mode()); | |
$member->endRead(); | |
return $status if $status != AZ_OK; | |
$offset += $memberSize; | |
# Change this so it reflects write status and last | |
# successful position | |
$member->{'wasWritten'} = 1; | |
$self->{'writeCentralDirectoryOffset'} = $offset; | |
} | |
return $self->writeCentralDirectory($fh); | |
} | |
# Write zip back to the original file, | |
# as safely as possible. | |
# Returns AZ_OK if successful. | |
sub overwrite { | |
my $self = shift; | |
return $self->overwriteAs($self->{'fileName'}); | |
} | |
# Write zip to the specified file, | |
# as safely as possible. | |
# Returns AZ_OK if successful. | |
sub overwriteAs { | |
my $self = shift; | |
my $zipName = (ref($_[0]) eq 'HASH') ? $_[0]->{filename} : shift; | |
return _error("no filename in overwriteAs()") unless defined($zipName); | |
my ($fh, $tempName) = Archive::Zip::tempFile(); | |
return _error("Can't open temp file", $!) unless $fh; | |
(my $backupName = $zipName) =~ s{(\.[^.]*)?$}{.zbk}; | |
my $status = $self->writeToFileHandle($fh); | |
$fh->close(); | |
$fh = undef; | |
if ($status != AZ_OK) { | |
unlink($tempName); | |
_printError("Can't write to $tempName"); | |
return $status; | |
} | |
my $err; | |
# rename the zip | |
if (-f $zipName && !rename($zipName, $backupName)) { | |
$err = $!; | |
unlink($tempName); | |
return _error("Can't rename $zipName as $backupName", $err); | |
} | |
# move the temp to the original name (possibly copying) | |
unless (File::Copy::move($tempName, $zipName) | |
|| File::Copy::copy($tempName, $zipName)) { | |
$err = $!; | |
rename($backupName, $zipName); | |
unlink($tempName); | |
return _error("Can't move $tempName to $zipName", $err); | |
} | |
# unlink the backup | |
if (-f $backupName && !unlink($backupName)) { | |
$err = $!; | |
return _error("Can't unlink $backupName", $err); | |
} | |
return AZ_OK; | |
} | |
# Used only during writing | |
sub _writeCentralDirectoryOffset { | |
shift->{'writeCentralDirectoryOffset'}; | |
} | |
sub _writeEOCDOffset { | |
shift->{'writeEOCDOffset'}; | |
} | |
# Expects to have _writeEOCDOffset() set | |
sub _writeEndOfCentralDirectory { | |
my ($self, $fh, $membersZip64) = @_; | |
my $zip64 = 0; | |
my $versionMadeBy = $self->versionMadeBy(); | |
my $versionNeededToExtract = $self->versionNeededToExtract(); | |
my $diskNumber = 0; | |
my $diskNumberWithStartOfCentralDirectory = 0; | |
my $numberOfCentralDirectoriesOnThisDisk = $self->numberOfMembers(); | |
my $numberOfCentralDirectories = $self->numberOfMembers(); | |
my $centralDirectorySize = | |
$self->_writeEOCDOffset() - $self->_writeCentralDirectoryOffset(); | |
my $centralDirectoryOffsetWRTStartingDiskNumber = | |
$self->_writeCentralDirectoryOffset(); | |
my $zipfileCommentLength = length($self->zipfileComment()); | |
my $eocdDataZip64 = 0; | |
$eocdDataZip64 ||= $numberOfCentralDirectoriesOnThisDisk > 0xffff; | |
$eocdDataZip64 ||= $numberOfCentralDirectories > 0xffff; | |
$eocdDataZip64 ||= $centralDirectorySize > 0xffffffff; | |
$eocdDataZip64 ||= $centralDirectoryOffsetWRTStartingDiskNumber > 0xffffffff; | |
if ( $membersZip64 | |
|| $eocdDataZip64 | |
|| $self->desiredZip64Mode() == ZIP64_EOCD) { | |
return _zip64NotSupported() unless ZIP64_SUPPORTED; | |
$zip64 = 1; | |
$versionMadeBy = 45 if ($versionMadeBy == 0); | |
$versionNeededToExtract = 45 if ($versionNeededToExtract < 45); | |
$self->_print($fh, ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE_STRING) | |
or return _ioError('writing zip64 EOCD record signature'); | |
my $record = pack( | |
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_FORMAT, | |
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH + | |
SIGNATURE_LENGTH - 12, | |
$versionMadeBy, | |
$versionNeededToExtract, | |
$diskNumber, | |
$diskNumberWithStartOfCentralDirectory, | |
$numberOfCentralDirectoriesOnThisDisk, | |
$numberOfCentralDirectories, | |
$centralDirectorySize, | |
$centralDirectoryOffsetWRTStartingDiskNumber | |
); | |
$self->_print($fh, $record) | |
or return _ioError('writing zip64 EOCD record'); | |
$self->_print($fh, ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE_STRING) | |
or return _ioError('writing zip64 EOCD locator signature'); | |
my $locator = pack( | |
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_FORMAT, | |
0, | |
$self->_writeEOCDOffset(), | |
1 | |
); | |
$self->_print($fh, $locator) | |
or return _ioError('writing zip64 EOCD locator'); | |
} | |
$self->_print($fh, END_OF_CENTRAL_DIRECTORY_SIGNATURE_STRING) | |
or return _ioError('writing EOCD Signature'); | |
my $header = pack( | |
END_OF_CENTRAL_DIRECTORY_FORMAT, | |
$diskNumber, | |
$diskNumberWithStartOfCentralDirectory, | |
$numberOfCentralDirectoriesOnThisDisk > 0xffff | |
? 0xffff : $numberOfCentralDirectoriesOnThisDisk, | |
$numberOfCentralDirectories > 0xffff | |
? 0xffff : $numberOfCentralDirectories, | |
$centralDirectorySize > 0xffffffff | |
? 0xffffffff : $centralDirectorySize, | |
$centralDirectoryOffsetWRTStartingDiskNumber > 0xffffffff | |
? 0xffffffff : $centralDirectoryOffsetWRTStartingDiskNumber, | |
$zipfileCommentLength | |
); | |
$self->_print($fh, $header) | |
or return _ioError('writing EOCD header'); | |
if ($zipfileCommentLength) { | |
$self->_print($fh, $self->zipfileComment()) | |
or return _ioError('writing zipfile comment'); | |
} | |
# Adjust object members related to zip64 format | |
$self->{'zip64'} = $zip64; | |
$self->{'versionMadeBy'} = $versionMadeBy; | |
$self->{'versionNeededToExtract'} = $versionNeededToExtract; | |
return AZ_OK; | |
} | |
# $offset can be specified to truncate a zip file. | |
sub writeCentralDirectory { | |
my $self = shift; | |
my ($fh, $offset); | |
if (ref($_[0]) eq 'HASH') { | |
$fh = $_[0]->{fileHandle}; | |
$offset = $_[0]->{offset}; | |
} else { | |
($fh, $offset) = @_; | |
} | |
if (defined($offset)) { | |
$self->{'writeCentralDirectoryOffset'} = $offset; | |
$fh->seek($offset, IO::Seekable::SEEK_SET) | |
or return _ioError('seeking to write central directory'); | |
} else { | |
$offset = $self->_writeCentralDirectoryOffset(); | |
} | |
my $membersZip64 = 0; | |
foreach my $member ($self->members()) { | |
my ($status, $headerSize) = | |
$member->_writeCentralDirectoryFileHeader($fh, $self->desiredZip64Mode()); | |
return $status if $status != AZ_OK; | |
$membersZip64 ||= $member->zip64(); | |
$offset += $headerSize; | |
$self->{'writeEOCDOffset'} = $offset; | |
} | |
return $self->_writeEndOfCentralDirectory($fh, $membersZip64); | |
} | |
sub read { | |
my $self = shift; | |
my $fileName = (ref($_[0]) eq 'HASH') ? shift->{filename} : shift; | |
return _error('No filename given') unless $fileName; | |
my ($status, $fh) = _newFileHandle($fileName, 'r'); | |
return _ioError("opening $fileName for read") unless $status; | |
$status = $self->readFromFileHandle($fh, $fileName); | |
return $status if $status != AZ_OK; | |
$fh->close(); | |
$self->{'fileName'} = $fileName; | |
return AZ_OK; | |
} | |
sub readFromFileHandle { | |
my $self = shift; | |
my ($fh, $fileName); | |
if (ref($_[0]) eq 'HASH') { | |
$fh = $_[0]->{fileHandle}; | |
$fileName = $_[0]->{filename}; | |
} else { | |
($fh, $fileName) = @_; | |
} | |
$fileName = $fh unless defined($fileName); | |
return _error('No filehandle given') unless $fh; | |
return _ioError('filehandle not open') unless $fh->opened(); | |
_binmode($fh); | |
$self->{'fileName'} = "$fh"; | |
# TODO: how to support non-seekable zips? | |
return _error('file not seekable') | |
unless _isSeekable($fh); | |
$fh->seek(0, 0); # rewind the file | |
my $status = $self->_findEndOfCentralDirectory($fh); | |
return $status if $status != AZ_OK; | |
my $eocdPosition; | |
($status, $eocdPosition) = $self->_readEndOfCentralDirectory($fh, $fileName); | |
return $status if $status != AZ_OK; | |
my $zip64 = $self->zip64(); | |
$fh->seek($eocdPosition - $self->centralDirectorySize(), | |
IO::Seekable::SEEK_SET) | |
or return _ioError("Can't seek $fileName"); | |
# Try to detect garbage at beginning of archives | |
# This should be 0 | |
$self->{'eocdOffset'} = $eocdPosition - $self->centralDirectorySize() # here | |
- $self->centralDirectoryOffsetWRTStartingDiskNumber(); | |
for (; ;) { | |
my $newMember = | |
Archive::Zip::Member->_newFromZipFile($fh, $fileName, $zip64, | |
$self->eocdOffset()); | |
my $signature; | |
($status, $signature) = _readSignature($fh, $fileName); | |
return $status if $status != AZ_OK; | |
if (! $zip64) { | |
last if $signature == END_OF_CENTRAL_DIRECTORY_SIGNATURE; | |
} | |
else { | |
last if $signature == ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE; | |
} | |
$status = $newMember->_readCentralDirectoryFileHeader(); | |
return $status if $status != AZ_OK; | |
$status = $newMember->endRead(); | |
return $status if $status != AZ_OK; | |
if ($newMember->isDirectory()) { | |
$newMember->_become('Archive::Zip::DirectoryMember'); | |
# Ensure above call suceeded to avoid future trouble | |
$newMember->_ISA('Archive::Zip::DirectoryMember') or | |
return $self->_error('becoming Archive::Zip::DirectoryMember'); | |
} | |
if(($newMember->{bitFlag} & 0x800) && !utf8::is_utf8($newMember->{fileName})){ | |
$newMember->{fileName} = Encode::decode_utf8($newMember->{fileName}); | |
} | |
push(@{$self->{'members'}}, $newMember); | |
} | |
return AZ_OK; | |
} | |
# Read EOCD, starting from position before signature. | |
# Checks for a zip64 EOCD record and uses that if present. | |
# | |
# Return AZ_OK (in scalar context) or a pair (AZ_OK, | |
# $eocdPosition) (in list context) on success: | |
# ( $status, $eocdPosition ) = $zip->_readEndOfCentralDirectory( $fh, $fileName ); | |
# where the returned EOCD position either points to the beginning | |
# of the EOCD or to the beginning of the zip64 EOCD record. | |
# | |
# APPNOTE.TXT as of version 6.3.6 is a bit vague on the | |
# "ZIP64(tm) format". It has a lot of conditions like "if an | |
# archive is in ZIP64 format", but never explicitly mentions | |
# *when* an archive is in that format. (Or at least I haven't | |
# found it.) | |
# | |
# So I decided that an archive is in ZIP64 format if zip64 EOCD | |
# locator and zip64 EOCD record are present before the EOCD with | |
# the format given in the specification. | |
sub _readEndOfCentralDirectory { | |
my $self = shift; | |
my $fh = shift; | |
my $fileName = shift; | |
# Remember current position, which is just before the EOCD | |
# signature | |
my $eocdPosition = $fh->tell(); | |
# Reset the zip64 format flag | |
$self->{'zip64'} = 0; | |
my $zip64EOCDPosition; | |
# Check for zip64 EOCD locator and zip64 EOCD record. Be | |
# extra careful here to not interpret any random data as | |
# zip64 data structures. If in doubt, silently continue | |
# reading the regular EOCD. | |
NOZIP64: | |
{ | |
# Do not even start looking for any zip64 structures if | |
# that would not be supported. | |
if (! ZIP64_SUPPORTED) { | |
last NOZIP64; | |
} | |
if ($eocdPosition < ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH + SIGNATURE_LENGTH) { | |
last NOZIP64; | |
} | |
# Skip to before potential zip64 EOCD locator | |
$fh->seek(-(ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH) - SIGNATURE_LENGTH, | |
IO::Seekable::SEEK_CUR) | |
or return _ioError("seeking to before zip 64 EOCD locator"); | |
my $zip64EOCDLocatorPosition = | |
$eocdPosition - ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH - SIGNATURE_LENGTH; | |
my $status; | |
my $bytesRead; | |
# Read potential zip64 EOCD locator signature | |
$status = | |
_readSignature($fh, $fileName, | |
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE, 1); | |
return $status if $status == AZ_IO_ERROR; | |
if ($status == AZ_FORMAT_ERROR) { | |
$fh->seek($eocdPosition, IO::Seekable::SEEK_SET) | |
or return _ioError("seeking to EOCD"); | |
last NOZIP64; | |
} | |
# Read potential zip64 EOCD locator and verify it | |
my $locator = ''; | |
$bytesRead = $fh->read($locator, ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH); | |
if ($bytesRead != ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH) { | |
return _ioError("reading zip64 EOCD locator"); | |
} | |
(undef, $zip64EOCDPosition, undef) = | |
unpack(ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_FORMAT, $locator); | |
if ($zip64EOCDPosition > | |
($zip64EOCDLocatorPosition - ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH - SIGNATURE_LENGTH)) { | |
# No need to seek to EOCD since we're already there | |
last NOZIP64; | |
} | |
# Skip to potential zip64 EOCD record | |
$fh->seek($zip64EOCDPosition, IO::Seekable::SEEK_SET) | |
or return _ioError("seeking to zip64 EOCD record"); | |
# Read potential zip64 EOCD record signature | |
$status = | |
_readSignature($fh, $fileName, | |
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE, 1); | |
return $status if $status == AZ_IO_ERROR; | |
if ($status == AZ_FORMAT_ERROR) { | |
$fh->seek($eocdPosition, IO::Seekable::SEEK_SET) | |
or return _ioError("seeking to EOCD"); | |
last NOZIP64; | |
} | |
# Read potential zip64 EOCD record. Ignore the zip64 | |
# extensible data sector. | |
my $record = ''; | |
$bytesRead = $fh->read($record, ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH); | |
if ($bytesRead != ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH) { | |
return _ioError("reading zip64 EOCD record"); | |
} | |
# Perform one final check, hoping that all implementors | |
# follow the recommendation of the specification | |
# regarding the size of the zip64 EOCD record | |
my ($zip64EODCRecordSize) = unpack("Q<", $record); | |
if ($zip64EOCDPosition + 12 + $zip64EODCRecordSize != $zip64EOCDLocatorPosition) { | |
$fh->seek($eocdPosition, IO::Seekable::SEEK_SET) | |
or return _ioError("seeking to EOCD"); | |
last NOZIP64; | |
} | |
$self->{'zip64'} = 1; | |
( | |
undef, | |
$self->{'versionMadeBy'}, | |
$self->{'versionNeededToExtract'}, | |
$self->{'diskNumber'}, | |
$self->{'diskNumberWithStartOfCentralDirectory'}, | |
$self->{'numberOfCentralDirectoriesOnThisDisk'}, | |
$self->{'numberOfCentralDirectories'}, | |
$self->{'centralDirectorySize'}, | |
$self->{'centralDirectoryOffsetWRTStartingDiskNumber'} | |
) = unpack(ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_FORMAT, $record); | |
# Don't just happily bail out, we still need to read the | |
# zip file comment! | |
$fh->seek($eocdPosition, IO::Seekable::SEEK_SET) | |
or return _ioError("seeking to EOCD"); | |
} | |
# Skip past signature | |
$fh->seek(SIGNATURE_LENGTH, IO::Seekable::SEEK_CUR) | |
or return _ioError("seeking past EOCD signature"); | |
my $header = ''; | |
my $bytesRead = $fh->read($header, END_OF_CENTRAL_DIRECTORY_LENGTH); | |
if ($bytesRead != END_OF_CENTRAL_DIRECTORY_LENGTH) { | |
return _ioError("reading end of central directory"); | |
} | |
my $zipfileCommentLength; | |
if (! $self->{'zip64'}) { | |
( | |
$self->{'diskNumber'}, | |
$self->{'diskNumberWithStartOfCentralDirectory'}, | |
$self->{'numberOfCentralDirectoriesOnThisDisk'}, | |
$self->{'numberOfCentralDirectories'}, | |
$self->{'centralDirectorySize'}, | |
$self->{'centralDirectoryOffsetWRTStartingDiskNumber'}, | |
$zipfileCommentLength | |
) = unpack(END_OF_CENTRAL_DIRECTORY_FORMAT, $header); | |
if ( $self->{'diskNumber'} == 0xffff | |
|| $self->{'diskNumberWithStartOfCentralDirectory'} == 0xffff | |
|| $self->{'numberOfCentralDirectoriesOnThisDisk'} == 0xffff | |
|| $self->{'numberOfCentralDirectories'} == 0xffff | |
|| $self->{'centralDirectorySize'} == 0xffffffff | |
|| $self->{'centralDirectoryOffsetWRTStartingDiskNumber'} == 0xffffffff) { | |
if (ZIP64_SUPPORTED) { | |
return _formatError("unexpected zip64 marker values in EOCD"); | |
} | |
else { | |
return _zip64NotSupported(); | |
} | |
} | |
} | |
else { | |
( | |
undef, | |
undef, | |
undef, | |
undef, | |
undef, | |
undef, | |
$zipfileCommentLength | |
) = unpack(END_OF_CENTRAL_DIRECTORY_FORMAT, $header); | |
} | |
if ($zipfileCommentLength) { | |
my $zipfileComment = ''; | |
$bytesRead = $fh->read($zipfileComment, $zipfileCommentLength); | |
if ($bytesRead != $zipfileCommentLength) { | |
return _ioError("reading zipfile comment"); | |
} | |
$self->{'zipfileComment'} = $zipfileComment; | |
} | |
if (! $self->{'zip64'}) { | |
return | |
wantarray | |
? (AZ_OK, $eocdPosition) | |
: AZ_OK; | |
} | |
else { | |
return | |
wantarray | |
? (AZ_OK, $zip64EOCDPosition) | |
: AZ_OK; | |
} | |
} | |
# Seek in my file to the end, then read backwards until we find the | |
# signature of the central directory record. Leave the file positioned right | |
# before the signature. Returns AZ_OK if success. | |
sub _findEndOfCentralDirectory { | |
my $self = shift; | |
my $fh = shift; | |
my $data = ''; | |
$fh->seek(0, IO::Seekable::SEEK_END) | |
or return _ioError("seeking to end"); | |
my $fileLength = $fh->tell(); | |
if ($fileLength < END_OF_CENTRAL_DIRECTORY_LENGTH + 4) { | |
return _formatError("file is too short"); | |
} | |
my $seekOffset = 0; | |
my $pos = -1; | |
for (; ;) { | |
$seekOffset += 512; | |
$seekOffset = $fileLength if ($seekOffset > $fileLength); | |
$fh->seek(-$seekOffset, IO::Seekable::SEEK_END) | |
or return _ioError("seek failed"); | |
my $bytesRead = $fh->read($data, $seekOffset); | |
if ($bytesRead != $seekOffset) { | |
return _ioError("read failed"); | |
} | |
$pos = rindex($data, END_OF_CENTRAL_DIRECTORY_SIGNATURE_STRING); | |
last | |
if ( $pos >= 0 | |
or $seekOffset == $fileLength | |
or $seekOffset >= $Archive::Zip::ChunkSize); | |
} | |
if ($pos >= 0) { | |
$fh->seek($pos - $seekOffset, IO::Seekable::SEEK_CUR) | |
or return _ioError("seeking to EOCD"); | |
return AZ_OK; | |
} else { | |
return _formatError("can't find EOCD signature"); | |
} | |
} | |
# Used to avoid taint problems when chdir'ing. | |
# Not intended to increase security in any way; just intended to shut up the -T | |
# complaints. If your Cwd module is giving you unreliable returns from cwd() | |
# you have bigger problems than this. | |
sub _untaintDir { | |
my $dir = shift; | |
$dir =~ m/$UNTAINT/s; | |
return $1; | |
} | |
sub addTree { | |
my $self = shift; | |
my ($root, $dest, $pred, $compressionLevel); | |
if (ref($_[0]) eq 'HASH') { | |
$root = $_[0]->{root}; | |
$dest = $_[0]->{zipName}; | |
$pred = $_[0]->{select}; | |
$compressionLevel = $_[0]->{compressionLevel}; | |
} else { | |
($root, $dest, $pred, $compressionLevel) = @_; | |
} | |
return _error("root arg missing in call to addTree()") | |
unless defined($root); | |
$dest = '' unless defined($dest); | |
$pred = sub { -r } | |
unless defined($pred); | |
my @files; | |
my $startDir = _untaintDir(cwd()); | |
return _error('undef returned by _untaintDir on cwd ', cwd()) | |
unless $startDir; | |
# This avoids chdir'ing in Find, in a way compatible with older | |
# versions of File::Find. | |
my $wanted = sub { | |
local $main::_ = $File::Find::name; | |
my $dir = _untaintDir($File::Find::dir); | |
chdir($startDir); | |
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) { | |
push(@files, Win32::GetANSIPathName($File::Find::name)) if (&$pred); | |
$dir = Win32::GetANSIPathName($dir); | |
} else { | |
push(@files, $File::Find::name) if (&$pred); | |
} | |
chdir($dir); | |
}; | |
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) { | |
$root = Win32::GetANSIPathName($root); | |
} | |
# File::Find will not untaint unless you explicitly pass the flag and regex pattern. | |
File::Find::find({ wanted => $wanted, untaint => 1, untaint_pattern => $UNTAINT }, $root); | |
my $rootZipName = _asZipDirName($root, 1); # with trailing slash | |
my $pattern = $rootZipName eq './' ? '^' : "^\Q$rootZipName\E"; | |
$dest = _asZipDirName($dest, 1); # with trailing slash | |
foreach my $fileName (@files) { | |
my $isDir; | |
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) { | |
$isDir = -d Win32::GetANSIPathName($fileName); | |
} else { | |
$isDir = -d $fileName; | |
} | |
# normalize, remove leading ./ | |
my $archiveName = _asZipDirName($fileName, $isDir); | |
if ($archiveName eq $rootZipName) { $archiveName = $dest } | |
else { $archiveName =~ s{$pattern}{$dest} } | |
next if $archiveName =~ m{^\.?/?$}; # skip current dir | |
my $member = | |
$isDir | |
? $self->addDirectory($fileName, $archiveName) | |
: $self->addFile($fileName, $archiveName); | |
$member->desiredCompressionLevel($compressionLevel); | |
return _error("add $fileName failed in addTree()") if !$member; | |
} | |
return AZ_OK; | |
} | |
sub addTreeMatching { | |
my $self = shift; | |
my ($root, $dest, $pattern, $pred, $compressionLevel); | |
if (ref($_[0]) eq 'HASH') { | |
$root = $_[0]->{root}; | |
$dest = $_[0]->{zipName}; | |
$pattern = $_[0]->{pattern}; | |
$pred = $_[0]->{select}; | |
$compressionLevel = $_[0]->{compressionLevel}; | |
} else { | |
($root, $dest, $pattern, $pred, $compressionLevel) = @_; | |
} | |
return _error("root arg missing in call to addTreeMatching()") | |
unless defined($root); | |
$dest = '' unless defined($dest); | |
return _error("pattern missing in call to addTreeMatching()") | |
unless defined($pattern); | |
my $matcher = | |
$pred ? sub { m{$pattern} && &$pred } : sub { m{$pattern} && -r }; | |
return $self->addTree($root, $dest, $matcher, $compressionLevel); | |
} | |
# Check if one of the components of a path to the file or the file name | |
# itself is an already existing symbolic link. If yes then return an | |
# error. Continuing and writing to a file traversing a link posseses | |
# a security threat, especially if the link was extracted from an | |
# attacker-supplied archive. This would allow writing to an arbitrary | |
# file. The same applies when using ".." to escape from a working | |
# directory. <https://bugzilla.redhat.com/show_bug.cgi?id=1591449> | |
sub _extractionNameIsSafe { | |
my $name = shift; | |
my ($volume, $directories) = File::Spec->splitpath($name, 1); | |
my @directories = File::Spec->splitdir($directories); | |
if (grep '..' eq $_, @directories) { | |
return _error( | |
"Could not extract $name safely: a parent directory is used"); | |
} | |
my @path; | |
my $path; | |
for my $directory (@directories) { | |
push @path, $directory; | |
$path = File::Spec->catpath($volume, File::Spec->catdir(@path), ''); | |
if (-l $path) { | |
return _error( | |
"Could not extract $name safely: $path is an existing symbolic link"); | |
} | |
if (!-e $path) { | |
last; | |
} | |
} | |
return AZ_OK; | |
} | |
# $zip->extractTree( $root, $dest [, $volume] ); | |
# | |
# $root and $dest are Unix-style. | |
# $volume is in local FS format. | |
# | |
sub extractTree { | |
my $self = shift; | |
my ($root, $dest, $volume); | |
if (ref($_[0]) eq 'HASH') { | |
$root = $_[0]->{root}; | |
$dest = $_[0]->{zipName}; | |
$volume = $_[0]->{volume}; | |
} else { | |
($root, $dest, $volume) = @_; | |
} | |
$root = '' unless defined($root); | |
if (defined $dest) { | |
if ($dest !~ m{/$}) { | |
$dest .= '/'; | |
} | |
} else { | |
$dest = './'; | |
} | |
my $pattern = "^\Q$root"; | |
my @members = $self->membersMatching($pattern); | |
foreach my $member (@members) { | |
my $fileName = $member->fileName(); # in Unix format | |
$fileName =~ s{$pattern}{$dest}; # in Unix format | |
# convert to platform format: | |
$fileName = Archive::Zip::_asLocalName($fileName, $volume); | |
if ((my $ret = _extractionNameIsSafe($fileName)) | |
!= AZ_OK) { return $ret; } | |
my $status = $member->extractToFileNamed($fileName); | |
return $status if $status != AZ_OK; | |
} | |
return AZ_OK; | |
} | |
# $zip->updateMember( $memberOrName, $fileName ); | |
# Returns (possibly updated) member, if any; undef on errors. | |
sub updateMember { | |
my $self = shift; | |
my ($oldMember, $fileName); | |
if (ref($_[0]) eq 'HASH') { | |
$oldMember = $_[0]->{memberOrZipName}; | |
$fileName = $_[0]->{name}; | |
} else { | |
($oldMember, $fileName) = @_; | |
} | |
if (!defined($fileName)) { | |
_error("updateMember(): missing fileName argument"); | |
return undef; | |
} | |
my @newStat = stat($fileName); | |
if (!@newStat) { | |
_ioError("Can't stat $fileName"); | |
return undef; | |
} | |
my $isDir = -d _; | |
my $memberName; | |
if (ref($oldMember)) { | |
$memberName = $oldMember->fileName(); | |
} else { | |
$oldMember = $self->memberNamed($memberName = $oldMember) | |
|| $self->memberNamed($memberName = | |
_asZipDirName($oldMember, $isDir)); | |
} | |
unless (defined($oldMember) | |
&& $oldMember->lastModTime() == $newStat[9] | |
&& $oldMember->isDirectory() == $isDir | |
&& ($isDir || ($oldMember->uncompressedSize() == $newStat[7]))) { | |
# create the new member | |
my $newMember = | |
$isDir | |
? Archive::Zip::Member->newDirectoryNamed($fileName, $memberName) | |
: Archive::Zip::Member->newFromFile($fileName, $memberName); | |
unless (defined($newMember)) { | |
_error("creation of member $fileName failed in updateMember()"); | |
return undef; | |
} | |
# replace old member or append new one | |
if (defined($oldMember)) { | |
$self->replaceMember($oldMember, $newMember); | |
} else { | |
$self->addMember($newMember); | |
} | |
return $newMember; | |
} | |
return $oldMember; | |
} | |
# $zip->updateTree( $root, [ $dest, [ $pred [, $mirror]]] ); | |
# | |
# This takes the same arguments as addTree, but first checks to see | |
# whether the file or directory already exists in the zip file. | |
# | |
# If the fourth argument $mirror is true, then delete all my members | |
# if corresponding files were not found. | |
sub updateTree { | |
my $self = shift; | |
my ($root, $dest, $pred, $mirror, $compressionLevel); | |
if (ref($_[0]) eq 'HASH') { | |
$root = $_[0]->{root}; | |
$dest = $_[0]->{zipName}; | |
$pred = $_[0]->{select}; | |
$mirror = $_[0]->{mirror}; | |
$compressionLevel = $_[0]->{compressionLevel}; | |
} else { | |
($root, $dest, $pred, $mirror, $compressionLevel) = @_; | |
} | |
return _error("root arg missing in call to updateTree()") | |
unless defined($root); | |
$dest = '' unless defined($dest); | |
$pred = sub { -r } | |
unless defined($pred); | |
$dest = _asZipDirName($dest, 1); | |
my $rootZipName = _asZipDirName($root, 1); # with trailing slash | |
my $pattern = $rootZipName eq './' ? '^' : "^\Q$rootZipName\E"; | |
my @files; | |
my $startDir = _untaintDir(cwd()); | |
return _error('undef returned by _untaintDir on cwd ', cwd()) | |
unless $startDir; | |
# This avoids chdir'ing in Find, in a way compatible with older | |
# versions of File::Find. | |
my $wanted = sub { | |
local $main::_ = $File::Find::name; | |
my $dir = _untaintDir($File::Find::dir); | |
chdir($startDir); | |
push(@files, $File::Find::name) if (&$pred); | |
chdir($dir); | |
}; | |
File::Find::find($wanted, $root); | |
# Now @files has all the files that I could potentially be adding to | |
# the zip. Only add the ones that are necessary. | |
# For each file (updated or not), add its member name to @done. | |
my %done; | |
foreach my $fileName (@files) { | |
my @newStat = stat($fileName); | |
my $isDir = -d _; | |
# normalize, remove leading ./ | |
my $memberName = _asZipDirName($fileName, $isDir); | |
if ($memberName eq $rootZipName) { $memberName = $dest } | |
else { $memberName =~ s{$pattern}{$dest} } | |
next if $memberName =~ m{^\.?/?$}; # skip current dir | |
$done{$memberName} = 1; | |
my $changedMember = $self->updateMember($memberName, $fileName); | |
$changedMember->desiredCompressionLevel($compressionLevel); | |
return _error("updateTree failed to update $fileName") | |
unless ref($changedMember); | |
} | |
# @done now has the archive names corresponding to all the found files. | |
# If we're mirroring, delete all those members that aren't in @done. | |
if ($mirror) { | |
foreach my $member ($self->members()) { | |
$self->removeMember($member) | |
unless $done{$member->fileName()}; | |
} | |
} | |
return AZ_OK; | |
} | |
1; | |
ARCHIVE_ZIP_ARCHIVE | |
$fatpacked{"Archive/Zip/BufferedFileHandle.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_BUFFEREDFILEHANDLE'; | |
package Archive::Zip::BufferedFileHandle; | |
# File handle that uses a string internally and can seek | |
# This is given as a demo for getting a zip file written | |
# to a string. | |
# I probably should just use IO::Scalar instead. | |
# Ned Konz, March 2000 | |
use strict; | |
use IO::File; | |
use Carp; | |
use vars qw{$VERSION}; | |
BEGIN { | |
$VERSION = '1.68'; | |
$VERSION = eval $VERSION; | |
} | |
sub new { | |
my $class = shift || __PACKAGE__; | |
$class = ref($class) || $class; | |
my $self = bless( | |
{ | |
content => '', | |
position => 0, | |
size => 0 | |
}, | |
$class | |
); | |
return $self; | |
} | |
# Utility method to read entire file | |
sub readFromFile { | |
my $self = shift; | |
my $fileName = shift; | |
my $fh = IO::File->new($fileName, "r"); | |
CORE::binmode($fh); | |
if (!$fh) { | |
Carp::carp("Can't open $fileName: $!\n"); | |
return undef; | |
} | |
local $/ = undef; | |
$self->{content} = <$fh>; | |
$self->{size} = length($self->{content}); | |
return $self; | |
} | |
sub contents { | |
my $self = shift; | |
if (@_) { | |
$self->{content} = shift; | |
$self->{size} = length($self->{content}); | |
} | |
return $self->{content}; | |
} | |
sub binmode { 1 } | |
sub close { 1 } | |
sub opened { 1 } | |
sub eof { | |
my $self = shift; | |
return $self->{position} >= $self->{size}; | |
} | |
sub seek { | |
my $self = shift; | |
my $pos = shift; | |
my $whence = shift; | |
# SEEK_SET | |
if ($whence == 0) { $self->{position} = $pos; } | |
# SEEK_CUR | |
elsif ($whence == 1) { $self->{position} += $pos; } | |
# SEEK_END | |
elsif ($whence == 2) { $self->{position} = $self->{size} + $pos; } | |
else { return 0; } | |
return 1; | |
} | |
sub tell { return shift->{position}; } | |
# Copy my data to given buffer | |
sub read { | |
my $self = shift; | |
my $buf = \($_[0]); | |
shift; | |
my $len = shift; | |
my $offset = shift || 0; | |
$$buf = '' if not defined($$buf); | |
my $bytesRead = | |
($self->{position} + $len > $self->{size}) | |
? ($self->{size} - $self->{position}) | |
: $len; | |
substr($$buf, $offset, $bytesRead) = | |
substr($self->{content}, $self->{position}, $bytesRead); | |
$self->{position} += $bytesRead; | |
return $bytesRead; | |
} | |
# Copy given buffer to me | |
sub write { | |
my $self = shift; | |
my $buf = \($_[0]); | |
shift; | |
my $len = shift; | |
my $offset = shift || 0; | |
$$buf = '' if not defined($$buf); | |
my $bufLen = length($$buf); | |
my $bytesWritten = | |
($offset + $len > $bufLen) | |
? $bufLen - $offset | |
: $len; | |
substr($self->{content}, $self->{position}, $bytesWritten) = | |
substr($$buf, $offset, $bytesWritten); | |
$self->{size} = length($self->{content}); | |
return $bytesWritten; | |
} | |
sub clearerr() { 1 } | |
1; | |
ARCHIVE_ZIP_BUFFEREDFILEHANDLE | |
$fatpacked{"Archive/Zip/DirectoryMember.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_DIRECTORYMEMBER'; | |
package Archive::Zip::DirectoryMember; | |
use strict; | |
use File::Path; | |
use vars qw( $VERSION @ISA ); | |
BEGIN { | |
$VERSION = '1.68'; | |
@ISA = qw( Archive::Zip::Member ); | |
} | |
use Archive::Zip qw( | |
:ERROR_CODES | |
:UTILITY_METHODS | |
); | |
sub _newNamed { | |
my $class = shift; | |
my $fileName = shift; # FS name | |
my $newName = shift; # Zip name | |
$newName = _asZipDirName($fileName) unless $newName; | |
my $self = $class->new(@_); | |
$self->{'externalFileName'} = $fileName; | |
$self->fileName($newName); | |
if (-e $fileName) { | |
# -e does NOT do a full stat, so we need to do one now | |
if (-d _ ) { | |
my @stat = stat(_); | |
$self->unixFileAttributes($stat[2]); | |
my $mod_t = $stat[9]; | |
if ($^O eq 'MSWin32' and !$mod_t) { | |
$mod_t = time(); | |
} | |
$self->setLastModFileDateTimeFromUnix($mod_t); | |
} else { # hmm.. trying to add a non-directory? | |
_error($fileName, ' exists but is not a directory'); | |
return undef; | |
} | |
} else { | |
$self->unixFileAttributes($self->DEFAULT_DIRECTORY_PERMISSIONS); | |
$self->setLastModFileDateTimeFromUnix(time()); | |
} | |
return $self; | |
} | |
sub externalFileName { | |
shift->{'externalFileName'}; | |
} | |
sub isDirectory { | |
return 1; | |
} | |
sub extractToFileNamed { | |
my $self = shift; | |
my $name = shift; # local FS name | |
my $attribs = $self->unixFileAttributes() & 07777; | |
mkpath($name, 0, $attribs); # croaks on error | |
utime($self->lastModTime(), $self->lastModTime(), $name); | |
return AZ_OK; | |
} | |
sub fileName { | |
my $self = shift; | |
my $newName = shift; | |
$newName =~ s{/?$}{/} if defined($newName); | |
return $self->SUPER::fileName($newName); | |
} | |
# So people don't get too confused. This way it looks like the problem | |
# is in their code... | |
sub contents { | |
return wantarray ? (undef, AZ_OK) : undef; | |
} | |
1; | |
ARCHIVE_ZIP_DIRECTORYMEMBER | |
$fatpacked{"Archive/Zip/FileMember.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_FILEMEMBER'; | |
package Archive::Zip::FileMember; | |
use strict; | |
use vars qw( $VERSION @ISA ); | |
BEGIN { | |
$VERSION = '1.68'; | |
@ISA = qw ( Archive::Zip::Member ); | |
} | |
use Archive::Zip qw( | |
:UTILITY_METHODS | |
); | |
sub externalFileName { | |
shift->{'externalFileName'}; | |
} | |
# Return true if I depend on the named file | |
sub _usesFileNamed { | |
my $self = shift; | |
my $fileName = shift; | |
my $xfn = $self->externalFileName(); | |
return undef if ref($xfn); | |
return $xfn eq $fileName; | |
} | |
sub fh { | |
my $self = shift; | |
$self->_openFile() | |
if !defined($self->{'fh'}) || !$self->{'fh'}->opened(); | |
return $self->{'fh'}; | |
} | |
# opens my file handle from my file name | |
sub _openFile { | |
my $self = shift; | |
my ($status, $fh) = _newFileHandle($self->externalFileName(), 'r'); | |
if (!$status) { | |
_ioError("Can't open", $self->externalFileName()); | |
return undef; | |
} | |
$self->{'fh'} = $fh; | |
_binmode($fh); | |
return $fh; | |
} | |
# Make sure I close my file handle | |
sub endRead { | |
my $self = shift; | |
undef $self->{'fh'}; # _closeFile(); | |
return $self->SUPER::endRead(@_); | |
} | |
sub _become { | |
my $self = shift; | |
my $newClass = shift; | |
return $self if ref($self) eq $newClass; | |
delete($self->{'externalFileName'}); | |
delete($self->{'fh'}); | |
return $self->SUPER::_become($newClass); | |
} | |
1; | |
ARCHIVE_ZIP_FILEMEMBER | |
$fatpacked{"Archive/Zip/Member.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_MEMBER'; | |
package Archive::Zip::Member; | |
# A generic member of an archive | |
use strict; | |
use vars qw( $VERSION @ISA ); | |
BEGIN { | |
$VERSION = '1.68'; | |
@ISA = qw( Archive::Zip ); | |
if ($^O eq 'MSWin32') { | |
require Win32; | |
require Encode; | |
Encode->import(qw{ decode_utf8 }); | |
} | |
} | |
use Archive::Zip qw( | |
:CONSTANTS | |
:MISC_CONSTANTS | |
:ERROR_CODES | |
:PKZIP_CONSTANTS | |
:UTILITY_METHODS | |
); | |
use Time::Local (); | |
use Compress::Raw::Zlib qw( Z_OK Z_STREAM_END MAX_WBITS ); | |
use File::Path; | |
use File::Basename; | |
# Unix perms for default creation of files/dirs. | |
use constant DEFAULT_DIRECTORY_PERMISSIONS => 040755; | |
use constant DEFAULT_FILE_PERMISSIONS => 0100666; | |
use constant DIRECTORY_ATTRIB => 040000; | |
use constant FILE_ATTRIB => 0100000; | |
use constant OS_SUPPORTS_SYMLINK => do { | |
local $@; | |
!!eval { symlink("",""); 1 }; | |
}; | |
# Returns self if successful, else undef | |
# Assumes that fh is positioned at beginning of central directory file header. | |
# Leaves fh positioned immediately after file header or EOCD signature. | |
sub _newFromZipFile { | |
my $class = shift; | |
my $self = Archive::Zip::ZipFileMember->_newFromZipFile(@_); | |
return $self; | |
} | |
sub newFromString { | |
my $class = shift; | |
my ($stringOrStringRef, $fileName); | |
if (ref($_[0]) eq 'HASH') { | |
$stringOrStringRef = $_[0]->{string}; | |
$fileName = $_[0]->{zipName}; | |
} else { | |
($stringOrStringRef, $fileName) = @_; | |
} | |
my $self = | |
Archive::Zip::StringMember->_newFromString($stringOrStringRef, $fileName); | |
return $self; | |
} | |
sub newFromFile { | |
my $class = shift; | |
my ($fileName, $zipName); | |
if (ref($_[0]) eq 'HASH') { | |
$fileName = $_[0]->{fileName}; | |
$zipName = $_[0]->{zipName}; | |
} else { | |
($fileName, $zipName) = @_; | |
} | |
my $self = | |
Archive::Zip::NewFileMember->_newFromFileNamed($fileName, $zipName); | |
return $self; | |
} | |
sub newDirectoryNamed { | |
my $class = shift; | |
my ($directoryName, $newName); | |
if (ref($_[0]) eq 'HASH') { | |
$directoryName = $_[0]->{directoryName}; | |
$newName = $_[0]->{zipName}; | |
} else { | |
($directoryName, $newName) = @_; | |
} | |
my $self = | |
Archive::Zip::DirectoryMember->_newNamed($directoryName, $newName); | |
return $self; | |
} | |
sub new { | |
my $class = shift; | |
# Info-Zip 3.0 (I guess) seems to use the following values | |
# for the version fields in local and central directory | |
# headers, regardless of whether the member has an zip64 | |
# extended information extra field or not: | |
# | |
# version made by: | |
# 30 | |
# | |
# version needed to extract: | |
# 10 for directory and stored entries | |
# 20 for anything else | |
my $self = { | |
'lastModFileDateTime' => 0, | |
'fileAttributeFormat' => FA_UNIX, | |
'zip64' => 0, | |
'desiredZip64Mode' => ZIP64_AS_NEEDED, | |
'versionMadeBy' => 20, | |
'versionNeededToExtract' => 20, | |
'bitFlag' => ($Archive::Zip::UNICODE ? 0x0800 : 0), | |
'compressionMethod' => COMPRESSION_STORED, | |
'desiredCompressionMethod' => COMPRESSION_STORED, | |
'desiredCompressionLevel' => COMPRESSION_LEVEL_NONE, | |
'internalFileAttributes' => 0, | |
'externalFileAttributes' => 0, # set later | |
'fileName' => '', | |
'cdExtraField' => '', | |
'localExtraField' => '', | |
'fileComment' => '', | |
'crc32' => 0, | |
'compressedSize' => 0, | |
'uncompressedSize' => 0, | |
'password' => undef, # password for encrypted data | |
'crc32c' => -1, # crc for decrypted data | |
@_ | |
}; | |
bless($self, $class); | |
$self->unixFileAttributes($self->DEFAULT_FILE_PERMISSIONS); | |
return $self; | |
} | |
# Morph into given class (do whatever cleanup I need to do) | |
sub _become { | |
return bless($_[0], $_[1]); | |
} | |
sub fileAttributeFormat { | |
my $self = shift; | |
if (@_) { | |
$self->{fileAttributeFormat} = | |
(ref($_[0]) eq 'HASH') ? $_[0]->{format} : $_[0]; | |
} else { | |
return $self->{fileAttributeFormat}; | |
} | |
} | |
sub zip64 { | |
shift->{'zip64'}; | |
} | |
sub desiredZip64Mode { | |
my $self = shift; | |
my $desiredZip64Mode = $self->{'desiredZip64Mode'}; | |
if (@_) { | |
$self->{'desiredZip64Mode'} = | |
ref($_[0]) eq 'HASH' ? shift->{desiredZip64Mode} : shift; | |
} | |
return $desiredZip64Mode; | |
} | |
sub versionMadeBy { | |
shift->{'versionMadeBy'}; | |
} | |
sub versionNeededToExtract { | |
shift->{'versionNeededToExtract'}; | |
} | |
sub bitFlag { | |
my $self = shift; | |
# Set General Purpose Bit Flags according to the desiredCompressionLevel setting | |
if ( $self->desiredCompressionLevel == 1 | |
|| $self->desiredCompressionLevel == 2) { | |
$self->{'bitFlag'} |= DEFLATING_COMPRESSION_FAST; | |
} elsif ($self->desiredCompressionLevel == 3 | |
|| $self->desiredCompressionLevel == 4 | |
|| $self->desiredCompressionLevel == 5 | |
|| $self->desiredCompressionLevel == 6 | |
|| $self->desiredCompressionLevel == 7) { | |
$self->{'bitFlag'} |= DEFLATING_COMPRESSION_NORMAL; | |
} elsif ($self->desiredCompressionLevel == 8 | |
|| $self->desiredCompressionLevel == 9) { | |
$self->{'bitFlag'} |= DEFLATING_COMPRESSION_MAXIMUM; | |
} | |
if ($Archive::Zip::UNICODE) { | |
$self->{'bitFlag'} |= 0x0800; | |
} | |
$self->{'bitFlag'}; | |
} | |
sub password { | |
my $self = shift; | |
$self->{'password'} = shift if @_; | |
$self->{'password'}; | |
} | |
sub compressionMethod { | |
shift->{'compressionMethod'}; | |
} | |
sub desiredCompressionMethod { | |
my $self = shift; | |
my $newDesiredCompressionMethod = | |
(ref($_[0]) eq 'HASH') ? shift->{compressionMethod} : shift; | |
my $oldDesiredCompressionMethod = $self->{'desiredCompressionMethod'}; | |
if (defined($newDesiredCompressionMethod)) { | |
$self->{'desiredCompressionMethod'} = $newDesiredCompressionMethod; | |
if ($newDesiredCompressionMethod == COMPRESSION_STORED) { | |
$self->{'desiredCompressionLevel'} = 0; | |
$self->{'bitFlag'} &= ~GPBF_HAS_DATA_DESCRIPTOR_MASK | |
if $self->uncompressedSize() == 0; | |
} elsif ($oldDesiredCompressionMethod == COMPRESSION_STORED) { | |
$self->{'desiredCompressionLevel'} = COMPRESSION_LEVEL_DEFAULT; | |
} | |
} | |
return $oldDesiredCompressionMethod; | |
} | |
sub desiredCompressionLevel { | |
my $self = shift; | |
my $newDesiredCompressionLevel = | |
(ref($_[0]) eq 'HASH') ? shift->{compressionLevel} : shift; | |
my $oldDesiredCompressionLevel = $self->{'desiredCompressionLevel'}; | |
if (defined($newDesiredCompressionLevel)) { | |
$self->{'desiredCompressionLevel'} = $newDesiredCompressionLevel; | |
$self->{'desiredCompressionMethod'} = ( | |
$newDesiredCompressionLevel | |
? COMPRESSION_DEFLATED | |
: COMPRESSION_STORED | |
); | |
} | |
return $oldDesiredCompressionLevel; | |
} | |
sub fileName { | |
my $self = shift; | |
my $newName = shift; | |
if (defined $newName) { | |
$newName =~ y{\\/}{/}s; # deal with dos/windoze problems | |
$self->{'fileName'} = $newName; | |
} | |
return $self->{'fileName'}; | |
} | |
sub fileNameAsBytes { | |
my $self = shift; | |
my $bytes = $self->{'fileName'}; | |
if($self->{'bitFlag'} & 0x800){ | |
$bytes = Encode::encode_utf8($bytes); | |
} | |
return $bytes; | |
} | |
sub lastModFileDateTime { | |
my $modTime = shift->{'lastModFileDateTime'}; | |
$modTime =~ m/^(\d+)$/; # untaint | |
return $1; | |
} | |
sub lastModTime { | |
my $self = shift; | |
return _dosToUnixTime($self->lastModFileDateTime()); | |
} | |
sub setLastModFileDateTimeFromUnix { | |
my $self = shift; | |
my $time_t = shift; | |
$self->{'lastModFileDateTime'} = _unixToDosTime($time_t); | |
} | |
sub internalFileAttributes { | |
shift->{'internalFileAttributes'}; | |
} | |
sub externalFileAttributes { | |
shift->{'externalFileAttributes'}; | |
} | |
# Convert UNIX permissions into proper value for zip file | |
# Usable as a function or a method | |
sub _mapPermissionsFromUnix { | |
my $self = shift; | |
my $mode = shift; | |
my $attribs = $mode << 16; | |
# Microsoft Windows Explorer needs this bit set for directories | |
if ($mode & DIRECTORY_ATTRIB) { | |
$attribs |= 16; | |
} | |
return $attribs; | |
# TODO: map more MS-DOS perms | |
} | |
# Convert ZIP permissions into Unix ones | |
# | |
# This was taken from Info-ZIP group's portable UnZip | |
# zipfile-extraction program, version 5.50. | |
# http://www.info-zip.org/pub/infozip/ | |
# | |
# See the mapattr() function in unix/unix.c | |
# See the attribute format constants in unzpriv.h | |
# | |
# XXX Note that there's one situation that is not implemented | |
# yet that depends on the "extra field." | |
sub _mapPermissionsToUnix { | |
my $self = shift; | |
my $format = $self->{'fileAttributeFormat'}; | |
my $attribs = $self->{'externalFileAttributes'}; | |
my $mode = 0; | |
if ($format == FA_AMIGA) { | |
$attribs = $attribs >> 17 & 7; # Amiga RWE bits | |
$mode = $attribs << 6 | $attribs << 3 | $attribs; | |
return $mode; | |
} | |
if ($format == FA_THEOS) { | |
$attribs &= 0xF1FFFFFF; | |
if (($attribs & 0xF0000000) != 0x40000000) { | |
$attribs &= 0x01FFFFFF; # not a dir, mask all ftype bits | |
} else { | |
$attribs &= 0x41FFFFFF; # leave directory bit as set | |
} | |
} | |
if ( $format == FA_UNIX | |
|| $format == FA_VAX_VMS | |
|| $format == FA_ACORN | |
|| $format == FA_ATARI_ST | |
|| $format == FA_BEOS | |
|| $format == FA_QDOS | |
|| $format == FA_TANDEM) { | |
$mode = $attribs >> 16; | |
return $mode if $mode != 0 or not $self->localExtraField; | |
# warn("local extra field is: ", $self->localExtraField, "\n"); | |
# XXX This condition is not implemented | |
# I'm just including the comments from the info-zip section for now. | |
# Some (non-Info-ZIP) implementations of Zip for Unix and | |
# VMS (and probably others ??) leave 0 in the upper 16-bit | |
# part of the external_file_attributes field. Instead, they | |
# store file permission attributes in some extra field. | |
# As a work-around, we search for the presence of one of | |
# these extra fields and fall back to the MSDOS compatible | |
# part of external_file_attributes if one of the known | |
# e.f. types has been detected. | |
# Later, we might implement extraction of the permission | |
# bits from the VMS extra field. But for now, the work-around | |
# should be sufficient to provide "readable" extracted files. | |
# (For ASI Unix e.f., an experimental remap from the e.f. | |
# mode value IS already provided!) | |
} | |
# PKWARE's PKZip for Unix marks entries as FA_MSDOS, but stores the | |
# Unix attributes in the upper 16 bits of the external attributes | |
# field, just like Info-ZIP's Zip for Unix. We try to use that | |
# value, after a check for consistency with the MSDOS attribute | |
# bits (see below). | |
if ($format == FA_MSDOS) { | |
$mode = $attribs >> 16; | |
} | |
# FA_MSDOS, FA_OS2_HPFS, FA_WINDOWS_NTFS, FA_MACINTOSH, FA_TOPS20 | |
$attribs = !($attribs & 1) << 1 | ($attribs & 0x10) >> 4; | |
# keep previous $mode setting when its "owner" | |
# part appears to be consistent with DOS attribute flags! | |
return $mode if ($mode & 0700) == (0400 | $attribs << 6); | |
$mode = 0444 | $attribs << 6 | $attribs << 3 | $attribs; | |
return $mode; | |
} | |
sub unixFileAttributes { | |
my $self = shift; | |
my $oldPerms = $self->_mapPermissionsToUnix; | |
my $perms; | |
if (@_) { | |
$perms = (ref($_[0]) eq 'HASH') ? $_[0]->{attributes} : $_[0]; | |
if ($self->isDirectory) { | |
$perms &= ~FILE_ATTRIB; | |
$perms |= DIRECTORY_ATTRIB; | |
} else { | |
$perms &= ~DIRECTORY_ATTRIB; | |
$perms |= FILE_ATTRIB; | |
} | |
$self->{externalFileAttributes} = | |
$self->_mapPermissionsFromUnix($perms); | |
} | |
return $oldPerms; | |
} | |
sub localExtraField { | |
my $self = shift; | |
if (@_) { | |
my $localExtraField = | |
(ref($_[0]) eq 'HASH') ? $_[0]->{field} : $_[0]; | |
my ($status, $zip64) = | |
$self->_extractZip64ExtraField($localExtraField, undef, undef); | |
if ($status != AZ_OK) { | |
return $status; | |
} | |
elsif ($zip64) { | |
return _formatError('invalid extra field (contains zip64 information)'); | |
} | |
else { | |
$self->{localExtraField} = $localExtraField; | |
return AZ_OK; | |
} | |
} else { | |
return $self->{localExtraField}; | |
} | |
} | |
sub cdExtraField { | |
my $self = shift; | |
if (@_) { | |
my $cdExtraField = | |
(ref($_[0]) eq 'HASH') ? $_[0]->{field} : $_[0]; | |
my ($status, $zip64) = | |
$self->_extractZip64ExtraField($cdExtraField, undef, undef); | |
if ($status != AZ_OK) { | |
return $status; | |
} | |
elsif ($zip64) { | |
return _formatError('invalid extra field (contains zip64 information)'); | |
} | |
else { | |
$self->{cdExtraField} = $cdExtraField; | |
return AZ_OK; | |
} | |
} else { | |
return $self->{cdExtraField}; | |
} | |
} | |
sub extraFields { | |
my $self = shift; | |
return $self->localExtraField() . $self->cdExtraField(); | |
} | |
sub fileComment { | |
my $self = shift; | |
if (@_) { | |
$self->{fileComment} = | |
(ref($_[0]) eq 'HASH') | |
? pack('C0a*', $_[0]->{comment}) | |
: pack('C0a*', $_[0]); | |
} else { | |
return $self->{fileComment}; | |
} | |
} | |
sub hasDataDescriptor { | |
my $self = shift; | |
if (@_) { | |
my $shouldHave = shift; | |
if ($shouldHave) { | |
$self->{'bitFlag'} |= GPBF_HAS_DATA_DESCRIPTOR_MASK; | |
} else { | |
$self->{'bitFlag'} &= ~GPBF_HAS_DATA_DESCRIPTOR_MASK; | |
} | |
} | |
return $self->{'bitFlag'} & GPBF_HAS_DATA_DESCRIPTOR_MASK; | |
} | |
sub crc32 { | |
shift->{'crc32'}; | |
} | |
sub crc32String { | |
sprintf("%08x", shift->{'crc32'}); | |
} | |
sub compressedSize { | |
shift->{'compressedSize'}; | |
} | |
sub uncompressedSize { | |
shift->{'uncompressedSize'}; | |
} | |
sub isEncrypted { | |
shift->{'bitFlag'} & GPBF_ENCRYPTED_MASK; | |
} | |
sub isTextFile { | |
my $self = shift; | |
my $bit = $self->internalFileAttributes() & IFA_TEXT_FILE_MASK; | |
if (@_) { | |
my $flag = (ref($_[0]) eq 'HASH') ? shift->{flag} : shift; | |
$self->{'internalFileAttributes'} &= ~IFA_TEXT_FILE_MASK; | |
$self->{'internalFileAttributes'} |= | |
($flag ? IFA_TEXT_FILE : IFA_BINARY_FILE); | |
} | |
return $bit == IFA_TEXT_FILE; | |
} | |
sub isBinaryFile { | |
my $self = shift; | |
my $bit = $self->internalFileAttributes() & IFA_TEXT_FILE_MASK; | |
if (@_) { | |
my $flag = shift; | |
$self->{'internalFileAttributes'} &= ~IFA_TEXT_FILE_MASK; | |
$self->{'internalFileAttributes'} |= | |
($flag ? IFA_BINARY_FILE : IFA_TEXT_FILE); | |
} | |
return $bit == IFA_BINARY_FILE; | |
} | |
sub extractToFileNamed { | |
my $self = shift; | |
# local FS name | |
my $name = (ref($_[0]) eq 'HASH') ? $_[0]->{name} : $_[0]; | |
# Create directory for regular files as well as for symbolic | |
# links | |
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) { | |
$name = decode_utf8(Win32::GetFullPathName($name)); | |
mkpath_win32($name); | |
} else { | |
mkpath(dirname($name)); # croaks on error | |
} | |
# Check if the file / directory is a symbolic link *and* if | |
# the operating system supports these. Only in that case | |
# call method extractToFileHandle with the name of the | |
# symbolic link. If the operating system does not support | |
# symbolic links, process the member using the usual | |
# extraction routines, which creates a file containing the | |
# link target. | |
if ($self->isSymbolicLink() && OS_SUPPORTS_SYMLINK) { | |
return $self->extractToFileHandle($name); | |
} else { | |
my ($status, $fh); | |
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) { | |
Win32::CreateFile($name); | |
($status, $fh) = _newFileHandle(Win32::GetANSIPathName($name), 'w'); | |
} else { | |
($status, $fh) = _newFileHandle($name, 'w'); | |
} | |
return _ioError("Can't open file $name for write") unless $status; | |
$status = $self->extractToFileHandle($fh); | |
$fh->close(); | |
chmod($self->unixFileAttributes(), $name) | |
or return _error("Can't chmod() ${name}: $!"); | |
utime($self->lastModTime(), $self->lastModTime(), $name); | |
return $status; | |
} | |
} | |
sub mkpath_win32 { | |
my $path = shift; | |
use File::Spec; | |
my ($volume, @path) = File::Spec->splitdir($path); | |
$path = File::Spec->catfile($volume, shift @path); | |
pop @path; | |
while (@path) { | |
$path = File::Spec->catfile($path, shift @path); | |
Win32::CreateDirectory($path); | |
} | |
} | |
sub isSymbolicLink { | |
return shift->{'externalFileAttributes'} == 0xA1FF0000; | |
} | |
sub isDirectory { | |
return 0; | |
} | |
sub externalFileName { | |
return undef; | |
} | |
# Search the given extra field string for a zip64 extended | |
# information extra field and "correct" the header fields given | |
# in the remaining parameters with the information from that | |
# extra field, if required. Writes back the extra field string | |
# sans the zip64 information. The extra field string and all | |
# header fields must be passed as lvalues or the undefined value. | |
# | |
# This method returns a pair ($status, $zip64) in list context, | |
# where the latter flag specifies whether a zip64 extended | |
# information extra field was found. | |
# | |
# This method must be called with two header fields for local | |
# file headers and with four header fields for Central Directory | |
# headers. | |
sub _extractZip64ExtraField | |
{ | |
my $classOrSelf = shift; | |
my $extraField = $_[0]; | |
my ($zip64Data, $newExtraField) = (undef, ''); | |
while (length($extraField) >= 4) { | |
my ($headerId, $dataSize) = unpack('v v', $extraField); | |
if (length($extraField) < 4 + $dataSize) { | |
return _formatError('invalid extra field (bad data)'); | |
} | |
elsif ($headerId != 0x0001) { | |
$newExtraField .= substr($extraField, 0, 4 + $dataSize); | |
$extraField = substr($extraField, 4 + $dataSize); | |
} | |
else { | |
$zip64Data = substr($extraField, 4, $dataSize); | |
$extraField = substr($extraField, 4 + $dataSize); | |
} | |
} | |
if (length($extraField) != 0) { | |
return _formatError('invalid extra field (bad header ID or data size)'); | |
} | |
my $zip64 = 0; | |
if (defined($zip64Data)) { | |
return _zip64NotSupported() unless ZIP64_SUPPORTED; | |
my $dataLength = length($zip64Data); | |
# Try to be tolerant with respect to the fields to be | |
# extracted from the zip64 extended information extra | |
# field and derive that information from the data itself, | |
# if possible. This works around, for example, incorrect | |
# extra fields written by certain versions of package | |
# IO::Compress::Zip. That package provides the disk | |
# number start in the extra field without setting the | |
# corresponding regular field to 0xffff. Plus it | |
# provides the full set of fields even for the local file | |
# header. | |
# | |
# Field zero is the extra field string which we must keep | |
# in @_ for future modification, so account for that. | |
my @fields; | |
if (@_ == 3 && $dataLength == 16) { | |
@fields = (undef, 0xffffffff, 0xffffffff); | |
} | |
elsif (@_ == 3 && $dataLength == 24) { | |
push(@_, undef); | |
@fields = (undef, 0xffffffff, 0xffffffff, 0xffffffff); | |
} | |
elsif (@_ == 3 && $dataLength == 28) { | |
push(@_, undef, undef); | |
@fields = (undef, 0xffffffff, 0xffffffff, 0xffffffff, 0xffff); | |
} | |
elsif (@_ == 5 && $dataLength == 24) { | |
@fields = (undef, 0xffffffff, 0xffffffff, 0xffffffff); | |
} | |
elsif (@_ == 5 && $dataLength == 28) { | |
@fields = (undef, 0xffffffff, 0xffffffff, 0xffffffff, 0xffff); | |
} | |
else { | |
@fields = map { defined $_ ? $_ : 0 } @_; | |
} | |
my @fieldIndexes = (0); | |
my $fieldFormat = ''; | |
my $expDataLength = 0; | |
if ($fields[1] == 0xffffffff) { | |
push(@fieldIndexes, 1); | |
$fieldFormat .= 'Q< '; | |
$expDataLength += 8; | |
} | |
if ($fields[2] == 0xffffffff) { | |
push(@fieldIndexes, 2); | |
$fieldFormat .= 'Q< '; | |
$expDataLength += 8; | |
} | |
if (@fields > 3 && $fields[3] == 0xffffffff) { | |
push(@fieldIndexes, 3); | |
$fieldFormat .= 'Q< '; | |
$expDataLength += 8; | |
} | |
if (@fields > 3 && $fields[4] == 0xffff) { | |
push(@fieldIndexes, 4); | |
$fieldFormat .= 'L< '; | |
$expDataLength += 4; | |
} | |
if ($dataLength == $expDataLength) { | |
@_[@fieldIndexes] = ($newExtraField, unpack($fieldFormat, $zip64Data)); | |
$zip64 = 1; | |
} | |
else { | |
return _formatError('invalid zip64 extended information extra field'); | |
} | |
} | |
return (AZ_OK, $zip64); | |
} | |
# The following are used when copying data | |
sub _writeOffset { | |
shift->{'writeOffset'}; | |
} | |
sub _readOffset { | |
shift->{'readOffset'}; | |
} | |
sub writeLocalHeaderRelativeOffset { | |
shift->{'writeLocalHeaderRelativeOffset'}; | |
} | |
# Maintained in method Archive::Zip::Archive::writeToFileHandle | |
sub wasWritten { | |
shift->{'wasWritten'} | |
} | |
sub _dataEnded { | |
shift->{'dataEnded'}; | |
} | |
sub _readDataRemaining { | |
shift->{'readDataRemaining'}; | |
} | |
sub _inflater { | |
shift->{'inflater'}; | |
} | |
sub _deflater { | |
shift->{'deflater'}; | |
} | |
# DOS date/time format | |
# 0-4 (5) Second divided by 2 | |
# 5-10 (6) Minute (0-59) | |
# 11-15 (5) Hour (0-23 on a 24-hour clock) | |
# 16-20 (5) Day of the month (1-31) | |
# 21-24 (4) Month (1 = January, 2 = February, etc.) | |
# 25-31 (7) Year offset from 1980 (add 1980 to get actual year) | |
# Convert DOS date/time format to unix time_t format | |
# NOT AN OBJECT METHOD! | |
sub _dosToUnixTime { | |
my $dt = shift; | |
return time() unless defined($dt); | |
my $year = (($dt >> 25) & 0x7f) + 1980; | |
my $mon = (($dt >> 21) & 0x0f) - 1; | |
my $mday = (($dt >> 16) & 0x1f); | |
my $hour = (($dt >> 11) & 0x1f); | |
my $min = (($dt >> 5) & 0x3f); | |
my $sec = (($dt << 1) & 0x3e); | |
# catch errors | |
my $time_t = | |
eval { Time::Local::timelocal($sec, $min, $hour, $mday, $mon, $year); }; | |
return time() if ($@); | |
return $time_t; | |
} | |
# Note, this is not exactly UTC 1980, it's 1980 + 12 hours and 1 | |
# minute so that nothing timezoney can muck us up. | |
my $safe_epoch = 31.686060; | |
# convert a unix time to DOS date/time | |
# NOT AN OBJECT METHOD! | |
sub _unixToDosTime { | |
my $time_t = shift; | |
unless ($time_t) { | |
_error("Tried to add member with zero or undef value for time"); | |
$time_t = $safe_epoch; | |
} | |
if ($time_t < $safe_epoch) { | |
_ioError("Unsupported date before 1980 encountered, moving to 1980"); | |
$time_t = $safe_epoch; | |
} | |
my ($sec, $min, $hour, $mday, $mon, $year) = localtime($time_t); | |
my $dt = 0; | |
$dt += ($sec >> 1); | |
$dt += ($min << 5); | |
$dt += ($hour << 11); | |
$dt += ($mday << 16); | |
$dt += (($mon + 1) << 21); | |
$dt += (($year - 80) << 25); | |
return $dt; | |
} | |
# Write my local header to a file handle. | |
# Returns a pair (AZ_OK, $headerSize) on success. | |
sub _writeLocalFileHeader { | |
my $self = shift; | |
my $fh = shift; | |
my $refresh = @_ ? shift : 0; | |
my $zip64 = $self->zip64(); | |
my $hasDataDescriptor = $self->hasDataDescriptor(); | |
my $versionNeededToExtract = $self->versionNeededToExtract(); | |
my $crc32; | |
my $compressedSize; | |
my $uncompressedSize; | |
my $localExtraField = $self->localExtraField(); | |
if (! $zip64) { | |
if ($refresh) { | |
$crc32 = $self->crc32(); | |
$compressedSize = $self->_writeOffset(); | |
$uncompressedSize = $self->uncompressedSize(); | |
# Handle a brain-dead corner case gracefully. | |
# Otherwise we a) would always need to write zip64 | |
# format or b) re-write the complete member data on | |
# refresh (which might not always be possible). | |
if ($compressedSize > 0xffffffff) { | |
return _formatError('compressed size too large for refresh'); | |
} | |
} | |
elsif ($hasDataDescriptor) { | |
$crc32 = 0; | |
$compressedSize = 0; | |
$uncompressedSize = 0; | |
} | |
else { | |
$crc32 = $self->crc32(); | |
$compressedSize = $self->_writeOffset(); | |
$uncompressedSize = $self->uncompressedSize(); | |
} | |
} | |
else { | |
return _zip64NotSupported() unless ZIP64_SUPPORTED; | |
$versionNeededToExtract = 45 if ($versionNeededToExtract < 45); | |
my $zip64CompressedSize; | |
my $zip64UncompressedSize; | |
if ($refresh) { | |
$crc32 = $self->crc32(); | |
$compressedSize = 0xffffffff; | |
$uncompressedSize = 0xffffffff; | |
$zip64CompressedSize = $self->_writeOffset(); | |
$zip64UncompressedSize = $self->uncompressedSize(); | |
} | |
elsif ($hasDataDescriptor) { | |
$crc32 = 0; | |
$compressedSize = 0xffffffff; | |
$uncompressedSize = 0xffffffff; | |
$zip64CompressedSize = 0; | |
$zip64UncompressedSize = 0; | |
} | |
else { | |
$crc32 = $self->crc32(); | |
$compressedSize = 0xffffffff; | |
$uncompressedSize = 0xffffffff; | |
$zip64CompressedSize = $self->_writeOffset(); | |
$zip64UncompressedSize = $self->uncompressedSize(); | |
} | |
$localExtraField .= pack('S< S< Q< Q<', | |
0x0001, 16, | |
$zip64UncompressedSize, | |
$zip64CompressedSize); | |
} | |
my $fileNameLength = length($self->fileNameAsBytes()); | |
my $localFieldLength = length($localExtraField); | |
my $signatureData = pack(SIGNATURE_FORMAT, LOCAL_FILE_HEADER_SIGNATURE); | |
$self->_print($fh, $signatureData) | |
or return _ioError("writing local header signature"); | |
my $header = | |
pack(LOCAL_FILE_HEADER_FORMAT, | |
$versionNeededToExtract, | |
$self->{'bitFlag'}, | |
$self->desiredCompressionMethod(), | |
$self->lastModFileDateTime(), | |
$crc32, | |
$compressedSize, | |
$uncompressedSize, | |
$fileNameLength, | |
$localFieldLength); | |
$self->_print($fh, $header) | |
or return _ioError("writing local header"); | |
# Write these only if required | |
if (! $refresh || $zip64) { | |
if ($fileNameLength) { | |
$self->_print($fh, $self->fileNameAsBytes()) | |
or return _ioError("writing local header filename"); | |
} | |
if ($localFieldLength) { | |
$self->_print($fh, $localExtraField) | |
or return _ioError("writing local extra field"); | |
} | |
} | |
return | |
(AZ_OK, | |
LOCAL_FILE_HEADER_LENGTH + | |
SIGNATURE_LENGTH + | |
$fileNameLength + | |
$localFieldLength); | |
} | |
# Re-writes the local file header with new crc32 and compressedSize fields. | |
# To be called after writing the data stream. | |
# Assumes that filename and extraField sizes didn't change since last written. | |
sub _refreshLocalFileHeader { | |
my $self = shift; | |
my $fh = shift; | |
my $here = $fh->tell(); | |
$fh->seek($self->writeLocalHeaderRelativeOffset(), IO::Seekable::SEEK_SET) | |
or return _ioError("seeking to rewrite local header"); | |
my ($status, undef) = $self->_writeLocalFileHeader($fh, 1); | |
return $status if $status != AZ_OK; | |
$fh->seek($here, IO::Seekable::SEEK_SET) | |
or return _ioError("seeking after rewrite of local header"); | |
return AZ_OK; | |
} | |
# Write central directory file header. | |
# Returns a pair (AZ_OK, $headerSize) on success. | |
sub _writeCentralDirectoryFileHeader { | |
my $self = shift; | |
my $fh = shift; | |
my $adz64m = shift; # $archiveDesiredZip64Mode | |
# (Re-)Determine whether to write zip64 format. Assume | |
# {'diskNumberStart'} is always zero. | |
my $zip64 = $adz64m == ZIP64_HEADERS | |
|| $self->desiredZip64Mode() == ZIP64_HEADERS | |
|| $self->_writeOffset() > 0xffffffff | |
|| $self->uncompressedSize() > 0xffffffff | |
|| $self->writeLocalHeaderRelativeOffset() > 0xffffffff; | |
$self->{'zip64'} ||= $zip64; | |
my $versionMadeBy = $self->versionMadeBy(); | |
my $versionNeededToExtract = $self->versionNeededToExtract(); | |
my $compressedSize = $self->_writeOffset(); | |
my $uncompressedSize = $self->uncompressedSize(); | |
my $localHeaderRelativeOffset = $self->writeLocalHeaderRelativeOffset(); | |
my $cdExtraField = $self->cdExtraField(); | |
if (!$zip64) { | |
# no-op | |
} | |
else { | |
return _zip64NotSupported() unless ZIP64_SUPPORTED; | |
$versionNeededToExtract = 45 if ($versionNeededToExtract < 45); | |
my $extraFieldFormat = ''; | |
my @extraFieldValues = (); | |
my $extraFieldSize = 0; | |
if ($uncompressedSize > 0xffffffff) { | |
$extraFieldFormat .= 'Q< '; | |
push(@extraFieldValues, $uncompressedSize); | |
$extraFieldSize += 8; | |
$uncompressedSize = 0xffffffff; | |
} | |
if ($compressedSize > 0xffffffff) { | |
$extraFieldFormat .= 'Q< '; | |
push(@extraFieldValues, $compressedSize); | |
$extraFieldSize += 8; | |
$compressedSize = 0xffffffff; | |
} | |
# Avoid empty zip64 extended information extra fields | |
if ( $localHeaderRelativeOffset > 0xffffffff | |
|| @extraFieldValues == 0) { | |
$extraFieldFormat .= 'Q< '; | |
push(@extraFieldValues, $localHeaderRelativeOffset); | |
$extraFieldSize += 8; | |
$localHeaderRelativeOffset = 0xffffffff; | |
} | |
$cdExtraField .= | |
pack("S< S< $extraFieldFormat", | |
0x0001, $extraFieldSize, | |
@extraFieldValues); | |
} | |
my $fileNameLength = length($self->fileNameAsBytes()); | |
my $extraFieldLength = length($cdExtraField); | |
my $fileCommentLength = length($self->fileComment()); | |
my $sigData = | |
pack(SIGNATURE_FORMAT, CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE); | |
$self->_print($fh, $sigData) | |
or return _ioError("writing central directory header signature"); | |
my $header = pack( | |
CENTRAL_DIRECTORY_FILE_HEADER_FORMAT, | |
$versionMadeBy, | |
$self->fileAttributeFormat(), | |
$versionNeededToExtract, | |
$self->bitFlag(), | |
$self->desiredCompressionMethod(), | |
$self->lastModFileDateTime(), | |
$self->crc32(), # these three fields should have been updated | |
$compressedSize, # by writing the data stream out | |
$uncompressedSize, # | |
$fileNameLength, | |
$extraFieldLength, | |
$fileCommentLength, | |
0, # {'diskNumberStart'}, | |
$self->internalFileAttributes(), | |
$self->externalFileAttributes(), | |
$localHeaderRelativeOffset); | |
$self->_print($fh, $header) | |
or return _ioError("writing central directory header"); | |
if ($fileNameLength) { | |
$self->_print($fh, $self->fileNameAsBytes()) | |
or return _ioError("writing central directory header signature"); | |
} | |
if ($extraFieldLength) { | |
$self->_print($fh, $cdExtraField) | |
or return _ioError("writing central directory extra field"); | |
} | |
if ($fileCommentLength) { | |
$self->_print($fh, $self->fileComment()) | |
or return _ioError("writing central directory file comment"); | |
} | |
# Update object members with information which might have | |
# changed while writing this member. We already did the | |
# zip64 flag. We must not update the extra fields with any | |
# zip64 information, since we consider that internal. | |
$self->{'versionNeededToExtract'} = $versionNeededToExtract; | |
$self->{'compressedSize'} = $self->_writeOffset(); | |
return | |
(AZ_OK, | |
CENTRAL_DIRECTORY_FILE_HEADER_LENGTH + | |
SIGNATURE_LENGTH + | |
$fileNameLength + | |
$extraFieldLength + | |
$fileCommentLength) | |
} | |
# This writes a data descriptor to the given file handle. | |
# Assumes that crc32, writeOffset, and uncompressedSize are | |
# set correctly (they should be after a write). | |
# Returns a pair (AZ_OK, $dataDescriptorSize) on success. | |
# Further, the local file header should have the | |
# GPBF_HAS_DATA_DESCRIPTOR_MASK bit set. | |
sub _writeDataDescriptor { | |
my $self = shift; | |
my $fh = shift; | |
my $descriptor; | |
if (! $self->zip64()) { | |
$descriptor = | |
pack(SIGNATURE_FORMAT . DATA_DESCRIPTOR_FORMAT, | |
DATA_DESCRIPTOR_SIGNATURE, | |
$self->crc32(), | |
$self->_writeOffset(), # compressed size | |
$self->uncompressedSize()); | |
} | |
else { | |
return _zip64NotSupported() unless ZIP64_SUPPORTED; | |
$descriptor = | |
pack(SIGNATURE_FORMAT . DATA_DESCRIPTOR_ZIP64_FORMAT, | |
DATA_DESCRIPTOR_SIGNATURE, | |
$self->crc32(), | |
$self->_writeOffset(), # compressed size | |
$self->uncompressedSize()); | |
} | |
$self->_print($fh, $descriptor) | |
or return _ioError("writing data descriptor"); | |
return (AZ_OK, length($descriptor)); | |
} | |
sub readChunk { | |
my $self = shift; | |
my $chunkSize = (ref($_[0]) eq 'HASH') ? $_[0]->{chunkSize} : $_[0]; | |
if ($self->readIsDone()) { | |
$self->endRead(); | |
my $dummy = ''; | |
return (\$dummy, AZ_STREAM_END); | |
} | |
$chunkSize = $Archive::Zip::ChunkSize if not defined($chunkSize); | |
$chunkSize = $self->_readDataRemaining() | |
if $chunkSize > $self->_readDataRemaining(); | |
my $buffer = ''; | |
my $outputRef; | |
my ($bytesRead, $status) = $self->_readRawChunk(\$buffer, $chunkSize); | |
return (\$buffer, $status) unless $status == AZ_OK; | |
$buffer && $self->isEncrypted and $buffer = $self->_decode($buffer); | |
$self->{'readDataRemaining'} -= $bytesRead; | |
$self->{'readOffset'} += $bytesRead; | |
if ($self->compressionMethod() == COMPRESSION_STORED) { | |
$self->{'crc32'} = $self->computeCRC32($buffer, $self->{'crc32'}); | |
} | |
($outputRef, $status) = &{$self->{'chunkHandler'}}($self, \$buffer); | |
$self->{'writeOffset'} += length($$outputRef); | |
$self->endRead() | |
if $self->readIsDone(); | |
return ($outputRef, $status); | |
} | |
# Read the next raw chunk of my data. Subclasses MUST implement. | |
# my ( $bytesRead, $status) = $self->_readRawChunk( \$buffer, $chunkSize ); | |
sub _readRawChunk { | |
my $self = shift; | |
return $self->_subclassResponsibility(); | |
} | |
# A place holder to catch rewindData errors if someone ignores | |
# the error code. | |
sub _noChunk { | |
my $self = shift; | |
return (\undef, _error("trying to copy chunk when init failed")); | |
} | |
# Basically a no-op so that I can have a consistent interface. | |
# ( $outputRef, $status) = $self->_copyChunk( \$buffer ); | |
sub _copyChunk { | |
my ($self, $dataRef) = @_; | |
return ($dataRef, AZ_OK); | |
} | |
# ( $outputRef, $status) = $self->_deflateChunk( \$buffer ); | |
sub _deflateChunk { | |
my ($self, $buffer) = @_; | |
my ($status) = $self->_deflater()->deflate($buffer, my $out); | |
if ($self->_readDataRemaining() == 0) { | |
my $extraOutput; | |
($status) = $self->_deflater()->flush($extraOutput); | |
$out .= $extraOutput; | |
$self->endRead(); | |
return (\$out, AZ_STREAM_END); | |
} elsif ($status == Z_OK) { | |
return (\$out, AZ_OK); | |
} else { | |
$self->endRead(); | |
my $retval = _error('deflate error', $status); | |
my $dummy = ''; | |
return (\$dummy, $retval); | |
} | |
} | |
# ( $outputRef, $status) = $self->_inflateChunk( \$buffer ); | |
sub _inflateChunk { | |
my ($self, $buffer) = @_; | |
my ($status) = $self->_inflater()->inflate($buffer, my $out); | |
my $retval; | |
$self->endRead() unless $status == Z_OK; | |
if ($status == Z_OK || $status == Z_STREAM_END) { | |
$retval = ($status == Z_STREAM_END) ? AZ_STREAM_END : AZ_OK; | |
return (\$out, $retval); | |
} else { | |
$retval = _error('inflate error', $status); | |
my $dummy = ''; | |
return (\$dummy, $retval); | |
} | |
} | |
sub rewindData { | |
my $self = shift; | |
my $status; | |
# set to trap init errors | |
$self->{'chunkHandler'} = $self->can('_noChunk'); | |
# Work around WinZip bug with 0-length DEFLATED files | |
$self->desiredCompressionMethod(COMPRESSION_STORED) | |
if $self->uncompressedSize() == 0; | |
# assume that we're going to read the whole file, and compute the CRC anew. | |
$self->{'crc32'} = 0 | |
if ($self->compressionMethod() == COMPRESSION_STORED); | |
# These are the only combinations of methods we deal with right now. | |
if ( $self->compressionMethod() == COMPRESSION_STORED | |
and $self->desiredCompressionMethod() == COMPRESSION_DEFLATED) { | |
($self->{'deflater'}, $status) = Compress::Raw::Zlib::Deflate->new( | |
'-Level' => $self->desiredCompressionLevel(), | |
'-WindowBits' => -MAX_WBITS(), # necessary magic | |
'-Bufsize' => $Archive::Zip::ChunkSize, | |
@_ | |
); # pass additional options | |
return _error('deflateInit error:', $status) | |
unless $status == Z_OK; | |
$self->{'chunkHandler'} = $self->can('_deflateChunk'); | |
} elsif ($self->compressionMethod() == COMPRESSION_DEFLATED | |
and $self->desiredCompressionMethod() == COMPRESSION_STORED) { | |
($self->{'inflater'}, $status) = Compress::Raw::Zlib::Inflate->new( | |
'-WindowBits' => -MAX_WBITS(), # necessary magic | |
'-Bufsize' => $Archive::Zip::ChunkSize, | |
@_ | |
); # pass additional options | |
return _error('inflateInit error:', $status) | |
unless $status == Z_OK; | |
$self->{'chunkHandler'} = $self->can('_inflateChunk'); | |
} elsif ($self->compressionMethod() == $self->desiredCompressionMethod()) { | |
$self->{'chunkHandler'} = $self->can('_copyChunk'); | |
} else { | |
return _error( | |
sprintf( | |
"Unsupported compression combination: read %d, write %d", | |
$self->compressionMethod(), | |
$self->desiredCompressionMethod())); | |
} | |
$self->{'readDataRemaining'} = | |
($self->compressionMethod() == COMPRESSION_STORED) | |
? $self->uncompressedSize() | |
: $self->compressedSize(); | |
$self->{'dataEnded'} = 0; | |
$self->{'readOffset'} = 0; | |
return AZ_OK; | |
} | |
sub endRead { | |
my $self = shift; | |
delete $self->{'inflater'}; | |
delete $self->{'deflater'}; | |
$self->{'dataEnded'} = 1; | |
$self->{'readDataRemaining'} = 0; | |
return AZ_OK; | |
} | |
sub readIsDone { | |
my $self = shift; | |
return ($self->_dataEnded() or !$self->_readDataRemaining()); | |
} | |
sub contents { | |
my $self = shift; | |
my $newContents = shift; | |
if (defined($newContents)) { | |
# Change our type and ensure that succeeded to avoid | |
# endless recursion | |
$self->_become('Archive::Zip::StringMember'); | |
$self->_ISA('Archive::Zip::StringMember') or | |
return | |
wantarray | |
? (undef, $self->_error('becoming Archive::Zip::StringMember')) | |
: undef; | |
# Now call the subclass contents method | |
my $retval = | |
$self->contents(pack('C0a*', $newContents)); # in case of Unicode | |
return wantarray ? ($retval, AZ_OK) : $retval; | |
} else { | |
my $oldCompression = | |
$self->desiredCompressionMethod(COMPRESSION_STORED); | |
my $status = $self->rewindData(@_); | |
if ($status != AZ_OK) { | |
$self->endRead(); | |
return wantarray ? (undef, $status) : undef; | |
} | |
my $retval = ''; | |
while ($status == AZ_OK) { | |
my $ref; | |
($ref, $status) = $self->readChunk($self->_readDataRemaining()); | |
# did we get it in one chunk? | |
if (length($$ref) == $self->uncompressedSize()) { | |
$retval = $$ref; | |
} else { | |
$retval .= $$ref | |
} | |
} | |
$self->desiredCompressionMethod($oldCompression); | |
$self->endRead(); | |
$status = AZ_OK if $status == AZ_STREAM_END; | |
$retval = undef unless $status == AZ_OK; | |
return wantarray ? ($retval, $status) : $retval; | |
} | |
} | |
sub extractToFileHandle { | |
my $self = shift; | |
# This can be the link name when "extracting" symbolic links | |
my $fhOrName = (ref($_[0]) eq 'HASH') ? shift->{fileHandle} : shift; | |
_binmode($fhOrName) if ref($fhOrName); | |
my $oldCompression = $self->desiredCompressionMethod(COMPRESSION_STORED); | |
my $status = $self->rewindData(@_); | |
$status = $self->_writeData($fhOrName) if $status == AZ_OK; | |
$self->desiredCompressionMethod($oldCompression); | |
$self->endRead(); | |
return $status; | |
} | |
# write local header and data stream to file handle. | |
# Returns a pair ($status, $memberSize) if successful. | |
# Stores the offset to the start of the header in my | |
# writeLocalHeaderRelativeOffset member. | |
sub _writeToFileHandle { | |
my $self = shift; | |
my $fh = shift; | |
my $fhIsSeekable = shift; | |
my $offset = shift; | |
my $adz64m = shift; # $archiveDesiredZip64Mode | |
return _error("no member name given for $self") | |
if $self->fileName() eq ''; | |
$self->{'writeLocalHeaderRelativeOffset'} = $offset; | |
# Determine if I need to refresh the header in a second pass | |
# later. If in doubt, I'd rather refresh, since it does not | |
# seem to be worth the hassle to save the extra seeks and | |
# writes. In addition, having below condition independent of | |
# any specific compression methods helps me piping through | |
# members with unknown compression methods unchanged. See | |
# test t/26_bzip2.t for details. | |
my $headerFieldsUnknown = $self->uncompressedSize() > 0; | |
# Determine if I need to write a data descriptor | |
# I need to do this if I can't refresh the header | |
# and I don't know compressed size or crc32 fields. | |
my $shouldWriteDataDescriptor = | |
($headerFieldsUnknown and not $fhIsSeekable); | |
$self->hasDataDescriptor(1) | |
if ($shouldWriteDataDescriptor); | |
# Determine whether to write zip64 format | |
my $zip64 = $adz64m == ZIP64_HEADERS | |
|| $self->desiredZip64Mode() == ZIP64_HEADERS | |
|| $self->uncompressedSize() > 0xffffffff; | |
$self->{'zip64'} ||= $zip64; | |
$self->{'writeOffset'} = 0; | |
my $status = $self->rewindData(); | |
return $status if $status != AZ_OK; | |
my $memberSize; | |
($status, $memberSize) = $self->_writeLocalFileHeader($fh); | |
return $status if $status != AZ_OK; | |
$status = $self->_writeData($fh); | |
return $status if $status != AZ_OK; | |
$memberSize += $self->_writeOffset(); | |
if ($self->hasDataDescriptor()) { | |
my $ddSize; | |
($status, $ddSize) = $self->_writeDataDescriptor($fh); | |
$memberSize += $ddSize; | |
} elsif ($headerFieldsUnknown) { | |
$status = $self->_refreshLocalFileHeader($fh); | |
} | |
return $status if $status != AZ_OK; | |
return ($status, $memberSize); | |
} | |
# Copy my (possibly compressed) data to given file handle. | |
# Returns C<AZ_OK> on success | |
sub _writeData { | |
my $self = shift; | |
my $fhOrName = shift; | |
if ($self->isSymbolicLink() && OS_SUPPORTS_SYMLINK) { | |
my $chunkSize = $Archive::Zip::ChunkSize; | |
my ($outRef, $status) = $self->readChunk($chunkSize); | |
symlink($$outRef, $fhOrName) | |
or return _ioError("creating symbolic link"); | |
} else { | |
return AZ_OK if ($self->uncompressedSize() == 0); | |
my $status; | |
my $chunkSize = $Archive::Zip::ChunkSize; | |
while ($self->_readDataRemaining() > 0) { | |
my $outRef; | |
($outRef, $status) = $self->readChunk($chunkSize); | |
return $status if ($status != AZ_OK and $status != AZ_STREAM_END); | |
if (length($$outRef) > 0) { | |
$self->_print($fhOrName, $$outRef) | |
or return _ioError("write error during copy"); | |
} | |
last if $status == AZ_STREAM_END; | |
} | |
} | |
return AZ_OK; | |
} | |
# Return true if I depend on the named file | |
sub _usesFileNamed { | |
return 0; | |
} | |
# ############################################################################## | |
# | |
# Decrypt section | |
# | |
# H.Merijn Brand (Tux) 2011-06-28 | |
# | |
# ############################################################################## | |
# This code is derived from the crypt source of unzip-6.0 dated 05 Jan 2007 | |
# Its license states: | |
# | |
# --8<--- | |
# Copyright (c) 1990-2007 Info-ZIP. All rights reserved. | |
# See the accompanying file LICENSE, version 2005-Feb-10 or later | |
# (the contents of which are also included in (un)zip.h) for terms of use. | |
# If, for some reason, all these files are missing, the Info-ZIP license | |
# also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html | |
# | |
# crypt.c (full version) by Info-ZIP. Last revised: [see crypt.h] | |
# The main encryption/decryption source code for Info-Zip software was | |
# originally written in Europe. To the best of our knowledge, it can | |
# be freely distributed in both source and object forms from any country, | |
# including the USA under License Exception TSU of the U.S. Export | |
# Administration Regulations (section 740.13(e)) of 6 June 2002. | |
# NOTE on copyright history: | |
# Previous versions of this source package (up to version 2.8) were | |
# not copyrighted and put in the public domain. If you cannot comply | |
# with the Info-Zip LICENSE, you may want to look for one of those | |
# public domain versions. | |
# | |
# This encryption code is a direct transcription of the algorithm from | |
# Roger Schlafly, described by Phil Katz in the file appnote.txt. This | |
# file (appnote.txt) is distributed with the PKZIP program (even in the | |
# version without encryption capabilities). | |
# -->8--- | |
# As of January 2000, US export regulations were amended to allow export | |
# of free encryption source code from the US. As of June 2002, these | |
# regulations were further relaxed to allow export of encryption binaries | |
# associated with free encryption source code. The Zip 2.31, UnZip 5.52 | |
# and Wiz 5.02 archives now include full crypto source code. As of the | |
# Zip 2.31 release, all official binaries include encryption support; the | |
# former "zcr" archives ceased to exist. | |
# (Note that restrictions may still exist in other countries, of course.) | |
# For now, we just support the decrypt stuff | |
# All below methods are supposed to be private | |
# use Data::Peek; | |
my @keys; | |
my @crct = do { | |
my $xor = 0xedb88320; | |
my @crc = (0) x 1024; | |
# generate a crc for every 8-bit value | |
foreach my $n (0 .. 255) { | |
my $c = $n; | |
$c = $c & 1 ? $xor ^ ($c >> 1) : $c >> 1 for 1 .. 8; | |
$crc[$n] = _revbe($c); | |
} | |
# generate crc for each value followed by one, two, and three zeros */ | |
foreach my $n (0 .. 255) { | |
my $c = ($crc[($crc[$n] >> 24) ^ 0] ^ ($crc[$n] << 8)) & 0xffffffff; | |
$crc[$_ * 256 + $n] = $c for 1 .. 3; | |
} | |
map { _revbe($crc[$_]) } 0 .. 1023; | |
}; | |
sub _crc32 { | |
my ($c, $b) = @_; | |
return ($crct[($c ^ $b) & 0xff] ^ ($c >> 8)); | |
} # _crc32 | |
sub _revbe { | |
my $w = shift; | |
return (($w >> 24) + | |
(($w >> 8) & 0xff00) + | |
(($w & 0xff00) << 8) + | |
(($w & 0xff) << 24)); | |
} # _revbe | |
sub _update_keys { | |
use integer; | |
my $c = shift; # signed int | |
$keys[0] = _crc32($keys[0], $c); | |
$keys[1] = (($keys[1] + ($keys[0] & 0xff)) * 0x08088405 + 1) & 0xffffffff; | |
my $keyshift = $keys[1] >> 24; | |
$keys[2] = _crc32($keys[2], $keyshift); | |
} # _update_keys | |
sub _zdecode ($) { | |
my $c = shift; | |
my $t = ($keys[2] & 0xffff) | 2; | |
_update_keys($c ^= ((($t * ($t ^ 1)) >> 8) & 0xff)); | |
return $c; | |
} # _zdecode | |
sub _decode { | |
my $self = shift; | |
my $buff = shift; | |
$self->isEncrypted or return $buff; | |
my $pass = $self->password; | |
defined $pass or return ""; | |
@keys = (0x12345678, 0x23456789, 0x34567890); | |
_update_keys($_) for unpack "C*", $pass; | |
# DDumper { uk => [ @keys ] }; | |
my $head = substr $buff, 0, 12, ""; | |
my @head = map { _zdecode($_) } unpack "C*", $head; | |
my $x = | |
$self->{externalFileAttributes} | |
? ($self->{lastModFileDateTime} >> 8) & 0xff | |
: $self->{crc32} >> 24; | |
$head[-1] == $x or return ""; # Password fail | |
# Worth checking ... | |
$self->{crc32c} = (unpack LOCAL_FILE_HEADER_FORMAT, pack "C*", @head)[3]; | |
# DHexDump ($buff); | |
$buff = pack "C*" => map { _zdecode($_) } unpack "C*" => $buff; | |
# DHexDump ($buff); | |
return $buff; | |
} # _decode | |
1; | |
ARCHIVE_ZIP_MEMBER | |
$fatpacked{"Archive/Zip/MemberRead.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_MEMBERREAD'; | |
package Archive::Zip::MemberRead; | |
=head1 NAME | |
Archive::Zip::MemberRead - A wrapper that lets you read Zip archive members as if they were files. | |
=cut | |
=head1 SYNOPSIS | |
use Archive::Zip; | |
use Archive::Zip::MemberRead; | |
$zip = Archive::Zip->new("file.zip"); | |
$fh = Archive::Zip::MemberRead->new($zip, "subdir/abc.txt"); | |
while (defined($line = $fh->getline())) | |
{ | |
print $fh->input_line_number . "#: $line\n"; | |
} | |
$read = $fh->read($buffer, 32*1024); | |
print "Read $read bytes as :$buffer:\n"; | |
=head1 DESCRIPTION | |
The Archive::Zip::MemberRead module lets you read Zip archive member data | |
just like you read data from files. | |
=head1 METHODS | |
=over 4 | |
=cut | |
use strict; | |
use Archive::Zip qw( :ERROR_CODES :CONSTANTS ); | |
use vars qw{$VERSION}; | |
my $nl; | |
BEGIN { | |
$VERSION = '1.68'; | |
$VERSION = eval $VERSION; | |
# Requirement for newline conversion. Should check for e.g., DOS and OS/2 as well, but am too lazy. | |
$nl = $^O eq 'MSWin32' ? "\r\n" : "\n"; | |
} | |
=item Archive::Zip::Member::readFileHandle() | |
You can get a C<Archive::Zip::MemberRead> from an archive member by | |
calling C<readFileHandle()>: | |
my $member = $zip->memberNamed('abc/def.c'); | |
my $fh = $member->readFileHandle(); | |
while (defined($line = $fh->getline())) | |
{ | |
# ... | |
} | |
$fh->close(); | |
=cut | |
sub Archive::Zip::Member::readFileHandle { | |
return Archive::Zip::MemberRead->new(shift()); | |
} | |
=item Archive::Zip::MemberRead->new($zip, $fileName) | |
=item Archive::Zip::MemberRead->new($zip, $member) | |
=item Archive::Zip::MemberRead->new($member) | |
Construct a new Archive::Zip::MemberRead on the specified member. | |
my $fh = Archive::Zip::MemberRead->new($zip, 'fred.c') | |
=cut | |
sub new { | |
my ($class, $zip, $file) = @_; | |
my ($self, $member); | |
if ($zip && $file) # zip and filename, or zip and member | |
{ | |
$member = ref($file) ? $file : $zip->memberNamed($file); | |
} elsif ($zip && !$file && ref($zip)) # just member | |
{ | |
$member = $zip; | |
} else { | |
die( | |
'Archive::Zip::MemberRead::new needs a zip and filename, zip and member, or member' | |
); | |
} | |
$self = {}; | |
bless($self, $class); | |
$self->set_member($member); | |
return $self; | |
} | |
sub set_member { | |
my ($self, $member) = @_; | |
$self->{member} = $member; | |
$self->set_compression(COMPRESSION_STORED); | |
$self->rewind(); | |
} | |
sub set_compression { | |
my ($self, $compression) = @_; | |
$self->{member}->desiredCompressionMethod($compression) if $self->{member}; | |
} | |
=item setLineEnd(expr) | |
Set the line end character to use. This is set to \n by default | |
except on Windows systems where it is set to \r\n. You will | |
only need to set this on systems which are not Windows or Unix | |
based and require a line end different from \n. | |
This is a class method so call as C<Archive::Zip::MemberRead>->C<setLineEnd($nl)> | |
=cut | |
sub setLineEnd { | |
shift; | |
$nl = shift; | |
} | |
=item rewind() | |
Rewinds an C<Archive::Zip::MemberRead> so that you can read from it again | |
starting at the beginning. | |
=cut | |
sub rewind { | |
my $self = shift; | |
$self->_reset_vars(); | |
$self->{member}->rewindData() if $self->{member}; | |
} | |
sub _reset_vars { | |
my $self = shift; | |
$self->{line_no} = 0; | |
$self->{at_end} = 0; | |
delete $self->{buffer}; | |
} | |
=item input_record_separator(expr) | |
If the argument is given, input_record_separator for this | |
instance is set to it. The current setting (which may be | |
the global $/) is always returned. | |
=cut | |
sub input_record_separator { | |
my $self = shift; | |
if (@_) { | |
$self->{sep} = shift; | |
$self->{sep_re} = | |
_sep_as_re($self->{sep}); # Cache the RE as an optimization | |
} | |
return exists $self->{sep} ? $self->{sep} : $/; | |
} | |
# Return the input_record_separator in use as an RE fragment | |
# Note that if we have a per-instance input_record_separator | |
# we can just return the already converted value. Otherwise, | |
# the conversion must be done on $/ every time since we cannot | |
# know whether it has changed or not. | |
sub _sep_re { | |
my $self = shift; | |
# Important to phrase this way: sep's value may be undef. | |
return exists $self->{sep} ? $self->{sep_re} : _sep_as_re($/); | |
} | |
# Convert the input record separator into an RE and return it. | |
sub _sep_as_re { | |
my $sep = shift; | |
if (defined $sep) { | |
if ($sep eq '') { | |
return "(?:$nl){2,}"; | |
} else { | |
$sep =~ s/\n/$nl/og; | |
return quotemeta $sep; | |
} | |
} else { | |
return undef; | |
} | |
} | |
=item input_line_number() | |
Returns the current line number, but only if you're using C<getline()>. | |
Using C<read()> will not update the line number. | |
=cut | |
sub input_line_number { | |
my $self = shift; | |
return $self->{line_no}; | |
} | |
=item close() | |
Closes the given file handle. | |
=cut | |
sub close { | |
my $self = shift; | |
$self->_reset_vars(); | |
$self->{member}->endRead(); | |
} | |
=item buffer_size([ $size ]) | |
Gets or sets the buffer size used for reads. | |
Default is the chunk size used by Archive::Zip. | |
=cut | |
sub buffer_size { | |
my ($self, $size) = @_; | |
if (!$size) { | |
return $self->{chunkSize} || Archive::Zip::chunkSize(); | |
} else { | |
$self->{chunkSize} = $size; | |
} | |
} | |
=item getline() | |
Returns the next line from the currently open member. | |
Makes sense only for text files. | |
A read error is considered fatal enough to die. | |
Returns undef on eof. All subsequent calls would return undef, | |
unless a rewind() is called. | |
Note: The line returned has the input_record_separator (default: newline) removed. | |
=item getline( { preserve_line_ending => 1 } ) | |
Returns the next line including the line ending. | |
=cut | |
sub getline { | |
my ($self, $argref) = @_; | |
my $size = $self->buffer_size(); | |
my $sep = $self->_sep_re(); | |
my $preserve_line_ending; | |
if (ref $argref eq 'HASH') { | |
$preserve_line_ending = $argref->{'preserve_line_ending'}; | |
$sep =~ s/\\([^A-Za-z_0-9])+/$1/g; | |
} | |
for (; ;) { | |
if ( $sep | |
&& defined($self->{buffer}) | |
&& $self->{buffer} =~ s/^(.*?)$sep//s) { | |
my $line = $1; | |
$self->{line_no}++; | |
if ($preserve_line_ending) { | |
return $line . $sep; | |
} else { | |
return $line; | |
} | |
} elsif ($self->{at_end}) { | |
$self->{line_no}++ if $self->{buffer}; | |
return delete $self->{buffer}; | |
} | |
my ($temp, $status) = $self->{member}->readChunk($size); | |
if ($status != AZ_OK && $status != AZ_STREAM_END) { | |
die "ERROR: Error reading chunk from archive - $status"; | |
} | |
$self->{at_end} = $status == AZ_STREAM_END; | |
$self->{buffer} .= $$temp; | |
} | |
} | |
=item read($buffer, $num_bytes_to_read) | |
Simulates a normal C<read()> system call. | |
Returns the no. of bytes read. C<undef> on error, 0 on eof, I<e.g.>: | |
$fh = Archive::Zip::MemberRead->new($zip, "sreeji/secrets.bin"); | |
while (1) | |
{ | |
$read = $fh->read($buffer, 1024); | |
die "FATAL ERROR reading my secrets !\n" if (!defined($read)); | |
last if (!$read); | |
# Do processing. | |
.... | |
} | |
=cut | |
# | |
# All these $_ are required to emulate read(). | |
# | |
sub read { | |
my $self = $_[0]; | |
my $size = $_[2]; | |
my ($temp, $status, $ret); | |
($temp, $status) = $self->{member}->readChunk($size); | |
if ($status != AZ_OK && $status != AZ_STREAM_END) { | |
$_[1] = undef; | |
$ret = undef; | |
} else { | |
$_[1] = $$temp; | |
$ret = length($$temp); | |
} | |
return $ret; | |
} | |
1; | |
=back | |
=head1 AUTHOR | |
Sreeji K. Das E<lt>sreeji_k@yahoo.comE<gt> | |
See L<Archive::Zip> by Ned Konz without which this module does not make | |
any sense! | |
Minor mods by Ned Konz. | |
=head1 COPYRIGHT | |
Copyright 2002 Sreeji K. Das. | |
This program is free software; you can redistribute it and/or modify it under | |
the same terms as Perl itself. | |
=cut | |
ARCHIVE_ZIP_MEMBERREAD | |
$fatpacked{"Archive/Zip/MockFileHandle.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_MOCKFILEHANDLE'; | |
package Archive::Zip::MockFileHandle; | |
# Output file handle that calls a custom write routine | |
# Ned Konz, March 2000 | |
# This is provided to help with writing zip files | |
# when you have to process them a chunk at a time. | |
use strict; | |
use vars qw{$VERSION}; | |
BEGIN { | |
$VERSION = '1.68'; | |
$VERSION = eval $VERSION; | |
} | |
sub new { | |
my $class = shift || __PACKAGE__; | |
$class = ref($class) || $class; | |
my $self = bless( | |
{ | |
'position' => 0, | |
'size' => 0 | |
}, | |
$class | |
); | |
return $self; | |
} | |
sub eof { | |
my $self = shift; | |
return $self->{'position'} >= $self->{'size'}; | |
} | |
# Copy given buffer to me | |
sub print { | |
my $self = shift; | |
my $bytes = join('', @_); | |
my $bytesWritten = $self->writeHook($bytes); | |
if ($self->{'position'} + $bytesWritten > $self->{'size'}) { | |
$self->{'size'} = $self->{'position'} + $bytesWritten; | |
} | |
$self->{'position'} += $bytesWritten; | |
return $bytesWritten; | |
} | |
# Called on each write. | |
# Override in subclasses. | |
# Return number of bytes written (0 on error). | |
sub writeHook { | |
my $self = shift; | |
my $bytes = shift; | |
return length($bytes); | |
} | |
sub binmode { 1 } | |
sub close { 1 } | |
sub clearerr { 1 } | |
# I'm write-only! | |
sub read { 0 } | |
sub tell { return shift->{'position'} } | |
sub opened { 1 } | |
1; | |
ARCHIVE_ZIP_MOCKFILEHANDLE | |
$fatpacked{"Archive/Zip/NewFileMember.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_NEWFILEMEMBER'; | |
package Archive::Zip::NewFileMember; | |
use strict; | |
use vars qw( $VERSION @ISA ); | |
BEGIN { | |
$VERSION = '1.68'; | |
@ISA = qw ( Archive::Zip::FileMember ); | |
} | |
use Archive::Zip qw( | |
:CONSTANTS | |
:ERROR_CODES | |
:UTILITY_METHODS | |
); | |
# Given a file name, set up for eventual writing. | |
sub _newFromFileNamed { | |
my $class = shift; | |
my $fileName = shift; # local FS format | |
my $newName = shift; | |
$newName = _asZipDirName($fileName) unless defined($newName); | |
return undef unless (stat($fileName) && -r _ && !-d _ ); | |
my $self = $class->new(@_); | |
$self->{'fileName'} = $newName; | |
$self->{'externalFileName'} = $fileName; | |
$self->{'compressionMethod'} = COMPRESSION_STORED; | |
my @stat = stat(_); | |
$self->{'compressedSize'} = $self->{'uncompressedSize'} = $stat[7]; | |
$self->desiredCompressionMethod( | |
($self->compressedSize() > 0) | |
? COMPRESSION_DEFLATED | |
: COMPRESSION_STORED | |
); | |
$self->unixFileAttributes($stat[2]); | |
$self->setLastModFileDateTimeFromUnix($stat[9]); | |
$self->isTextFile(-T _ ); | |
return $self; | |
} | |
sub rewindData { | |
my $self = shift; | |
my $status = $self->SUPER::rewindData(@_); | |
return $status unless $status == AZ_OK; | |
return AZ_IO_ERROR unless $self->fh(); | |
$self->fh()->clearerr(); | |
$self->fh()->seek(0, IO::Seekable::SEEK_SET) | |
or return _ioError("rewinding", $self->externalFileName()); | |
return AZ_OK; | |
} | |
# Return bytes read. Note that first parameter is a ref to a buffer. | |
# my $data; | |
# my ( $bytesRead, $status) = $self->readRawChunk( \$data, $chunkSize ); | |
sub _readRawChunk { | |
my ($self, $dataRef, $chunkSize) = @_; | |
return (0, AZ_OK) unless $chunkSize; | |
my $bytesRead = $self->fh()->read($$dataRef, $chunkSize) | |
or return (0, _ioError("reading data")); | |
return ($bytesRead, AZ_OK); | |
} | |
# If I already exist, extraction is a no-op. | |
sub extractToFileNamed { | |
my $self = shift; | |
my $name = shift; # local FS name | |
if (File::Spec->rel2abs($name) eq | |
File::Spec->rel2abs($self->externalFileName()) and -r $name) { | |
return AZ_OK; | |
} else { | |
return $self->SUPER::extractToFileNamed($name, @_); | |
} | |
} | |
1; | |
ARCHIVE_ZIP_NEWFILEMEMBER | |
$fatpacked{"Archive/Zip/StringMember.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_STRINGMEMBER'; | |
package Archive::Zip::StringMember; | |
use strict; | |
use vars qw( $VERSION @ISA ); | |
BEGIN { | |
$VERSION = '1.68'; | |
@ISA = qw( Archive::Zip::Member ); | |
} | |
use Archive::Zip qw( | |
:CONSTANTS | |
:ERROR_CODES | |
); | |
# Create a new string member. Default is COMPRESSION_STORED. | |
# Can take a ref to a string as well. | |
sub _newFromString { | |
my $class = shift; | |
my $string = shift; | |
my $name = shift; | |
my $self = $class->new(@_); | |
$self->contents($string); | |
$self->fileName($name) if defined($name); | |
# Set the file date to now | |
$self->setLastModFileDateTimeFromUnix(time()); | |
$self->unixFileAttributes($self->DEFAULT_FILE_PERMISSIONS); | |
return $self; | |
} | |
sub _become { | |
my $self = shift; | |
my $newClass = shift; | |
return $self if ref($self) eq $newClass; | |
delete($self->{'contents'}); | |
return $self->SUPER::_become($newClass); | |
} | |
# Get or set my contents. Note that we do not call the superclass | |
# version of this, because it calls us. | |
sub contents { | |
my $self = shift; | |
my $string = shift; | |
if (defined($string)) { | |
$self->{'contents'} = | |
pack('C0a*', (ref($string) eq 'SCALAR') ? $$string : $string); | |
$self->{'uncompressedSize'} = $self->{'compressedSize'} = | |
length($self->{'contents'}); | |
$self->{'compressionMethod'} = COMPRESSION_STORED; | |
} | |
return wantarray ? ($self->{'contents'}, AZ_OK) : $self->{'contents'}; | |
} | |
# Return bytes read. Note that first parameter is a ref to a buffer. | |
# my $data; | |
# my ( $bytesRead, $status) = $self->readRawChunk( \$data, $chunkSize ); | |
sub _readRawChunk { | |
my ($self, $dataRef, $chunkSize) = @_; | |
$$dataRef = substr($self->contents(), $self->_readOffset(), $chunkSize); | |
return (length($$dataRef), AZ_OK); | |
} | |
1; | |
ARCHIVE_ZIP_STRINGMEMBER | |
$fatpacked{"Archive/Zip/Tree.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_TREE'; | |
package Archive::Zip::Tree; | |
use strict; | |
use vars qw{$VERSION}; | |
BEGIN { | |
$VERSION = '1.68'; | |
} | |
use Archive::Zip; | |
warn( | |
"Archive::Zip::Tree is deprecated; its methods have been moved into Archive::Zip." | |
) if $^W; | |
1; | |
__END__ | |
=head1 NAME | |
Archive::Zip::Tree - (DEPRECATED) methods for adding/extracting trees using Archive::Zip | |
=head1 DESCRIPTION | |
This module is deprecated, because all its methods were moved into the main | |
Archive::Zip module. | |
It is included in the distribution merely to avoid breaking old code. | |
See L<Archive::Zip>. | |
=head1 AUTHOR | |
Ned Konz, perl@bike-nomad.com | |
=head1 COPYRIGHT | |
Copyright (c) 2000-2002 Ned Konz. All rights reserved. This program is free | |
software; you can redistribute it and/or modify it under the same terms | |
as Perl itself. | |
=head1 SEE ALSO | |
L<Archive::Zip> | |
=cut | |
ARCHIVE_ZIP_TREE | |
$fatpacked{"Archive/Zip/ZipFileMember.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_ZIPFILEMEMBER'; | |
package Archive::Zip::ZipFileMember; | |
use strict; | |
use vars qw( $VERSION @ISA ); | |
BEGIN { | |
$VERSION = '1.68'; | |
@ISA = qw ( Archive::Zip::FileMember ); | |
} | |
use Archive::Zip qw( | |
:CONSTANTS | |
:ERROR_CODES | |
:PKZIP_CONSTANTS | |
:UTILITY_METHODS | |
); | |
# Create a new Archive::Zip::ZipFileMember | |
# given a filename and optional open file handle | |
# | |
sub _newFromZipFile { | |
my $class = shift; | |
my $fh = shift; | |
my $externalFileName = shift; | |
my $archiveZip64 = @_ ? shift : 0; | |
my $possibleEocdOffset = @_ ? shift : 0; # normally 0 | |
my $self = $class->new( | |
'eocdCrc32' => 0, | |
'diskNumberStart' => 0, | |
'localHeaderRelativeOffset' => 0, | |
'dataOffset' => 0, # localHeaderRelativeOffset + header length | |
@_ | |
); | |
$self->{'externalFileName'} = $externalFileName; | |
$self->{'fh'} = $fh; | |
$self->{'archiveZip64'} = $archiveZip64; | |
$self->{'possibleEocdOffset'} = $possibleEocdOffset; | |
return $self; | |
} | |
sub isDirectory { | |
my $self = shift; | |
return (substr($self->fileName, -1, 1) eq '/' | |
and $self->uncompressedSize == 0); | |
} | |
# Seek to the beginning of the local header, just past the signature. | |
# Verify that the local header signature is in fact correct. | |
# Update the localHeaderRelativeOffset if necessary by adding the possibleEocdOffset. | |
# Returns status. | |
sub _seekToLocalHeader { | |
my $self = shift; | |
my $where = shift; # optional | |
my $previousWhere = shift; # optional | |
$where = $self->localHeaderRelativeOffset() unless defined($where); | |
# avoid loop on certain corrupt files (from Julian Field) | |
return _formatError("corrupt zip file") | |
if defined($previousWhere) && $where == $previousWhere; | |
my $status; | |
my $signature; | |
$status = $self->fh()->seek($where, IO::Seekable::SEEK_SET); | |
return _ioError("seeking to local header") unless $status; | |
($status, $signature) = | |
_readSignature($self->fh(), $self->externalFileName(), | |
LOCAL_FILE_HEADER_SIGNATURE, 1); | |
return $status if $status == AZ_IO_ERROR; | |
# retry with EOCD offset if any was given. | |
if ($status == AZ_FORMAT_ERROR && $self->{'possibleEocdOffset'}) { | |
$status = $self->_seekToLocalHeader( | |
$self->localHeaderRelativeOffset() + $self->{'possibleEocdOffset'}, | |
$where | |
); | |
if ($status == AZ_OK) { | |
$self->{'localHeaderRelativeOffset'} += | |
$self->{'possibleEocdOffset'}; | |
$self->{'possibleEocdOffset'} = 0; | |
} | |
} | |
return $status; | |
} | |
# Because I'm going to delete the file handle, read the local file | |
# header if the file handle is seekable. If it is not, I assume that | |
# I've already read the local header. | |
# Return ( $status, $self ) | |
sub _become { | |
my $self = shift; | |
my $newClass = shift; | |
return $self if ref($self) eq $newClass; | |
my $status = AZ_OK; | |
if (_isSeekable($self->fh())) { | |
my $here = $self->fh()->tell(); | |
$status = $self->_seekToLocalHeader(); | |
$status = $self->_readLocalFileHeader() if $status == AZ_OK; | |
$self->fh()->seek($here, IO::Seekable::SEEK_SET); | |
return $status unless $status == AZ_OK; | |
} | |
delete($self->{'eocdCrc32'}); | |
delete($self->{'diskNumberStart'}); | |
delete($self->{'localHeaderRelativeOffset'}); | |
delete($self->{'dataOffset'}); | |
delete($self->{'archiveZip64'}); | |
delete($self->{'possibleEocdOffset'}); | |
return $self->SUPER::_become($newClass); | |
} | |
sub diskNumberStart { | |
shift->{'diskNumberStart'}; | |
} | |
sub localHeaderRelativeOffset { | |
shift->{'localHeaderRelativeOffset'}; | |
} | |
sub dataOffset { | |
shift->{'dataOffset'}; | |
} | |
# Skip local file header, updating only extra field stuff. | |
# Assumes that fh is positioned before signature. | |
sub _skipLocalFileHeader { | |
my $self = shift; | |
my $header; | |
my $bytesRead = $self->fh()->read($header, LOCAL_FILE_HEADER_LENGTH); | |
if ($bytesRead != LOCAL_FILE_HEADER_LENGTH) { | |
return _ioError("reading local file header"); | |
} | |
my $fileNameLength; | |
my $extraFieldLength; | |
my $bitFlag; | |
( | |
undef, # $self->{'versionNeededToExtract'}, | |
$bitFlag, | |
undef, # $self->{'compressionMethod'}, | |
undef, # $self->{'lastModFileDateTime'}, | |
undef, # $crc32, | |
undef, # $compressedSize, | |
undef, # $uncompressedSize, | |
$fileNameLength, | |
$extraFieldLength | |
) = unpack(LOCAL_FILE_HEADER_FORMAT, $header); | |
if ($fileNameLength) { | |
$self->fh()->seek($fileNameLength, IO::Seekable::SEEK_CUR) | |
or return _ioError("skipping local file name"); | |
} | |
my $zip64 = 0; | |
if ($extraFieldLength) { | |
$bytesRead = | |
$self->fh()->read($self->{'localExtraField'}, $extraFieldLength); | |
if ($bytesRead != $extraFieldLength) { | |
return _ioError("reading local extra field"); | |
} | |
if ($self->{'archiveZip64'}) { | |
my $status; | |
($status, $zip64) = | |
$self->_extractZip64ExtraField($self->{'localExtraField'}, undef, undef); | |
return $status if $status != AZ_OK; | |
$self->{'zip64'} ||= $zip64; | |
} | |
} | |
$self->{'dataOffset'} = $self->fh()->tell(); | |
if ($bitFlag & GPBF_HAS_DATA_DESCRIPTOR_MASK) { | |
# Read the crc32, compressedSize, and uncompressedSize from the | |
# extended data descriptor, which directly follows the compressed data. | |
# | |
# Skip over the compressed file data (assumes that EOCD compressedSize | |
# was correct) | |
$self->fh()->seek($self->{'compressedSize'}, IO::Seekable::SEEK_CUR) | |
or return _ioError("seeking to extended local header"); | |
# these values should be set correctly from before. | |
my $oldCrc32 = $self->{'eocdCrc32'}; | |
my $oldCompressedSize = $self->{'compressedSize'}; | |
my $oldUncompressedSize = $self->{'uncompressedSize'}; | |
my $status = $self->_readDataDescriptor($zip64); | |
return $status unless $status == AZ_OK; | |
# The buffer with encrypted data is prefixed with a new | |
# encrypted 12 byte header. The size only changes when | |
# the buffer is also compressed | |
$self->isEncrypted && $oldUncompressedSize > $self->{'uncompressedSize'} | |
and $oldUncompressedSize -= DATA_DESCRIPTOR_LENGTH; | |
return _formatError( | |
"CRC or size mismatch while skipping data descriptor") | |
if ( $oldCrc32 != $self->{'crc32'} | |
|| $oldUncompressedSize != $self->{'uncompressedSize'}); | |
$self->{'crc32'} = 0 | |
if $self->compressionMethod() == COMPRESSION_STORED ; | |
} | |
return AZ_OK; | |
} | |
# Read from a local file header into myself. Returns AZ_OK (in | |
# scalar context) or a pair (AZ_OK, $headerSize) (in list | |
# context) if successful. | |
# Assumes that fh is positioned after signature. | |
# Note that crc32, compressedSize, and uncompressedSize will be 0 if | |
# GPBF_HAS_DATA_DESCRIPTOR_MASK is set in the bitFlag. | |
sub _readLocalFileHeader { | |
my $self = shift; | |
my $header; | |
my $bytesRead = $self->fh()->read($header, LOCAL_FILE_HEADER_LENGTH); | |
if ($bytesRead != LOCAL_FILE_HEADER_LENGTH) { | |
return _ioError("reading local file header"); | |
} | |
my $fileNameLength; | |
my $crc32; | |
my $compressedSize; | |
my $uncompressedSize; | |
my $extraFieldLength; | |
( | |
$self->{'versionNeededToExtract'}, $self->{'bitFlag'}, | |
$self->{'compressionMethod'}, $self->{'lastModFileDateTime'}, | |
$crc32, $compressedSize, | |
$uncompressedSize, $fileNameLength, | |
$extraFieldLength | |
) = unpack(LOCAL_FILE_HEADER_FORMAT, $header); | |
if ($fileNameLength) { | |
my $fileName; | |
$bytesRead = $self->fh()->read($fileName, $fileNameLength); | |
if ($bytesRead != $fileNameLength) { | |
return _ioError("reading local file name"); | |
} | |
$self->fileName($fileName); | |
} | |
my $zip64 = 0; | |
if ($extraFieldLength) { | |
$bytesRead = | |
$self->fh()->read($self->{'localExtraField'}, $extraFieldLength); | |
if ($bytesRead != $extraFieldLength) { | |
return _ioError("reading local extra field"); | |
} | |
if ($self->{'archiveZip64'}) { | |
my $status; | |
($status, $zip64) = | |
$self->_extractZip64ExtraField($self->{'localExtraField'}, | |
$uncompressedSize, | |
$compressedSize); | |
return $status if $status != AZ_OK; | |
$self->{'zip64'} ||= $zip64; | |
} | |
} | |
$self->{'dataOffset'} = $self->fh()->tell(); | |
if ($self->hasDataDescriptor()) { | |
# Read the crc32, compressedSize, and uncompressedSize from the | |
# extended data descriptor. | |
# Skip over the compressed file data (assumes that EOCD compressedSize | |
# was correct) | |
$self->fh()->seek($self->{'compressedSize'}, IO::Seekable::SEEK_CUR) | |
or return _ioError("seeking to extended local header"); | |
my $status = $self->_readDataDescriptor($zip64); | |
return $status unless $status == AZ_OK; | |
} else { | |
return _formatError( | |
"CRC or size mismatch after reading data descriptor") | |
if ( $self->{'crc32'} != $crc32 | |
|| $self->{'uncompressedSize'} != $uncompressedSize); | |
} | |
return | |
wantarray | |
? (AZ_OK, | |
SIGNATURE_LENGTH, | |
LOCAL_FILE_HEADER_LENGTH + | |
$fileNameLength + | |
$extraFieldLength) | |
: AZ_OK; | |
} | |
# This will read the data descriptor, which is after the end of compressed file | |
# data in members that have GPBF_HAS_DATA_DESCRIPTOR_MASK set in their bitFlag. | |
# The only reliable way to find these is to rely on the EOCD compressedSize. | |
# Assumes that file is positioned immediately after the compressed data. | |
# Returns status; sets crc32, compressedSize, and uncompressedSize. | |
sub _readDataDescriptor { | |
my $self = shift; | |
my $zip64 = shift; | |
my $signatureData; | |
my $header; | |
my $crc32; | |
my $compressedSize; | |
my $uncompressedSize; | |
my $bytesRead = $self->fh()->read($signatureData, SIGNATURE_LENGTH); | |
return _ioError("reading header signature") | |
if $bytesRead != SIGNATURE_LENGTH; | |
my $signature = unpack(SIGNATURE_FORMAT, $signatureData); | |
my $dataDescriptorLength; | |
my $dataDescriptorFormat; | |
my $dataDescriptorLengthNoSig; | |
my $dataDescriptorFormatNoSig; | |
if (! $zip64) { | |
$dataDescriptorLength = DATA_DESCRIPTOR_LENGTH; | |
$dataDescriptorFormat = DATA_DESCRIPTOR_FORMAT; | |
$dataDescriptorLengthNoSig = DATA_DESCRIPTOR_LENGTH_NO_SIG; | |
$dataDescriptorFormatNoSig = DATA_DESCRIPTOR_FORMAT_NO_SIG | |
} | |
else { | |
$dataDescriptorLength = DATA_DESCRIPTOR_ZIP64_LENGTH; | |
$dataDescriptorFormat = DATA_DESCRIPTOR_ZIP64_FORMAT; | |
$dataDescriptorLengthNoSig = DATA_DESCRIPTOR_ZIP64_LENGTH_NO_SIG; | |
$dataDescriptorFormatNoSig = DATA_DESCRIPTOR_ZIP64_FORMAT_NO_SIG | |
} | |
# unfortunately, the signature appears to be optional. | |
if ($signature == DATA_DESCRIPTOR_SIGNATURE | |
&& ($signature != $self->{'crc32'})) { | |
$bytesRead = $self->fh()->read($header, $dataDescriptorLength); | |
return _ioError("reading data descriptor") | |
if $bytesRead != $dataDescriptorLength; | |
($crc32, $compressedSize, $uncompressedSize) = | |
unpack($dataDescriptorFormat, $header); | |
} else { | |
$bytesRead = $self->fh()->read($header, $dataDescriptorLengthNoSig); | |
return _ioError("reading data descriptor") | |
if $bytesRead != $dataDescriptorLengthNoSig; | |
$crc32 = $signature; | |
($compressedSize, $uncompressedSize) = | |
unpack($dataDescriptorFormatNoSig, $header); | |
} | |
$self->{'eocdCrc32'} = $self->{'crc32'} | |
unless defined($self->{'eocdCrc32'}); | |
$self->{'crc32'} = $crc32; | |
$self->{'compressedSize'} = $compressedSize; | |
$self->{'uncompressedSize'} = $uncompressedSize; | |
return AZ_OK; | |
} | |
# Read a Central Directory header. Return AZ_OK on success. | |
# Assumes that fh is positioned right after the signature. | |
sub _readCentralDirectoryFileHeader { | |
my $self = shift; | |
my $fh = $self->fh(); | |
my $header = ''; | |
my $bytesRead = $fh->read($header, CENTRAL_DIRECTORY_FILE_HEADER_LENGTH); | |
if ($bytesRead != CENTRAL_DIRECTORY_FILE_HEADER_LENGTH) { | |
return _ioError("reading central dir header"); | |
} | |
my ($fileNameLength, $extraFieldLength, $fileCommentLength); | |
( | |
$self->{'versionMadeBy'}, | |
$self->{'fileAttributeFormat'}, | |
$self->{'versionNeededToExtract'}, | |
$self->{'bitFlag'}, | |
$self->{'compressionMethod'}, | |
$self->{'lastModFileDateTime'}, | |
$self->{'crc32'}, | |
$self->{'compressedSize'}, | |
$self->{'uncompressedSize'}, | |
$fileNameLength, | |
$extraFieldLength, | |
$fileCommentLength, | |
$self->{'diskNumberStart'}, | |
$self->{'internalFileAttributes'}, | |
$self->{'externalFileAttributes'}, | |
$self->{'localHeaderRelativeOffset'} | |
) = unpack(CENTRAL_DIRECTORY_FILE_HEADER_FORMAT, $header); | |
$self->{'eocdCrc32'} = $self->{'crc32'}; | |
if ($fileNameLength) { | |
$bytesRead = $fh->read($self->{'fileName'}, $fileNameLength); | |
if ($bytesRead != $fileNameLength) { | |
_ioError("reading central dir filename"); | |
} | |
} | |
if ($extraFieldLength) { | |
$bytesRead = $fh->read($self->{'cdExtraField'}, $extraFieldLength); | |
if ($bytesRead != $extraFieldLength) { | |
return _ioError("reading central dir extra field"); | |
} | |
if ($self->{'archiveZip64'}) { | |
my ($status, $zip64) = | |
$self->_extractZip64ExtraField($self->{'cdExtraField'}, | |
$self->{'uncompressedSize'}, | |
$self->{'compressedSize'}, | |
$self->{'localHeaderRelativeOffset'}, | |
$self->{'diskNumberStart'}); | |
return $status if $status != AZ_OK; | |
$self->{'zip64'} ||= $zip64; | |
} | |
} | |
if ($fileCommentLength) { | |
$bytesRead = $fh->read($self->{'fileComment'}, $fileCommentLength); | |
if ($bytesRead != $fileCommentLength) { | |
return _ioError("reading central dir file comment"); | |
} | |
} | |
# NK 10/21/04: added to avoid problems with manipulated headers | |
if ( $self->{'uncompressedSize'} != $self->{'compressedSize'} | |
and $self->{'compressionMethod'} == COMPRESSION_STORED) { | |
$self->{'uncompressedSize'} = $self->{'compressedSize'}; | |
} | |
$self->desiredCompressionMethod($self->compressionMethod()); | |
return AZ_OK; | |
} | |
sub rewindData { | |
my $self = shift; | |
my $status = $self->SUPER::rewindData(@_); | |
return $status unless $status == AZ_OK; | |
return AZ_IO_ERROR unless $self->fh(); | |
$self->fh()->clearerr(); | |
# Seek to local file header. | |
# The only reason that I'm doing this this way is that the extraField | |
# length seems to be different between the CD header and the LF header. | |
$status = $self->_seekToLocalHeader(); | |
return $status unless $status == AZ_OK; | |
# skip local file header | |
$status = $self->_skipLocalFileHeader(); | |
return $status unless $status == AZ_OK; | |
# Seek to beginning of file data | |
$self->fh()->seek($self->dataOffset(), IO::Seekable::SEEK_SET) | |
or return _ioError("seeking to beginning of file data"); | |
return AZ_OK; | |
} | |
# Return bytes read. Note that first parameter is a ref to a buffer. | |
# my $data; | |
# my ( $bytesRead, $status) = $self->readRawChunk( \$data, $chunkSize ); | |
sub _readRawChunk { | |
my ($self, $dataRef, $chunkSize) = @_; | |
return (0, AZ_OK) unless $chunkSize; | |
my $bytesRead = $self->fh()->read($$dataRef, $chunkSize) | |
or return (0, _ioError("reading data")); | |
return ($bytesRead, AZ_OK); | |
} | |
1; | |
ARCHIVE_ZIP_ZIPFILEMEMBER | |
$fatpacked{"Carp.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'CARP'; | |
package Carp; | |
{ use 5.006; } | |
use strict; | |
use warnings; | |
BEGIN { | |
# Very old versions of warnings.pm load Carp. This can go wrong due | |
# to the circular dependency. If warnings is invoked before Carp, | |
# then warnings starts by loading Carp, then Carp (above) tries to | |
# invoke warnings, and gets nothing because warnings is in the process | |
# of loading and hasn't defined its import method yet. If we were | |
# only turning on warnings ("use warnings" above) this wouldn't be too | |
# bad, because Carp would just gets the state of the -w switch and so | |
# might not get some warnings that it wanted. The real problem is | |
# that we then want to turn off Unicode warnings, but "no warnings | |
# 'utf8'" won't be effective if we're in this circular-dependency | |
# situation. So, if warnings.pm is an affected version, we turn | |
# off all warnings ourselves by directly setting ${^WARNING_BITS}. | |
# On unaffected versions, we turn off just Unicode warnings, via | |
# the proper API. | |
if(!defined($warnings::VERSION) || eval($warnings::VERSION) < 1.06) { | |
${^WARNING_BITS} = ""; | |
} else { | |
"warnings"->unimport("utf8"); | |
} | |
} | |
sub _fetch_sub { # fetch sub without autovivifying | |
my($pack, $sub) = @_; | |
$pack .= '::'; | |
# only works with top-level packages | |
return unless exists($::{$pack}); | |
for ($::{$pack}) { | |
return unless ref \$_ eq 'GLOB' && *$_{HASH} && exists $$_{$sub}; | |
for ($$_{$sub}) { | |
return ref \$_ eq 'GLOB' ? *$_{CODE} : undef | |
} | |
} | |
} | |
# UTF8_REGEXP_PROBLEM is a compile-time constant indicating whether Carp | |
# must avoid applying a regular expression to an upgraded (is_utf8) | |
# string. There are multiple problems, on different Perl versions, | |
# that require this to be avoided. All versions prior to 5.13.8 will | |
# load utf8_heavy.pl for the swash system, even if the regexp doesn't | |
# use character classes. Perl 5.6 and Perls [5.11.2, 5.13.11) exhibit | |
# specific problems when Carp is being invoked in the aftermath of a | |
# syntax error. | |
BEGIN { | |
if("$]" < 5.013011) { | |
*UTF8_REGEXP_PROBLEM = sub () { 1 }; | |
} else { | |
*UTF8_REGEXP_PROBLEM = sub () { 0 }; | |
} | |
} | |
# is_utf8() is essentially the utf8::is_utf8() function, which indicates | |
# whether a string is represented in the upgraded form (using UTF-8 | |
# internally). As utf8::is_utf8() is only available from Perl 5.8 | |
# onwards, extra effort is required here to make it work on Perl 5.6. | |
BEGIN { | |
if(defined(my $sub = _fetch_sub utf8 => 'is_utf8')) { | |
*is_utf8 = $sub; | |
} else { | |
# black magic for perl 5.6 | |
*is_utf8 = sub { unpack("C", "\xaa".$_[0]) != 170 }; | |
} | |
} | |
# The downgrade() function defined here is to be used for attempts to | |
# downgrade where it is acceptable to fail. It must be called with a | |
# second argument that is a true value. | |
BEGIN { | |
if(defined(my $sub = _fetch_sub utf8 => 'downgrade')) { | |
*downgrade = \&{"utf8::downgrade"}; | |
} else { | |
*downgrade = sub { | |
my $r = ""; | |
my $l = length($_[0]); | |
for(my $i = 0; $i != $l; $i++) { | |
my $o = ord(substr($_[0], $i, 1)); | |
return if $o > 255; | |
$r .= chr($o); | |
} | |
$_[0] = $r; | |
}; | |
} | |
} | |
# is_safe_printable_codepoint() indicates whether a character, specified | |
# by integer codepoint, is OK to output literally in a trace. Generally | |
# this is if it is a printable character in the ancestral character set | |
# (ASCII or EBCDIC). This is used on some Perls in situations where a | |
# regexp can't be used. | |
BEGIN { | |
*is_safe_printable_codepoint = | |
"$]" >= 5.007_003 ? | |
eval(q(sub ($) { | |
my $u = utf8::native_to_unicode($_[0]); | |
$u >= 0x20 && $u <= 0x7e; | |
})) | |
: ord("A") == 65 ? | |
sub ($) { $_[0] >= 0x20 && $_[0] <= 0x7e } | |
: | |
sub ($) { | |
# Early EBCDIC | |
# 3 EBCDIC code pages supported then; all controls but one | |
# are the code points below SPACE. The other one is 0x5F on | |
# POSIX-BC; FF on the other two. | |
# FIXME: there are plenty of unprintable codepoints other | |
# than those that this code and the comment above identifies | |
# as "controls". | |
$_[0] >= ord(" ") && $_[0] <= 0xff && | |
$_[0] != (ord ("^") == 106 ? 0x5f : 0xff); | |
} | |
; | |
} | |
sub _univ_mod_loaded { | |
return 0 unless exists($::{"UNIVERSAL::"}); | |
for ($::{"UNIVERSAL::"}) { | |
return 0 unless ref \$_ eq "GLOB" && *$_{HASH} && exists $$_{"$_[0]::"}; | |
for ($$_{"$_[0]::"}) { | |
return 0 unless ref \$_ eq "GLOB" && *$_{HASH} && exists $$_{"VERSION"}; | |
for ($$_{"VERSION"}) { | |
return 0 unless ref \$_ eq "GLOB"; | |
return ${*$_{SCALAR}}; | |
} | |
} | |
} | |
} | |
# _maybe_isa() is usually the UNIVERSAL::isa function. We have to avoid | |
# the latter if the UNIVERSAL::isa module has been loaded, to avoid infi- | |
# nite recursion; in that case _maybe_isa simply returns true. | |
my $isa; | |
BEGIN { | |
if (_univ_mod_loaded('isa')) { | |
*_maybe_isa = sub { 1 } | |
} | |
else { | |
# Since we have already done the check, record $isa for use below | |
# when defining _StrVal. | |
*_maybe_isa = $isa = _fetch_sub(UNIVERSAL => "isa"); | |
} | |
} | |
# We need an overload::StrVal or equivalent function, but we must avoid | |
# loading any modules on demand, as Carp is used from __DIE__ handlers and | |
# may be invoked after a syntax error. | |
# We can copy recent implementations of overload::StrVal and use | |
# overloading.pm, which is the fastest implementation, so long as | |
# overloading is available. If it is not available, we use our own pure- | |
# Perl StrVal. We never actually use overload::StrVal, for various rea- | |
# sons described below. | |
# overload versions are as follows: | |
# undef-1.00 (up to perl 5.8.0) uses bless (avoid!) | |
# 1.01-1.17 (perl 5.8.1 to 5.14) uses Scalar::Util | |
# 1.18+ (perl 5.16+) uses overloading | |
# The ancient 'bless' implementation (that inspires our pure-Perl version) | |
# blesses unblessed references and must be avoided. Those using | |
# Scalar::Util use refaddr, possibly the pure-Perl implementation, which | |
# has the same blessing bug, and must be avoided. Also, Scalar::Util is | |
# loaded on demand. Since we avoid the Scalar::Util implementations, we | |
# end up having to implement our own overloading.pm-based version for perl | |
# 5.10.1 to 5.14. Since it also works just as well in more recent ver- | |
# sions, we use it there, too. | |
BEGIN { | |
if (eval { require "overloading.pm" }) { | |
*_StrVal = eval 'sub { no overloading; "$_[0]" }' | |
} | |
else { | |
# Work around the UNIVERSAL::can/isa modules to avoid recursion. | |
# _mycan is either UNIVERSAL::can, or, in the presence of an | |
# override, overload::mycan. | |
*_mycan = _univ_mod_loaded('can') | |
? do { require "overload.pm"; _fetch_sub overload => 'mycan' } | |
: \&UNIVERSAL::can; | |
# _blessed is either UNIVERAL::isa(...), or, in the presence of an | |
# override, a hideous, but fairly reliable, workaround. | |
*_blessed = $isa | |
? sub { &$isa($_[0], "UNIVERSAL") } | |
: sub { | |
my $probe = "UNIVERSAL::Carp_probe_" . rand; | |
no strict 'refs'; | |
local *$probe = sub { "unlikely string" }; | |
local $@; | |
local $SIG{__DIE__} = sub{}; | |
(eval { $_[0]->$probe } || '') eq 'unlikely string' | |
}; | |
*_StrVal = sub { | |
my $pack = ref $_[0]; | |
# Perl's overload mechanism uses the presence of a special | |
# "method" named "((" or "()" to signal it is in effect. | |
# This test seeks to see if it has been set up. "((" post- | |
# dates overloading.pm, so we can skip it. | |
return "$_[0]" unless _mycan($pack, "()"); | |
# Even at this point, the invocant may not be blessed, so | |
# check for that. | |
return "$_[0]" if not _blessed($_[0]); | |
bless $_[0], "Carp"; | |
my $str = "$_[0]"; | |
bless $_[0], $pack; | |
$pack . substr $str, index $str, "="; | |
} | |
} | |
} | |
our $VERSION = '1.50'; | |
$VERSION =~ tr/_//d; | |
our $MaxEvalLen = 0; | |
our $Verbose = 0; | |
our $CarpLevel = 0; | |
our $MaxArgLen = 64; # How much of each argument to print. 0 = all. | |
our $MaxArgNums = 8; # How many arguments to print. 0 = all. | |
our $RefArgFormatter = undef; # allow caller to format reference arguments | |
require Exporter; | |
our @ISA = ('Exporter'); | |
our @EXPORT = qw(confess croak carp); | |
our @EXPORT_OK = qw(cluck verbose longmess shortmess); | |
our @EXPORT_FAIL = qw(verbose); # hook to enable verbose mode | |
# The members of %Internal are packages that are internal to perl. | |
# Carp will not report errors from within these packages if it | |
# can. The members of %CarpInternal are internal to Perl's warning | |
# system. Carp will not report errors from within these packages | |
# either, and will not report calls *to* these packages for carp and | |
# croak. They replace $CarpLevel, which is deprecated. The | |
# $Max(EvalLen|(Arg(Len|Nums)) variables are used to specify how the eval | |
# text and function arguments should be formatted when printed. | |
our %CarpInternal; | |
our %Internal; | |
# disable these by default, so they can live w/o require Carp | |
$CarpInternal{Carp}++; | |
$CarpInternal{warnings}++; | |
$Internal{Exporter}++; | |
$Internal{'Exporter::Heavy'}++; | |
# if the caller specifies verbose usage ("perl -MCarp=verbose script.pl") | |
# then the following method will be called by the Exporter which knows | |
# to do this thanks to @EXPORT_FAIL, above. $_[1] will contain the word | |
# 'verbose'. | |
sub export_fail { shift; $Verbose = shift if $_[0] eq 'verbose'; @_ } | |
sub _cgc { | |
no strict 'refs'; | |
return \&{"CORE::GLOBAL::caller"} if defined &{"CORE::GLOBAL::caller"}; | |
return; | |
} | |
sub longmess { | |
local($!, $^E); | |
# Icky backwards compatibility wrapper. :-( | |
# | |
# The story is that the original implementation hard-coded the | |
# number of call levels to go back, so calls to longmess were off | |
# by one. Other code began calling longmess and expecting this | |
# behaviour, so the replacement has to emulate that behaviour. | |
my $cgc = _cgc(); | |
my $call_pack = $cgc ? $cgc->() : caller(); | |
if ( $Internal{$call_pack} or $CarpInternal{$call_pack} ) { | |
return longmess_heavy(@_); | |
} | |
else { | |
local $CarpLevel = $CarpLevel + 1; | |
return longmess_heavy(@_); | |
} | |
} | |
our @CARP_NOT; | |
sub shortmess { | |
local($!, $^E); | |
my $cgc = _cgc(); | |
# Icky backwards compatibility wrapper. :-( | |
local @CARP_NOT = $cgc ? $cgc->() : caller(); | |
shortmess_heavy(@_); | |
} | |
sub croak { die shortmess @_ } | |
sub confess { die longmess @_ } | |
sub carp { warn shortmess @_ } | |
sub cluck { warn longmess @_ } | |
BEGIN { | |
if("$]" >= 5.015002 || ("$]" >= 5.014002 && "$]" < 5.015) || | |
("$]" >= 5.012005 && "$]" < 5.013)) { | |
*CALLER_OVERRIDE_CHECK_OK = sub () { 1 }; | |
} else { | |
*CALLER_OVERRIDE_CHECK_OK = sub () { 0 }; | |
} | |
} | |
sub caller_info { | |
my $i = shift(@_) + 1; | |
my %call_info; | |
my $cgc = _cgc(); | |
{ | |
# Some things override caller() but forget to implement the | |
# @DB::args part of it, which we need. We check for this by | |
# pre-populating @DB::args with a sentinel which no-one else | |
# has the address of, so that we can detect whether @DB::args | |
# has been properly populated. However, on earlier versions | |
# of perl this check tickles a bug in CORE::caller() which | |
# leaks memory. So we only check on fixed perls. | |
@DB::args = \$i if CALLER_OVERRIDE_CHECK_OK; | |
package DB; | |
@call_info{ | |
qw(pack file line sub has_args wantarray evaltext is_require) } | |
= $cgc ? $cgc->($i) : caller($i); | |
} | |
unless ( defined $call_info{file} ) { | |
return (); | |
} | |
my $sub_name = Carp::get_subname( \%call_info ); | |
if ( $call_info{has_args} ) { | |
# Guard our serialization of the stack from stack refcounting bugs | |
# NOTE this is NOT a complete solution, we cannot 100% guard against | |
# these bugs. However in many cases Perl *is* capable of detecting | |
# them and throws an error when it does. Unfortunately serializing | |
# the arguments on the stack is a perfect way of finding these bugs, | |
# even when they would not affect normal program flow that did not | |
# poke around inside the stack. Inside of Carp.pm it makes little | |
# sense reporting these bugs, as Carp's job is to report the callers | |
# errors, not the ones it might happen to tickle while doing so. | |
# See: https://rt.perl.org/Public/Bug/Display.html?id=131046 | |
# and: https://rt.perl.org/Public/Bug/Display.html?id=52610 | |
# for more details and discussion. - Yves | |
my @args = map { | |
my $arg; | |
local $@= $@; | |
eval { | |
$arg = $_; | |
1; | |
} or do { | |
$arg = '** argument not available anymore **'; | |
}; | |
$arg; | |
} @DB::args; | |
if (CALLER_OVERRIDE_CHECK_OK && @args == 1 | |
&& ref $args[0] eq ref \$i | |
&& $args[0] == \$i ) { | |
@args = (); # Don't let anyone see the address of $i | |
local $@; | |
my $where = eval { | |
my $func = $cgc or return ''; | |
my $gv = | |
(_fetch_sub B => 'svref_2object' or return '') | |
->($func)->GV; | |
my $package = $gv->STASH->NAME; | |
my $subname = $gv->NAME; | |
return unless defined $package && defined $subname; | |
# returning CORE::GLOBAL::caller isn't useful for tracing the cause: | |
return if $package eq 'CORE::GLOBAL' && $subname eq 'caller'; | |
" in &${package}::$subname"; | |
} || ''; | |
@args | |
= "** Incomplete caller override detected$where; \@DB::args were not set **"; | |
} | |
else { | |
my $overflow; | |
if ( $MaxArgNums and @args > $MaxArgNums ) | |
{ # More than we want to show? | |
$#args = $MaxArgNums - 1; | |
$overflow = 1; | |
} | |
@args = map { Carp::format_arg($_) } @args; | |
if ($overflow) { | |
push @args, '...'; | |
} | |
} | |
# Push the args onto the subroutine | |
$sub_name .= '(' . join( ', ', @args ) . ')'; | |
} | |
$call_info{sub_name} = $sub_name; | |
return wantarray() ? %call_info : \%call_info; | |
} | |
# Transform an argument to a function into a string. | |
our $in_recurse; | |
sub format_arg { | |
my $arg = shift; | |
if ( my $pack= ref($arg) ) { | |
# legitimate, let's not leak it. | |
if (!$in_recurse && _maybe_isa( $arg, 'UNIVERSAL' ) && | |
do { | |
local $@; | |
local $in_recurse = 1; | |
local $SIG{__DIE__} = sub{}; | |
eval {$arg->can('CARP_TRACE') } | |
}) | |
{ | |
return $arg->CARP_TRACE(); | |
} | |
elsif (!$in_recurse && | |
defined($RefArgFormatter) && | |
do { | |
local $@; | |
local $in_recurse = 1; | |
local $SIG{__DIE__} = sub{}; | |
eval {$arg = $RefArgFormatter->($arg); 1} | |
}) | |
{ | |
return $arg; | |
} | |
else | |
{ | |
# Argument may be blessed into a class with overloading, and so | |
# might have an overloaded stringification. We don't want to | |
# risk getting the overloaded stringification, so we need to | |
# use _StrVal, our overload::StrVal()-equivalent. | |
return _StrVal $arg; | |
} | |
} | |
return "undef" if !defined($arg); | |
downgrade($arg, 1); | |
return $arg if !(UTF8_REGEXP_PROBLEM && is_utf8($arg)) && | |
$arg =~ /\A-?[0-9]+(?:\.[0-9]*)?(?:[eE][-+]?[0-9]+)?\z/; | |
my $suffix = ""; | |
if ( 2 < $MaxArgLen and $MaxArgLen < length($arg) ) { | |
substr ( $arg, $MaxArgLen - 3 ) = ""; | |
$suffix = "..."; | |
} | |
if(UTF8_REGEXP_PROBLEM && is_utf8($arg)) { | |
for(my $i = length($arg); $i--; ) { | |
my $c = substr($arg, $i, 1); | |
my $x = substr($arg, 0, 0); # work around bug on Perl 5.8.{1,2} | |
if($c eq "\"" || $c eq "\\" || $c eq "\$" || $c eq "\@") { | |
substr $arg, $i, 0, "\\"; | |
next; | |
} | |
my $o = ord($c); | |
substr $arg, $i, 1, sprintf("\\x{%x}", $o) | |
unless is_safe_printable_codepoint($o); | |
} | |
} else { | |
$arg =~ s/([\"\\\$\@])/\\$1/g; | |
# This is all the ASCII printables spelled-out. It is portable to all | |
# Perl versions and platforms (such as EBCDIC). There are other more | |
# compact ways to do this, but may not work everywhere every version. | |
$arg =~ s/([^ !"#\$\%\&'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg; | |
} | |
downgrade($arg, 1); | |
return "\"".$arg."\"".$suffix; | |
} | |
sub Regexp::CARP_TRACE { | |
my $arg = "$_[0]"; | |
downgrade($arg, 1); | |
if(UTF8_REGEXP_PROBLEM && is_utf8($arg)) { | |
for(my $i = length($arg); $i--; ) { | |
my $o = ord(substr($arg, $i, 1)); | |
my $x = substr($arg, 0, 0); # work around bug on Perl 5.8.{1,2} | |
substr $arg, $i, 1, sprintf("\\x{%x}", $o) | |
unless is_safe_printable_codepoint($o); | |
} | |
} else { | |
# See comment in format_arg() about this same regex. | |
$arg =~ s/([^ !"#\$\%\&'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg; | |
} | |
downgrade($arg, 1); | |
my $suffix = ""; | |
if($arg =~ /\A\(\?\^?([a-z]*)(?:-[a-z]*)?:(.*)\)\z/s) { | |
($suffix, $arg) = ($1, $2); | |
} | |
if ( 2 < $MaxArgLen and $MaxArgLen < length($arg) ) { | |
substr ( $arg, $MaxArgLen - 3 ) = ""; | |
$suffix = "...".$suffix; | |
} | |
return "qr($arg)$suffix"; | |
} | |
# Takes an inheritance cache and a package and returns | |
# an anon hash of known inheritances and anon array of | |
# inheritances which consequences have not been figured | |
# for. | |
sub get_status { | |
my $cache = shift; | |
my $pkg = shift; | |
$cache->{$pkg} ||= [ { $pkg => $pkg }, [ trusts_directly($pkg) ] ]; | |
return @{ $cache->{$pkg} }; | |
} | |
# Takes the info from caller() and figures out the name of | |
# the sub/require/eval | |
sub get_subname { | |
my $info = shift; | |
if ( defined( $info->{evaltext} ) ) { | |
my $eval = $info->{evaltext}; | |
if ( $info->{is_require} ) { | |
return "require $eval"; | |
} | |
else { | |
$eval =~ s/([\\\'])/\\$1/g; | |
return "eval '" . str_len_trim( $eval, $MaxEvalLen ) . "'"; | |
} | |
} | |
# this can happen on older perls when the sub (or the stash containing it) | |
# has been deleted | |
if ( !defined( $info->{sub} ) ) { | |
return '__ANON__::__ANON__'; | |
} | |
return ( $info->{sub} eq '(eval)' ) ? 'eval {...}' : $info->{sub}; | |
} | |
# Figures out what call (from the point of view of the caller) | |
# the long error backtrace should start at. | |
sub long_error_loc { | |
my $i; | |
my $lvl = $CarpLevel; | |
{ | |
++$i; | |
my $cgc = _cgc(); | |
my @caller = $cgc ? $cgc->($i) : caller($i); | |
my $pkg = $caller[0]; | |
unless ( defined($pkg) ) { | |
# This *shouldn't* happen. | |
if (%Internal) { | |
local %Internal; | |
$i = long_error_loc(); | |
last; | |
} | |
elsif (defined $caller[2]) { | |
# this can happen when the stash has been deleted | |
# in that case, just assume that it's a reasonable place to | |
# stop (the file and line data will still be intact in any | |
# case) - the only issue is that we can't detect if the | |
# deleted package was internal (so don't do that then) | |
# -doy | |
redo unless 0 > --$lvl; | |
last; | |
} | |
else { | |
return 2; | |
} | |
} | |
redo if $CarpInternal{$pkg}; | |
redo unless 0 > --$lvl; | |
redo if $Internal{$pkg}; | |
} | |
return $i - 1; | |
} | |
sub longmess_heavy { | |
if ( ref( $_[0] ) ) { # don't break references as exceptions | |
return wantarray ? @_ : $_[0]; | |
} | |
my $i = long_error_loc(); | |
return ret_backtrace( $i, @_ ); | |
} | |
BEGIN { | |
if("$]" >= 5.017004) { | |
# The LAST_FH constant is a reference to the variable. | |
$Carp::{LAST_FH} = \eval '\${^LAST_FH}'; | |
} else { | |
eval '*LAST_FH = sub () { 0 }'; | |
} | |
} | |
# Returns a full stack backtrace starting from where it is | |
# told. | |
sub ret_backtrace { | |
my ( $i, @error ) = @_; | |
my $mess; | |
my $err = join '', @error; | |
$i++; | |
my $tid_msg = ''; | |
if ( defined &threads::tid ) { | |
my $tid = threads->tid; | |
$tid_msg = " thread $tid" if $tid; | |
} | |
my %i = caller_info($i); | |
$mess = "$err at $i{file} line $i{line}$tid_msg"; | |
if( $. ) { | |
# Use ${^LAST_FH} if available. | |
if (LAST_FH) { | |
if (${+LAST_FH}) { | |
$mess .= sprintf ", <%s> %s %d", | |
*${+LAST_FH}{NAME}, | |
($/ eq "\n" ? "line" : "chunk"), $. | |
} | |
} | |
else { | |
local $@ = ''; | |
local $SIG{__DIE__}; | |
eval { | |
CORE::die; | |
}; | |
if($@ =~ /^Died at .*(, <.*?> (?:line|chunk) \d+).$/ ) { | |
$mess .= $1; | |
} | |
} | |
} | |
$mess .= "\.\n"; | |
while ( my %i = caller_info( ++$i ) ) { | |
$mess .= "\t$i{sub_name} called at $i{file} line $i{line}$tid_msg\n"; | |
} | |
return $mess; | |
} | |
sub ret_summary { | |
my ( $i, @error ) = @_; | |
my $err = join '', @error; | |
$i++; | |
my $tid_msg = ''; | |
if ( defined &threads::tid ) { | |
my $tid = threads->tid; | |
$tid_msg = " thread $tid" if $tid; | |
} | |
my %i = caller_info($i); | |
return "$err at $i{file} line $i{line}$tid_msg\.\n"; | |
} | |
sub short_error_loc { | |
# You have to create your (hash)ref out here, rather than defaulting it | |
# inside trusts *on a lexical*, as you want it to persist across calls. | |
# (You can default it on $_[2], but that gets messy) | |
my $cache = {}; | |
my $i = 1; | |
my $lvl = $CarpLevel; | |
{ | |
my $cgc = _cgc(); | |
my $called = $cgc ? $cgc->($i) : caller($i); | |
$i++; | |
my $caller = $cgc ? $cgc->($i) : caller($i); | |
if (!defined($caller)) { | |
my @caller = $cgc ? $cgc->($i) : caller($i); | |
if (@caller) { | |
# if there's no package but there is other caller info, then | |
# the package has been deleted - treat this as a valid package | |
# in this case | |
redo if defined($called) && $CarpInternal{$called}; | |
redo unless 0 > --$lvl; | |
last; | |
} | |
else { | |
return 0; | |
} | |
} | |
redo if $Internal{$caller}; | |
redo if $CarpInternal{$caller}; | |
redo if $CarpInternal{$called}; | |
redo if trusts( $called, $caller, $cache ); | |
redo if trusts( $caller, $called, $cache ); | |
redo unless 0 > --$lvl; | |
} | |
return $i - 1; | |
} | |
sub shortmess_heavy { | |
return longmess_heavy(@_) if $Verbose; | |
return @_ if ref( $_[0] ); # don't break references as exceptions | |
my $i = short_error_loc(); | |
if ($i) { | |
ret_summary( $i, @_ ); | |
} | |
else { | |
longmess_heavy(@_); | |
} | |
} | |
# If a string is too long, trims it with ... | |
sub str_len_trim { | |
my $str = shift; | |
my $max = shift || 0; | |
if ( 2 < $max and $max < length($str) ) { | |
substr( $str, $max - 3 ) = '...'; | |
} | |
return $str; | |
} | |
# Takes two packages and an optional cache. Says whether the | |
# first inherits from the second. | |
# | |
# Recursive versions of this have to work to avoid certain | |
# possible endless loops, and when following long chains of | |
# inheritance are less efficient. | |
sub trusts { | |
my $child = shift; | |
my $parent = shift; | |
my $cache = shift; | |
my ( $known, $partial ) = get_status( $cache, $child ); | |
# Figure out consequences until we have an answer | |
while ( @$partial and not exists $known->{$parent} ) { | |
my $anc = shift @$partial; | |
next if exists $known->{$anc}; | |
$known->{$anc}++; | |
my ( $anc_knows, $anc_partial ) = get_status( $cache, $anc ); | |
my @found = keys %$anc_knows; | |
@$known{@found} = (); | |
push @$partial, @$anc_partial; | |
} | |
return exists $known->{$parent}; | |
} | |
# Takes a package and gives a list of those trusted directly | |
sub trusts_directly { | |
my $class = shift; | |
no strict 'refs'; | |
my $stash = \%{"$class\::"}; | |
for my $var (qw/ CARP_NOT ISA /) { | |
# Don't try using the variable until we know it exists, | |
# to avoid polluting the caller's namespace. | |
if ( $stash->{$var} && ref \$stash->{$var} eq 'GLOB' | |
&& *{$stash->{$var}}{ARRAY} && @{$stash->{$var}} ) { | |
return @{$stash->{$var}} | |
} | |
} | |
return; | |
} | |
if(!defined($warnings::VERSION) || | |
do { no warnings "numeric"; $warnings::VERSION < 1.03 }) { | |
# Very old versions of warnings.pm import from Carp. This can go | |
# wrong due to the circular dependency. If Carp is invoked before | |
# warnings, then Carp starts by loading warnings, then warnings | |
# tries to import from Carp, and gets nothing because Carp is in | |
# the process of loading and hasn't defined its import method yet. | |
# So we work around that by manually exporting to warnings here. | |
no strict "refs"; | |
*{"warnings::$_"} = \&$_ foreach @EXPORT; | |
} | |
1; | |
__END__ | |
=head1 NAME | |
Carp - alternative warn and die for modules | |
=head1 SYNOPSIS | |
use Carp; | |
# warn user (from perspective of caller) | |
carp "string trimmed to 80 chars"; | |
# die of errors (from perspective of caller) | |
croak "We're outta here!"; | |
# die of errors with stack backtrace | |
confess "not implemented"; | |
# cluck, longmess and shortmess not exported by default | |
use Carp qw(cluck longmess shortmess); | |
cluck "This is how we got here!"; # warn with stack backtrace | |
$long_message = longmess( "message from cluck() or confess()" ); | |
$short_message = shortmess( "message from carp() or croak()" ); | |
=head1 DESCRIPTION | |
The Carp routines are useful in your own modules because | |
they act like C<die()> or C<warn()>, but with a message which is more | |
likely to be useful to a user of your module. In the case of | |
C<cluck()> and C<confess()>, that context is a summary of every | |
call in the call-stack; C<longmess()> returns the contents of the error | |
message. | |
For a shorter message you can use C<carp()> or C<croak()> which report the | |
error as being from where your module was called. C<shortmess()> returns the | |
contents of this error message. There is no guarantee that that is where the | |
error was, but it is a good educated guess. | |
C<Carp> takes care not to clobber the status variables C<$!> and C<$^E> | |
in the course of assembling its error messages. This means that a | |
C<$SIG{__DIE__}> or C<$SIG{__WARN__}> handler can capture the error | |
information held in those variables, if it is required to augment the | |
error message, and if the code calling C<Carp> left useful values there. | |
Of course, C<Carp> can't guarantee the latter. | |
You can also alter the way the output and logic of C<Carp> works, by | |
changing some global variables in the C<Carp> namespace. See the | |
section on C<GLOBAL VARIABLES> below. | |
Here is a more complete description of how C<carp> and C<croak> work. | |
What they do is search the call-stack for a function call stack where | |
they have not been told that there shouldn't be an error. If every | |
call is marked safe, they give up and give a full stack backtrace | |
instead. In other words they presume that the first likely looking | |
potential suspect is guilty. Their rules for telling whether | |
a call shouldn't generate errors work as follows: | |
=over 4 | |
=item 1. | |
Any call from a package to itself is safe. | |
=item 2. | |
Packages claim that there won't be errors on calls to or from | |
packages explicitly marked as safe by inclusion in C<@CARP_NOT>, or | |
(if that array is empty) C<@ISA>. The ability to override what | |
@ISA says is new in 5.8. | |
=item 3. | |
The trust in item 2 is transitive. If A trusts B, and B | |
trusts C, then A trusts C. So if you do not override C<@ISA> | |
with C<@CARP_NOT>, then this trust relationship is identical to, | |
"inherits from". | |
=item 4. | |
Any call from an internal Perl module is safe. (Nothing keeps | |
user modules from marking themselves as internal to Perl, but | |
this practice is discouraged.) | |
=item 5. | |
Any call to Perl's warning system (eg Carp itself) is safe. | |
(This rule is what keeps it from reporting the error at the | |
point where you call C<carp> or C<croak>.) | |
=item 6. | |
C<$Carp::CarpLevel> can be set to skip a fixed number of additional | |
call levels. Using this is not recommended because it is very | |
difficult to get it to behave correctly. | |
=back | |
=head2 Forcing a Stack Trace | |
As a debugging aid, you can force Carp to treat a croak as a confess | |
and a carp as a cluck across I<all> modules. In other words, force a | |
detailed stack trace to be given. This can be very helpful when trying | |
to understand why, or from where, a warning or error is being generated. | |
This feature is enabled by 'importing' the non-existent symbol | |
'verbose'. You would typically enable it by saying | |
perl -MCarp=verbose script.pl | |
or by including the string C<-MCarp=verbose> in the PERL5OPT | |
environment variable. | |
Alternately, you can set the global variable C<$Carp::Verbose> to true. | |
See the C<GLOBAL VARIABLES> section below. | |
=head2 Stack Trace formatting | |
At each stack level, the subroutine's name is displayed along with | |
its parameters. For simple scalars, this is sufficient. For complex | |
data types, such as objects and other references, this can simply | |
display C<'HASH(0x1ab36d8)'>. | |
Carp gives two ways to control this. | |
=over 4 | |
=item 1. | |
For objects, a method, C<CARP_TRACE>, will be called, if it exists. If | |
this method doesn't exist, or it recurses into C<Carp>, or it otherwise | |
throws an exception, this is skipped, and Carp moves on to the next option, | |
otherwise checking stops and the string returned is used. It is recommended | |
that the object's type is part of the string to make debugging easier. | |
=item 2. | |
For any type of reference, C<$Carp::RefArgFormatter> is checked (see below). | |
This variable is expected to be a code reference, and the current parameter | |
is passed in. If this function doesn't exist (the variable is undef), or | |
it recurses into C<Carp>, or it otherwise throws an exception, this is | |
skipped, and Carp moves on to the next option, otherwise checking stops | |
and the string returned is used. | |
=item 3. | |
Otherwise, if neither C<CARP_TRACE> nor C<$Carp::RefArgFormatter> is | |
available, stringify the value ignoring any overloading. | |
=back | |
=head1 GLOBAL VARIABLES | |
=head2 $Carp::MaxEvalLen | |
This variable determines how many characters of a string-eval are to | |
be shown in the output. Use a value of C<0> to show all text. | |
Defaults to C<0>. | |
=head2 $Carp::MaxArgLen | |
This variable determines how many characters of each argument to a | |
function to print. Use a value of C<0> to show the full length of the | |
argument. | |
Defaults to C<64>. | |
=head2 $Carp::MaxArgNums | |
This variable determines how many arguments to each function to show. | |
Use a false value to show all arguments to a function call. To suppress all | |
arguments, use C<-1> or C<'0 but true'>. | |
Defaults to C<8>. | |
=head2 $Carp::Verbose | |
This variable makes C<carp()> and C<croak()> generate stack backtraces | |
just like C<cluck()> and C<confess()>. This is how C<use Carp 'verbose'> | |
is implemented internally. | |
Defaults to C<0>. | |
=head2 $Carp::RefArgFormatter | |
This variable sets a general argument formatter to display references. | |
Plain scalars and objects that implement C<CARP_TRACE> will not go through | |
this formatter. Calling C<Carp> from within this function is not supported. | |
local $Carp::RefArgFormatter = sub { | |
require Data::Dumper; | |
Data::Dumper::Dump($_[0]); # not necessarily safe | |
}; | |
=head2 @CARP_NOT | |
This variable, I<in your package>, says which packages are I<not> to be | |
considered as the location of an error. The C<carp()> and C<cluck()> | |
functions will skip over callers when reporting where an error occurred. | |
NB: This variable must be in the package's symbol table, thus: | |
# These work | |
our @CARP_NOT; # file scope | |
use vars qw(@CARP_NOT); # package scope | |
@My::Package::CARP_NOT = ... ; # explicit package variable | |
# These don't work | |
sub xyz { ... @CARP_NOT = ... } # w/o declarations above | |
my @CARP_NOT; # even at top-level | |
Example of use: | |
package My::Carping::Package; | |
use Carp; | |
our @CARP_NOT; | |
sub bar { .... or _error('Wrong input') } | |
sub _error { | |
# temporary control of where'ness, __PACKAGE__ is implicit | |
local @CARP_NOT = qw(My::Friendly::Caller); | |
carp(@_) | |
} | |
This would make C<Carp> report the error as coming from a caller not | |
in C<My::Carping::Package>, nor from C<My::Friendly::Caller>. | |
Also read the L</DESCRIPTION> section above, about how C<Carp> decides | |
where the error is reported from. | |
Use C<@CARP_NOT>, instead of C<$Carp::CarpLevel>. | |
Overrides C<Carp>'s use of C<@ISA>. | |
=head2 %Carp::Internal | |
This says what packages are internal to Perl. C<Carp> will never | |
report an error as being from a line in a package that is internal to | |
Perl. For example: | |
$Carp::Internal{ (__PACKAGE__) }++; | |
# time passes... | |
sub foo { ... or confess("whatever") }; | |
would give a full stack backtrace starting from the first caller | |
outside of __PACKAGE__. (Unless that package was also internal to | |
Perl.) | |
=head2 %Carp::CarpInternal | |
This says which packages are internal to Perl's warning system. For | |
generating a full stack backtrace this is the same as being internal | |
to Perl, the stack backtrace will not start inside packages that are | |
listed in C<%Carp::CarpInternal>. But it is slightly different for | |
the summary message generated by C<carp> or C<croak>. There errors | |
will not be reported on any lines that are calling packages in | |
C<%Carp::CarpInternal>. | |
For example C<Carp> itself is listed in C<%Carp::CarpInternal>. | |
Therefore the full stack backtrace from C<confess> will not start | |
inside of C<Carp>, and the short message from calling C<croak> is | |
not placed on the line where C<croak> was called. | |
=head2 $Carp::CarpLevel | |
This variable determines how many additional call frames are to be | |
skipped that would not otherwise be when reporting where an error | |
occurred on a call to one of C<Carp>'s functions. It is fairly easy | |
to count these call frames on calls that generate a full stack | |
backtrace. However it is much harder to do this accounting for calls | |
that generate a short message. Usually people skip too many call | |
frames. If they are lucky they skip enough that C<Carp> goes all of | |
the way through the call stack, realizes that something is wrong, and | |
then generates a full stack backtrace. If they are unlucky then the | |
error is reported from somewhere misleading very high in the call | |
stack. | |
Therefore it is best to avoid C<$Carp::CarpLevel>. Instead use | |
C<@CARP_NOT>, C<%Carp::Internal> and C<%Carp::CarpInternal>. | |
Defaults to C<0>. | |
=head1 BUGS | |
The Carp routines don't handle exception objects currently. | |
If called with a first argument that is a reference, they simply | |
call die() or warn(), as appropriate. | |
=head1 SEE ALSO | |
L<Carp::Always>, | |
L<Carp::Clan> | |
=head1 CONTRIBUTING | |
L<Carp> is maintained by the perl 5 porters as part of the core perl 5 | |
version control repository. Please see the L<perlhack> perldoc for how to | |
submit patches and contribute to it. | |
=head1 AUTHOR | |
The Carp module first appeared in Larry Wall's perl 5.000 distribution. | |
Since then it has been modified by several of the perl 5 porters. | |
Andrew Main (Zefram) <zefram@fysh.org> divested Carp into an independent | |
distribution. | |
=head1 COPYRIGHT | |
Copyright (C) 1994-2013 Larry Wall | |
Copyright (C) 2011, 2012, 2013 Andrew Main (Zefram) <zefram@fysh.org> | |
=head1 LICENSE | |
This module is free software; you can redistribute it and/or modify it | |
under the same terms as Perl itself. | |
CARP | |
$fatpacked{"Carp/Heavy.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'CARP_HEAVY'; | |
package Carp::Heavy; | |
use Carp (); | |
our $VERSION = '1.50'; | |
$VERSION =~ tr/_//d; | |
# Carp::Heavy was merged into Carp in version 1.12. Any mismatched versions | |
# after this point are not significant and can be ignored. | |
if(($Carp::VERSION || 0) < 1.12) { | |
my $cv = defined($Carp::VERSION) ? $Carp::VERSION : "undef"; | |
die "Version mismatch between Carp $cv ($INC{q(Carp.pm)}) and Carp::Heavy $VERSION ($INC{q(Carp/Heavy.pm)}). Did you alter \@INC after Carp was loaded?\n"; | |
} | |
1; | |
# Most of the machinery of Carp used to be here. | |
# It has been moved in Carp.pm now, but this placeholder remains for | |
# the benefit of modules that like to preload Carp::Heavy directly. | |
# This must load Carp, because some modules rely on the historical | |
# behaviour of Carp::Heavy loading Carp. | |
CARP_HEAVY | |
$fatpacked{"Crypt/RC4.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'CRYPT_RC4'; | |
#--------------------------------------------------------------------# | |
# Crypt::RC4 | |
# Date Written: 07-Jun-2000 04:15:55 PM | |
# Last Modified: 13-Dec-2001 03:33:49 PM | |
# Author: Kurt Kincaid (sifukurt@yahoo.com) | |
# Copyright (c) 2001, Kurt Kincaid | |
# All Rights Reserved. | |
# | |
# This is free software and may be modified and/or | |
# redistributed under the same terms as Perl itself. | |
#--------------------------------------------------------------------# | |
package Crypt::RC4; | |
use strict; | |
use vars qw( $VERSION @ISA @EXPORT $MAX_CHUNK_SIZE ); | |
$MAX_CHUNK_SIZE = 1024 unless $MAX_CHUNK_SIZE; | |
require Exporter; | |
@ISA = qw(Exporter); | |
@EXPORT = qw(RC4); | |
$VERSION = '2.02'; | |
sub new { | |
my ( $class, $key ) = @_; | |
my $self = bless {}, $class; | |
$self->{state} = Setup( $key ); | |
$self->{x} = 0; | |
$self->{y} = 0; | |
$self; | |
} | |
sub RC4 { | |
my $self; | |
my( @state, $x, $y ); | |
if ( ref $_[0] ) { | |
$self = shift; | |
@state = @{ $self->{state} }; | |
$x = $self->{x}; | |
$y = $self->{y}; | |
} else { | |
@state = Setup( shift ); | |
$x = $y = 0; | |
} | |
my $message = shift; | |
my $num_pieces = do { | |
my $num = length($message) / $MAX_CHUNK_SIZE; | |
my $int = int $num; | |
$int == $num ? $int : $int+1; | |
}; | |
for my $piece ( 0..$num_pieces - 1 ) { | |
my @message = unpack "C*", substr($message, $piece * $MAX_CHUNK_SIZE, $MAX_CHUNK_SIZE); | |
for ( @message ) { | |
$x = 0 if ++$x > 255; | |
$y -= 256 if ($y += $state[$x]) > 255; | |
@state[$x, $y] = @state[$y, $x]; | |
$_ ^= $state[( $state[$x] + $state[$y] ) % 256]; | |
} | |
substr($message, $piece * $MAX_CHUNK_SIZE, $MAX_CHUNK_SIZE) = pack "C*", @message; | |
} | |
if ($self) { | |
$self->{state} = \@state; | |
$self->{x} = $x; | |
$self->{y} = $y; | |
} | |
$message; | |
} | |
sub Setup { | |
my @k = unpack( 'C*', shift ); | |
my @state = 0..255; | |
my $y = 0; | |
for my $x (0..255) { | |
$y = ( $k[$x % @k] + $state[$x] + $y ) % 256; | |
@state[$x, $y] = @state[$y, $x]; | |
} | |
wantarray ? @state : \@state; | |
} | |
1; | |
__END__ | |
=head1 NAME | |
Crypt::RC4 - Perl implementation of the RC4 encryption algorithm | |
=head1 SYNOPSIS | |
# Functional Style | |
use Crypt::RC4; | |
$encrypted = RC4( $passphrase, $plaintext ); | |
$decrypt = RC4( $passphrase, $encrypted ); | |
# OO Style | |
use Crypt::RC4; | |
$ref = Crypt::RC4->new( $passphrase ); | |
$encrypted = $ref->RC4( $plaintext ); | |
$ref2 = Crypt::RC4->new( $passphrase ); | |
$decrypted = $ref2->RC4( $encrypted ); | |
# process an entire file, one line at a time | |
# (Warning: Encrypted file leaks line lengths.) | |
$ref3 = Crypt::RC4->new( $passphrase ); | |
while (<FILE>) { | |
chomp; | |
print $ref3->RC4($_), "\n"; | |
} | |
=head1 DESCRIPTION | |
A simple implementation of the RC4 algorithm, developed by RSA Security, Inc. Here is the description | |
from RSA's website: | |
RC4 is a stream cipher designed by Rivest for RSA Data Security (now RSA Security). It is a variable | |
key-size stream cipher with byte-oriented operations. The algorithm is based on the use of a random | |
permutation. Analysis shows that the period of the cipher is overwhelmingly likely to be greater than | |
10100. Eight to sixteen machine operations are required per output byte, and the cipher can be | |
expected to run very quickly in software. Independent analysts have scrutinized the algorithm and it | |
is considered secure. | |
Based substantially on the "RC4 in 3 lines of perl" found at http://www.cypherspace.org | |
A major bug in v1.0 was fixed by David Hook (dgh@wumpus.com.au). Thanks, David. | |
=head1 AUTHOR | |
Kurt Kincaid (sifukurt@yahoo.com) | |
Ronald Rivest for RSA Security, Inc. | |
=head1 BUGS | |
Disclaimer: Strictly speaking, this module uses the "alleged" RC4 | |
algorithm. The Algorithm known as "RC4" is a trademark of RSA Security | |
Inc., and this document makes no claims one way or another that this | |
is the correct algorithm, and further, make no claims about the | |
quality of the source code nor any licensing requirements for | |
commercial use. | |
There's nothing preventing you from using this module in an insecure | |
way which leaks information. For example, encrypting multilple | |
messages with the same passphrase may allow an attacker to decode all of | |
them with little effort, even though they'll appear to be secured. If | |
serious crypto is your goal, be careful. Be very careful. | |
It's a pure-Perl implementation, so that rating of "Eight | |
to sixteen machine operations" is good for nothing but a good laugh. | |
If encryption and decryption are a bottleneck for you, please re-write | |
this module to use native code wherever practical. | |
=head1 LICENSE | |
This is free software and may be modified and/or | |
redistributed under the same terms as Perl itself. | |
=head1 SEE ALSO | |
L<perl>, L<http://www.cypherspace.org>, L<http://www.rsasecurity.com>, | |
L<http://www.achtung.com/crypto/rc4.html>, | |
L<http://www.columbia.edu/~ariel/ssleay/rrc4.html> | |
=cut | |
CRYPT_RC4 | |
$fatpacked{"Digest/Perl/MD5.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'DIGEST_PERL_MD5'; | |
package Digest::Perl::MD5; | |
use strict; | |
use integer; | |
use Exporter; | |
use vars qw($VERSION @ISA @EXPORTER @EXPORT_OK); | |
@EXPORT_OK = qw(md5 md5_hex md5_base64); | |
@ISA = 'Exporter'; | |
$VERSION = '1.9'; | |
# I-Vektor | |
sub A() { 0x67_45_23_01 } | |
sub B() { 0xef_cd_ab_89 } | |
sub C() { 0x98_ba_dc_fe } | |
sub D() { 0x10_32_54_76 } | |
# for internal use | |
sub MAX() { 0xFFFFFFFF } | |
# pad a message to a multiple of 64 | |
sub padding { | |
my $l = length (my $msg = shift() . chr(128)); | |
$msg .= "\0" x (($l%64<=56?56:120)-$l%64); | |
$l = ($l-1)*8; | |
$msg .= pack 'VV', $l & MAX , ($l >> 16 >> 16); | |
} | |
sub rotate_left($$) { | |
#$_[0] << $_[1] | $_[0] >> (32 - $_[1]); | |
#my $right = $_[0] >> (32 - $_[1]); | |
#my $rmask = (1 << $_[1]) - 1; | |
($_[0] << $_[1]) | (( $_[0] >> (32 - $_[1]) ) & ((1 << $_[1]) - 1)); | |
#$_[0] << $_[1] | (($_[0]>> (32 - $_[1])) & (1 << (32 - $_[1])) - 1); | |
} | |
sub gen_code { | |
# Discard upper 32 bits on 64 bit archs. | |
my $MSK = ((1 << 16) << 16) ? ' & ' . MAX : ''; | |
# FF => "X0=rotate_left(((X1&X2)|(~X1&X3))+X0+X4+X6$MSK,X5)+X1$MSK;", | |
# GG => "X0=rotate_left(((X1&X3)|(X2&(~X3)))+X0+X4+X6$MSK,X5)+X1$MSK;", | |
my %f = ( | |
FF => "X0=rotate_left((X3^(X1&(X2^X3)))+X0+X4+X6$MSK,X5)+X1$MSK;", | |
GG => "X0=rotate_left((X2^(X3&(X1^X2)))+X0+X4+X6$MSK,X5)+X1$MSK;", | |
HH => "X0=rotate_left((X1^X2^X3)+X0+X4+X6$MSK,X5)+X1$MSK;", | |
II => "X0=rotate_left((X2^(X1|(~X3)))+X0+X4+X6$MSK,X5)+X1$MSK;", | |
); | |
#unless ( (1 << 16) << 16) { %f = %{$CODES{'32bit'}} } | |
#else { %f = %{$CODES{'64bit'}} } | |
my %s = ( # shift lengths | |
S11 => 7, S12 => 12, S13 => 17, S14 => 22, S21 => 5, S22 => 9, S23 => 14, | |
S24 => 20, S31 => 4, S32 => 11, S33 => 16, S34 => 23, S41 => 6, S42 => 10, | |
S43 => 15, S44 => 21 | |
); | |
my $insert = "\n"; | |
while(defined( my $data = <DATA> )) { | |
chomp $data; | |
next unless $data =~ /^[FGHI]/; | |
my ($func,@x) = split /,/, $data; | |
my $c = $f{$func}; | |
$c =~ s/X(\d)/$x[$1]/g; | |
$c =~ s/(S\d{2})/$s{$1}/; | |
$c =~ s/^(.*)=rotate_left\((.*),(.*)\)\+(.*)$//; | |
my $su = 32 - $3; | |
my $sh = (1 << $3) - 1; | |
$c = "$1=(((\$r=$2)<<$3)|((\$r>>$su)&$sh))+$4"; | |
#my $rotate = "(($2 << $3) || (($2 >> (32 - $3)) & (1 << $2) - 1)))"; | |
# $c = "\$r = $2; | |
# $1 = ((\$r << $3) | ((\$r >> (32 - $3)) & ((1 << $3) - 1))) + $4"; | |
$insert .= "\t$c\n"; | |
} | |
close DATA; | |
my $dump = ' | |
sub round { | |
my ($a,$b,$c,$d) = @_[0 .. 3]; | |
my $r;' . $insert . ' | |
$_[0]+$a' . $MSK . ', $_[1]+$b ' . $MSK . | |
', $_[2]+$c' . $MSK . ', $_[3]+$d' . $MSK . '; | |
}'; | |
eval $dump; | |
# print "$dump\n"; | |
# exit 0; | |
} | |
gen_code(); | |
######################################### | |
# Private output converter functions: | |
sub _encode_hex { unpack 'H*', $_[0] } | |
sub _encode_base64 { | |
my $res; | |
while ($_[0] =~ /(.{1,45})/gs) { | |
$res .= substr pack('u', $1), 1; | |
chop $res; | |
} | |
$res =~ tr|` -_|AA-Za-z0-9+/|;#` | |
chop $res; chop $res; | |
$res | |
} | |
######################################### | |
# OOP interface: | |
sub new { | |
my $proto = shift; | |
my $class = ref $proto || $proto; | |
my $self = {}; | |
bless $self, $class; | |
$self->reset(); | |
$self | |
} | |
sub reset { | |
my $self = shift; | |
delete $self->{_data}; | |
$self->{_state} = [A,B,C,D]; | |
$self->{_length} = 0; | |
$self | |
} | |
sub add { | |
my $self = shift; | |
$self->{_data} .= join '', @_ if @_; | |
my ($i,$c); | |
for $i (0 .. (length $self->{_data})/64-1) { | |
my @X = unpack 'V16', substr $self->{_data}, $i*64, 64; | |
@{$self->{_state}} = round(@{$self->{_state}},@X); | |
++$c; | |
} | |
if ($c) { | |
substr ($self->{_data}, 0, $c*64) = ''; | |
$self->{_length} += $c*64; | |
} | |
$self | |
} | |
sub finalize { | |
my $self = shift; | |
$self->{_data} .= chr(128); | |
my $l = $self->{_length} + length $self->{_data}; | |
$self->{_data} .= "\0" x (($l%64<=56?56:120)-$l%64); | |
$l = ($l-1)*8; | |
$self->{_data} .= pack 'VV', $l & MAX , ($l >> 16 >> 16); | |
$self->add(); | |
$self | |
} | |
sub addfile { | |
my ($self,$fh) = @_; | |
if (!ref($fh) && ref(\$fh) ne "GLOB") { | |
require Symbol; | |
$fh = Symbol::qualify($fh, scalar caller); | |
} | |
# $self->{_data} .= do{local$/;<$fh>}; | |
my $read = 0; | |
my $buffer = ''; | |
$self->add($buffer) while $read = read $fh, $buffer, 8192; | |
die __PACKAGE__, " read failed: $!" unless defined $read; | |
$self | |
} | |
sub add_bits { | |
my $self = shift; | |
return $self->add( pack 'B*', shift ) if @_ == 1; | |
my ($b,$n) = @_; | |
die __PACKAGE__, " Invalid number of bits\n" if $n%8; | |
$self->add( substr $b, 0, $n/8 ) | |
} | |
sub digest { | |
my $self = shift; | |
$self->finalize(); | |
my $res = pack 'V4', @{$self->{_state}}; | |
$self->reset(); | |
$res | |
} | |
sub hexdigest { | |
_encode_hex($_[0]->digest) | |
} | |
sub b64digest { | |
_encode_base64($_[0]->digest) | |
} | |
sub clone { | |
my $self = shift; | |
my $clone = { | |
_state => [@{$self->{_state}}], | |
_length => $self->{_length}, | |
_data => $self->{_data} | |
}; | |
bless $clone, ref $self || $self; | |
} | |
######################################### | |
# Procedural interface: | |
sub md5 { | |
my $message = padding(join'',@_); | |
my ($a,$b,$c,$d) = (A,B,C,D); | |
my $i; | |
for $i (0 .. (length $message)/64-1) { | |
my @X = unpack 'V16', substr $message,$i*64,64; | |
($a,$b,$c,$d) = round($a,$b,$c,$d,@X); | |
} | |
pack 'V4',$a,$b,$c,$d; | |
} | |
sub md5_hex { _encode_hex &md5 } | |
sub md5_base64 { _encode_base64 &md5 } | |
1; | |
=head1 NAME | |
Digest::MD5::Perl - Perl implementation of Ron Rivests MD5 Algorithm | |
=head1 DISCLAIMER | |
This is B<not> an interface (like C<Digest::MD5>) but a Perl implementation of MD5. | |
It is written in perl only and because of this it is slow but it works without C-Code. | |
You should use C<Digest::MD5> instead of this module if it is available. | |
This module is only useful for | |
=over 4 | |
=item | |
computers where you cannot install C<Digest::MD5> (e.g. lack of a C-Compiler) | |
=item | |
encrypting only small amounts of data (less than one million bytes). I use it to | |
hash passwords. | |
=item | |
educational purposes | |
=back | |
=head1 SYNOPSIS | |
# Functional style | |
use Digest::MD5 qw(md5 md5_hex md5_base64); | |
$hash = md5 $data; | |
$hash = md5_hex $data; | |
$hash = md5_base64 $data; | |
# OO style | |
use Digest::MD5; | |
$ctx = Digest::MD5->new; | |
$ctx->add($data); | |
$ctx->addfile(*FILE); | |
$digest = $ctx->digest; | |
$digest = $ctx->hexdigest; | |
$digest = $ctx->b64digest; | |
=head1 DESCRIPTION | |
This modules has the same interface as the much faster C<Digest::MD5>. So you can | |
easily exchange them, e.g. | |
BEGIN { | |
eval { | |
require Digest::MD5; | |
import Digest::MD5 'md5_hex' | |
}; | |
if ($@) { # ups, no Digest::MD5 | |
require Digest::Perl::MD5; | |
import Digest::Perl::MD5 'md5_hex' | |
} | |
} | |
If the C<Digest::MD5> module is available it is used and if not you take | |
C<Digest::Perl::MD5>. | |
You can also install the Perl part of Digest::MD5 together with Digest::Perl::MD5 | |
and use Digest::MD5 as normal, it falls back to Digest::Perl::MD5 if it | |
cannot load its object files. | |
For a detailed Documentation see the C<Digest::MD5> module. | |
=head1 EXAMPLES | |
The simplest way to use this library is to import the md5_hex() | |
function (or one of its cousins): | |
use Digest::Perl::MD5 'md5_hex'; | |
print 'Digest is ', md5_hex('foobarbaz'), "\n"; | |
The above example would print out the message | |
Digest is 6df23dc03f9b54cc38a0fc1483df6e21 | |
provided that the implementation is working correctly. The same | |
checksum can also be calculated in OO style: | |
use Digest::MD5; | |
$md5 = Digest::MD5->new; | |
$md5->add('foo', 'bar'); | |
$md5->add('baz'); | |
$digest = $md5->hexdigest; | |
print "Digest is $digest\n"; | |
The digest methods are destructive. That means you can only call them | |
once and the $md5 objects is reset after use. You can make a copy with clone: | |
$md5->clone->hexdigest | |
=head1 LIMITATIONS | |
This implementation of the MD5 algorithm has some limitations: | |
=over 4 | |
=item | |
It's slow, very slow. I've done my very best but Digest::MD5 is still about 100 times faster. | |
You can only encrypt Data up to one million bytes in an acceptable time. But it's very useful | |
for encrypting small amounts of data like passwords. | |
=item | |
You can only encrypt up to 2^32 bits = 512 MB on 32bit archs. But You should | |
use C<Digest::MD5> for those amounts of data anyway. | |
=back | |
=head1 SEE ALSO | |
L<Digest::MD5> | |
L<md5(1)> | |
RFC 1321 | |
tools/md5: a small BSD compatible md5 tool written in pure perl. | |
=head1 COPYRIGHT | |
This library is free software; you can redistribute it and/or | |
modify it under the same terms as Perl itself. | |
Copyright 2000 Christian Lackas, Imperia Software Solutions | |
Copyright 1998-1999 Gisle Aas. | |
Copyright 1995-1996 Neil Winton. | |
Copyright 1991-1992 RSA Data Security, Inc. | |
The MD5 algorithm is defined in RFC 1321. The basic C code | |
implementing the algorithm is derived from that in the RFC and is | |
covered by the following copyright: | |
=over 4 | |
=item | |
Copyright (C) 1991-1992, RSA Data Security, Inc. Created 1991. All | |
rights reserved. | |
License to copy and use this software is granted provided that it | |
is identified as the "RSA Data Security, Inc. MD5 Message-Digest | |
Algorithm" in all material mentioning or referencing this software | |
or this function. | |
License is also granted to make and use derivative works provided | |
that such works are identified as "derived from the RSA Data | |
Security, Inc. MD5 Message-Digest Algorithm" in all material | |
mentioning or referencing the derived work. | |
RSA Data Security, Inc. makes no representations concerning either | |
the merchantability of this software or the suitability of this | |
software for any particular purpose. It is provided "as is" | |
without express or implied warranty of any kind. | |
These notices must be retained in any copies of any part of this | |
documentation and/or software. | |
=back | |
This copyright does not prohibit distribution of any version of Perl | |
containing this extension under the terms of the GNU or Artistic | |
licenses. | |
=head1 AUTHORS | |
The original MD5 interface was written by Neil Winton | |
(<N.Winton (at) axion.bt.co.uk>). | |
C<Digest::MD5> was made by Gisle Aas <gisle (at) aas.no> (I took his Interface | |
and part of the documentation). | |
Thanks to Guido Flohr for his 'use integer'-hint. | |
This release was made by Christian Lackas <delta (at) lackas.net>. | |
=cut | |
__DATA__ | |
FF,$a,$b,$c,$d,$_[4],7,0xd76aa478,/* 1 */ | |
FF,$d,$a,$b,$c,$_[5],12,0xe8c7b756,/* 2 */ | |
FF,$c,$d,$a,$b,$_[6],17,0x242070db,/* 3 */ | |
FF,$b,$c,$d,$a,$_[7],22,0xc1bdceee,/* 4 */ | |
FF,$a,$b,$c,$d,$_[8],7,0xf57c0faf,/* 5 */ | |
FF,$d,$a,$b,$c,$_[9],12,0x4787c62a,/* 6 */ | |
FF,$c,$d,$a,$b,$_[10],17,0xa8304613,/* 7 */ | |
FF,$b,$c,$d,$a,$_[11],22,0xfd469501,/* 8 */ | |
FF,$a,$b,$c,$d,$_[12],7,0x698098d8,/* 9 */ | |
FF,$d,$a,$b,$c,$_[13],12,0x8b44f7af,/* 10 */ | |
FF,$c,$d,$a,$b,$_[14],17,0xffff5bb1,/* 11 */ | |
FF,$b,$c,$d,$a,$_[15],22,0x895cd7be,/* 12 */ | |
FF,$a,$b,$c,$d,$_[16],7,0x6b901122,/* 13 */ | |
FF,$d,$a,$b,$c,$_[17],12,0xfd987193,/* 14 */ | |
FF,$c,$d,$a,$b,$_[18],17,0xa679438e,/* 15 */ | |
FF,$b,$c,$d,$a,$_[19],22,0x49b40821,/* 16 */ | |
GG,$a,$b,$c,$d,$_[5],5,0xf61e2562,/* 17 */ | |
GG,$d,$a,$b,$c,$_[10],9,0xc040b340,/* 18 */ | |
GG,$c,$d,$a,$b,$_[15],14,0x265e5a51,/* 19 */ | |
GG,$b,$c,$d,$a,$_[4],20,0xe9b6c7aa,/* 20 */ | |
GG,$a,$b,$c,$d,$_[9],5,0xd62f105d,/* 21 */ | |
GG,$d,$a,$b,$c,$_[14],9,0x2441453,/* 22 */ | |
GG,$c,$d,$a,$b,$_[19],14,0xd8a1e681,/* 23 */ | |
GG,$b,$c,$d,$a,$_[8],20,0xe7d3fbc8,/* 24 */ | |
GG,$a,$b,$c,$d,$_[13],5,0x21e1cde6,/* 25 */ | |
GG,$d,$a,$b,$c,$_[18],9,0xc33707d6,/* 26 */ | |
GG,$c,$d,$a,$b,$_[7],14,0xf4d50d87,/* 27 */ | |
GG,$b,$c,$d,$a,$_[12],20,0x455a14ed,/* 28 */ | |
GG,$a,$b,$c,$d,$_[17],5,0xa9e3e905,/* 29 */ | |
GG,$d,$a,$b,$c,$_[6],9,0xfcefa3f8,/* 30 */ | |
GG,$c,$d,$a,$b,$_[11],14,0x676f02d9,/* 31 */ | |
GG,$b,$c,$d,$a,$_[16],20,0x8d2a4c8a,/* 32 */ | |
HH,$a,$b,$c,$d,$_[9],4,0xfffa3942,/* 33 */ | |
HH,$d,$a,$b,$c,$_[12],11,0x8771f681,/* 34 */ | |
HH,$c,$d,$a,$b,$_[15],16,0x6d9d6122,/* 35 */ | |
HH,$b,$c,$d,$a,$_[18],23,0xfde5380c,/* 36 */ | |
HH,$a,$b,$c,$d,$_[5],4,0xa4beea44,/* 37 */ | |
HH,$d,$a,$b,$c,$_[8],11,0x4bdecfa9,/* 38 */ | |
HH,$c,$d,$a,$b,$_[11],16,0xf6bb4b60,/* 39 */ | |
HH,$b,$c,$d,$a,$_[14],23,0xbebfbc70,/* 40 */ | |
HH,$a,$b,$c,$d,$_[17],4,0x289b7ec6,/* 41 */ | |
HH,$d,$a,$b,$c,$_[4],11,0xeaa127fa,/* 42 */ | |
HH,$c,$d,$a,$b,$_[7],16,0xd4ef3085,/* 43 */ | |
HH,$b,$c,$d,$a,$_[10],23,0x4881d05,/* 44 */ | |
HH,$a,$b,$c,$d,$_[13],4,0xd9d4d039,/* 45 */ | |
HH,$d,$a,$b,$c,$_[16],11,0xe6db99e5,/* 46 */ | |
HH,$c,$d,$a,$b,$_[19],16,0x1fa27cf8,/* 47 */ | |
HH,$b,$c,$d,$a,$_[6],23,0xc4ac5665,/* 48 */ | |
II,$a,$b,$c,$d,$_[4],6,0xf4292244,/* 49 */ | |
II,$d,$a,$b,$c,$_[11],10,0x432aff97,/* 50 */ | |
II,$c,$d,$a,$b,$_[18],15,0xab9423a7,/* 51 */ | |
II,$b,$c,$d,$a,$_[9],21,0xfc93a039,/* 52 */ | |
II,$a,$b,$c,$d,$_[16],6,0x655b59c3,/* 53 */ | |
II,$d,$a,$b,$c,$_[7],10,0x8f0ccc92,/* 54 */ | |
II,$c,$d,$a,$b,$_[14],15,0xffeff47d,/* 55 */ | |
II,$b,$c,$d,$a,$_[5],21,0x85845dd1,/* 56 */ | |
II,$a,$b,$c,$d,$_[12],6,0x6fa87e4f,/* 57 */ | |
II,$d,$a,$b,$c,$_[19],10,0xfe2ce6e0,/* 58 */ | |
II,$c,$d,$a,$b,$_[10],15,0xa3014314,/* 59 */ | |
II,$b,$c,$d,$a,$_[17],21,0x4e0811a1,/* 60 */ | |
II,$a,$b,$c,$d,$_[8],6,0xf7537e82,/* 61 */ | |
II,$d,$a,$b,$c,$_[15],10,0xbd3af235,/* 62 */ | |
II,$c,$d,$a,$b,$_[6],15,0x2ad7d2bb,/* 63 */ | |
II,$b,$c,$d,$a,$_[13],21,0xeb86d391,/* 64 */ | |
DIGEST_PERL_MD5 | |
$fatpacked{"Exporter.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'EXPORTER'; | |
package Exporter; | |
require 5.006; | |
# Be lean. | |
#use strict; | |
#no strict 'refs'; | |
our $Debug = 0; | |
our $ExportLevel = 0; | |
our $Verbose ||= 0; | |
our $VERSION = '5.74'; | |
our (%Cache); | |
sub as_heavy { | |
require Exporter::Heavy; | |
# Unfortunately, this does not work if the caller is aliased as *name = \&foo | |
# Thus the need to create a lot of identical subroutines | |
my $c = (caller(1))[3]; | |
$c =~ s/.*:://; | |
\&{"Exporter::Heavy::heavy_$c"}; | |
} | |
sub export { | |
goto &{as_heavy()}; | |
} | |
sub import { | |
my $pkg = shift; | |
my $callpkg = caller($ExportLevel); | |
if ($pkg eq "Exporter" and @_ and $_[0] eq "import") { | |
*{$callpkg."::import"} = \&import; | |
return; | |
} | |
# We *need* to treat @{"$pkg\::EXPORT_FAIL"} since Carp uses it :-( | |
my $exports = \@{"$pkg\::EXPORT"}; | |
# But, avoid creating things if they don't exist, which saves a couple of | |
# hundred bytes per package processed. | |
my $fail = ${$pkg . '::'}{EXPORT_FAIL} && \@{"$pkg\::EXPORT_FAIL"}; | |
return export $pkg, $callpkg, @_ | |
if $Verbose or $Debug or $fail && @$fail > 1; | |
my $export_cache = ($Cache{$pkg} ||= {}); | |
my $args = @_ or @_ = @$exports; | |
if ($args and not %$export_cache) { | |
s/^&//, $export_cache->{$_} = 1 | |
foreach (@$exports, @{"$pkg\::EXPORT_OK"}); | |
} | |
my $heavy; | |
# Try very hard not to use {} and hence have to enter scope on the foreach | |
# We bomb out of the loop with last as soon as heavy is set. | |
if ($args or $fail) { | |
($heavy = (/\W/ or $args and not exists $export_cache->{$_} | |
or $fail and @$fail and $_ eq $fail->[0])) and last | |
foreach (@_); | |
} else { | |
($heavy = /\W/) and last | |
foreach (@_); | |
} | |
return export $pkg, $callpkg, ($args ? @_ : ()) if $heavy; | |
local $SIG{__WARN__} = | |
sub {require Carp; &Carp::carp} if not $SIG{__WARN__}; | |
# shortcut for the common case of no type character | |
*{"$callpkg\::$_"} = \&{"$pkg\::$_"} foreach @_; | |
} | |
# Default methods | |
sub export_fail { | |
my $self = shift; | |
@_; | |
} | |
# Unfortunately, caller(1)[3] "does not work" if the caller is aliased as | |
# *name = \&foo. Thus the need to create a lot of identical subroutines | |
# Otherwise we could have aliased them to export(). | |
sub export_to_level { | |
goto &{as_heavy()}; | |
} | |
sub export_tags { | |
goto &{as_heavy()}; | |
} | |
sub export_ok_tags { | |
goto &{as_heavy()}; | |
} | |
sub require_version { | |
goto &{as_heavy()}; | |
} | |
1; | |
__END__ | |
=head1 NAME | |
Exporter - Implements default import method for modules | |
=head1 SYNOPSIS | |
In module F<YourModule.pm>: | |
package YourModule; | |
require Exporter; | |
our @ISA = qw(Exporter); | |
our @EXPORT_OK = qw(munge frobnicate); # symbols to export on request | |
or | |
package YourModule; | |
use Exporter 'import'; # gives you Exporter's import() method directly | |
our @EXPORT_OK = qw(munge frobnicate); # symbols to export on request | |
In other files which wish to use C<YourModule>: | |
use YourModule qw(frobnicate); # import listed symbols | |
frobnicate ($left, $right) # calls YourModule::frobnicate | |
Take a look at L</Good Practices> for some variants | |
you will like to use in modern Perl code. | |
=head1 DESCRIPTION | |
The Exporter module implements an C<import> method which allows a module | |
to export functions and variables to its users' namespaces. Many modules | |
use Exporter rather than implementing their own C<import> method because | |
Exporter provides a highly flexible interface, with an implementation optimised | |
for the common case. | |
Perl automatically calls the C<import> method when processing a | |
C<use> statement for a module. Modules and C<use> are documented | |
in L<perlfunc> and L<perlmod>. Understanding the concept of | |
modules and how the C<use> statement operates is important to | |
understanding the Exporter. | |
=head2 How to Export | |
The arrays C<@EXPORT> and C<@EXPORT_OK> in a module hold lists of | |
symbols that are going to be exported into the users name space by | |
default, or which they can request to be exported, respectively. The | |
symbols can represent functions, scalars, arrays, hashes, or typeglobs. | |
The symbols must be given by full name with the exception that the | |
ampersand in front of a function is optional, e.g. | |
our @EXPORT = qw(afunc $scalar @array); # afunc is a function | |
our @EXPORT_OK = qw(&bfunc %hash *typeglob); # explicit prefix on &bfunc | |
If you are only exporting function names it is recommended to omit the | |
ampersand, as the implementation is faster this way. | |
=head2 Selecting What to Export | |
Do B<not> export method names! | |
Do B<not> export anything else by default without a good reason! | |
Exports pollute the namespace of the module user. If you must export | |
try to use C<@EXPORT_OK> in preference to C<@EXPORT> and avoid short or | |
common symbol names to reduce the risk of name clashes. | |
Generally anything not exported is still accessible from outside the | |
module using the C<YourModule::item_name> (or C<< $blessed_ref->method >>) | |
syntax. By convention you can use a leading underscore on names to | |
informally indicate that they are 'internal' and not for public use. | |
(It is actually possible to get private functions by saying: | |
my $subref = sub { ... }; | |
$subref->(@args); # Call it as a function | |
$obj->$subref(@args); # Use it as a method | |
However if you use them for methods it is up to you to figure out | |
how to make inheritance work.) | |
As a general rule, if the module is trying to be object oriented | |
then export nothing. If it's just a collection of functions then | |
C<@EXPORT_OK> anything but use C<@EXPORT> with caution. For function and | |
method names use barewords in preference to names prefixed with | |
ampersands for the export lists. | |
Other module design guidelines can be found in L<perlmod>. | |
=head2 How to Import | |
In other files which wish to use your module there are three basic ways for | |
them to load your module and import its symbols: | |
=over 4 | |
=item C<use YourModule;> | |
This imports all the symbols from YourModule's C<@EXPORT> into the namespace | |
of the C<use> statement. | |
=item C<use YourModule ();> | |
This causes perl to load your module but does not import any symbols. | |
=item C<use YourModule qw(...);> | |
This imports only the symbols listed by the caller into their namespace. | |
All listed symbols must be in your C<@EXPORT> or C<@EXPORT_OK>, else an error | |
occurs. The advanced export features of Exporter are accessed like this, | |
but with list entries that are syntactically distinct from symbol names. | |
=back | |
Unless you want to use its advanced features, this is probably all you | |
need to know to use Exporter. | |
=head1 Advanced Features | |
=head2 Specialised Import Lists | |
If any of the entries in an import list begins with !, : or / then | |
the list is treated as a series of specifications which either add to | |
or delete from the list of names to import. They are processed left to | |
right. Specifications are in the form: | |
[!]name This name only | |
[!]:DEFAULT All names in @EXPORT | |
[!]:tag All names in $EXPORT_TAGS{tag} anonymous array | |
[!]/pattern/ All names in @EXPORT and @EXPORT_OK which match | |
A leading ! indicates that matching names should be deleted from the | |
list of names to import. If the first specification is a deletion it | |
is treated as though preceded by :DEFAULT. If you just want to import | |
extra names in addition to the default set you will still need to | |
include :DEFAULT explicitly. | |
e.g., F<Module.pm> defines: | |
our @EXPORT = qw(A1 A2 A3 A4 A5); | |
our @EXPORT_OK = qw(B1 B2 B3 B4 B5); | |
our %EXPORT_TAGS = (T1 => [qw(A1 A2 B1 B2)], T2 => [qw(A1 A2 B3 B4)]); | |
Note that you cannot use tags in @EXPORT or @EXPORT_OK. | |
Names in EXPORT_TAGS must also appear in @EXPORT or @EXPORT_OK. | |
An application using Module can say something like: | |
use Module qw(:DEFAULT :T2 !B3 A3); | |
Other examples include: | |
use Socket qw(!/^[AP]F_/ !SOMAXCONN !SOL_SOCKET); | |
use POSIX qw(:errno_h :termios_h !TCSADRAIN !/^EXIT/); | |
Remember that most patterns (using //) will need to be anchored | |
with a leading ^, e.g., C</^EXIT/> rather than C</EXIT/>. | |
You can say C<BEGIN { $Exporter::Verbose=1 }> to see how the | |
specifications are being processed and what is actually being imported | |
into modules. | |
=head2 Exporting Without Using Exporter's import Method | |
Exporter has a special method, 'export_to_level' which is used in situations | |
where you can't directly call Exporter's | |
import method. The export_to_level | |
method looks like: | |
MyPackage->export_to_level( | |
$where_to_export, $package, @what_to_export | |
); | |
where C<$where_to_export> is an integer telling how far up the calling stack | |
to export your symbols, and C<@what_to_export> is an array telling what | |
symbols *to* export (usually this is C<@_>). The C<$package> argument is | |
currently unused. | |
For example, suppose that you have a module, A, which already has an | |
import function: | |
package A; | |
our @ISA = qw(Exporter); | |
our @EXPORT_OK = qw($b); | |
sub import | |
{ | |
$A::b = 1; # not a very useful import method | |
} | |
and you want to Export symbol C<$A::b> back to the module that called | |
package A. Since Exporter relies on the import method to work, via | |
inheritance, as it stands Exporter::import() will never get called. | |
Instead, say the following: | |
package A; | |
our @ISA = qw(Exporter); | |
our @EXPORT_OK = qw($b); | |
sub import | |
{ | |
$A::b = 1; | |
A->export_to_level(1, @_); | |
} | |
This will export the symbols one level 'above' the current package - ie: to | |
the program or module that used package A. | |
Note: Be careful not to modify C<@_> at all before you call export_to_level | |
- or people using your package will get very unexplained results! | |
=head2 Exporting Without Inheriting from Exporter | |
By including Exporter in your C<@ISA> you inherit an Exporter's import() method | |
but you also inherit several other helper methods which you probably don't | |
want. To avoid this you can do: | |
package YourModule; | |
use Exporter qw(import); | |
which will export Exporter's own import() method into YourModule. | |
Everything will work as before but you won't need to include Exporter in | |
C<@YourModule::ISA>. | |
Note: This feature was introduced in version 5.57 | |
of Exporter, released with perl 5.8.3. | |
=head2 Module Version Checking | |
The Exporter module will convert an attempt to import a number from a | |
module into a call to C<< $module_name->VERSION($value) >>. This can | |
be used to validate that the version of the module being used is | |
greater than or equal to the required version. | |
For historical reasons, Exporter supplies a C<require_version> method that | |
simply delegates to C<VERSION>. Originally, before C<UNIVERSAL::VERSION> | |
existed, Exporter would call C<require_version>. | |
Since the C<UNIVERSAL::VERSION> method treats the C<$VERSION> number as | |
a simple numeric value it will regard version 1.10 as lower than | |
1.9. For this reason it is strongly recommended that you use numbers | |
with at least two decimal places, e.g., 1.09. | |
=head2 Managing Unknown Symbols | |
In some situations you may want to prevent certain symbols from being | |
exported. Typically this applies to extensions which have functions | |
or constants that may not exist on some systems. | |
The names of any symbols that cannot be exported should be listed | |
in the C<@EXPORT_FAIL> array. | |
If a module attempts to import any of these symbols the Exporter | |
will give the module an opportunity to handle the situation before | |
generating an error. The Exporter will call an export_fail method | |
with a list of the failed symbols: | |
@failed_symbols = $module_name->export_fail(@failed_symbols); | |
If the C<export_fail> method returns an empty list then no error is | |
recorded and all the requested symbols are exported. If the returned | |
list is not empty then an error is generated for each symbol and the | |
export fails. The Exporter provides a default C<export_fail> method which | |
simply returns the list unchanged. | |
Uses for the C<export_fail> method include giving better error messages | |
for some symbols and performing lazy architectural checks (put more | |
symbols into C<@EXPORT_FAIL> by default and then take them out if someone | |
actually tries to use them and an expensive check shows that they are | |
usable on that platform). | |
=head2 Tag Handling Utility Functions | |
Since the symbols listed within C<%EXPORT_TAGS> must also appear in either | |
C<@EXPORT> or C<@EXPORT_OK>, two utility functions are provided which allow | |
you to easily add tagged sets of symbols to C<@EXPORT> or C<@EXPORT_OK>: | |
our %EXPORT_TAGS = (foo => [qw(aa bb cc)], bar => [qw(aa cc dd)]); | |
Exporter::export_tags('foo'); # add aa, bb and cc to @EXPORT | |
Exporter::export_ok_tags('bar'); # add aa, cc and dd to @EXPORT_OK | |
Any names which are not tags are added to C<@EXPORT> or C<@EXPORT_OK> | |
unchanged but will trigger a warning (with C<-w>) to avoid misspelt tags | |
names being silently added to C<@EXPORT> or C<@EXPORT_OK>. Future versions | |
may make this a fatal error. | |
=head2 Generating Combined Tags | |
If several symbol categories exist in C<%EXPORT_TAGS>, it's usually | |
useful to create the utility ":all" to simplify "use" statements. | |
The simplest way to do this is: | |
our %EXPORT_TAGS = (foo => [qw(aa bb cc)], bar => [qw(aa cc dd)]); | |
# add all the other ":class" tags to the ":all" class, | |
# deleting duplicates | |
{ | |
my %seen; | |
push @{$EXPORT_TAGS{all}}, | |
grep {!$seen{$_}++} @{$EXPORT_TAGS{$_}} foreach keys %EXPORT_TAGS; | |
} | |
F<CGI.pm> creates an ":all" tag which contains some (but not really | |
all) of its categories. That could be done with one small | |
change: | |
# add some of the other ":class" tags to the ":all" class, | |
# deleting duplicates | |
{ | |
my %seen; | |
push @{$EXPORT_TAGS{all}}, | |
grep {!$seen{$_}++} @{$EXPORT_TAGS{$_}} | |
foreach qw/html2 html3 netscape form cgi internal/; | |
} | |
Note that the tag names in C<%EXPORT_TAGS> don't have the leading ':'. | |
=head2 C<AUTOLOAD>ed Constants | |
Many modules make use of C<AUTOLOAD>ing for constant subroutines to | |
avoid having to compile and waste memory on rarely used values (see | |
L<perlsub> for details on constant subroutines). Calls to such | |
constant subroutines are not optimized away at compile time because | |
they can't be checked at compile time for constancy. | |
Even if a prototype is available at compile time, the body of the | |
subroutine is not (it hasn't been C<AUTOLOAD>ed yet). perl needs to | |
examine both the C<()> prototype and the body of a subroutine at | |
compile time to detect that it can safely replace calls to that | |
subroutine with the constant value. | |
A workaround for this is to call the constants once in a C<BEGIN> block: | |
package My ; | |
use Socket ; | |
foo( SO_LINGER ); ## SO_LINGER NOT optimized away; called at runtime | |
BEGIN { SO_LINGER } | |
foo( SO_LINGER ); ## SO_LINGER optimized away at compile time. | |
This forces the C<AUTOLOAD> for C<SO_LINGER> to take place before | |
SO_LINGER is encountered later in C<My> package. | |
If you are writing a package that C<AUTOLOAD>s, consider forcing | |
an C<AUTOLOAD> for any constants explicitly imported by other packages | |
or which are usually used when your package is C<use>d. | |
=head1 Good Practices | |
=head2 Declaring C<@EXPORT_OK> and Friends | |
When using C<Exporter> with the standard C<strict> and C<warnings> | |
pragmas, the C<our> keyword is needed to declare the package | |
variables C<@EXPORT_OK>, C<@EXPORT>, C<@ISA>, etc. | |
our @ISA = qw(Exporter); | |
our @EXPORT_OK = qw(munge frobnicate); | |
If backward compatibility for Perls B<under> 5.6 is important, | |
one must write instead a C<use vars> statement. | |
use vars qw(@ISA @EXPORT_OK); | |
@ISA = qw(Exporter); | |
@EXPORT_OK = qw(munge frobnicate); | |
=head2 Playing Safe | |
There are some caveats with the use of runtime statements | |
like C<require Exporter> and the assignment to package | |
variables, which can be very subtle for the unaware programmer. | |
This may happen for instance with mutually recursive | |
modules, which are affected by the time the relevant | |
constructions are executed. | |
The ideal (but a bit ugly) way to never have to think | |
about that is to use C<BEGIN> blocks. So the first part | |
of the L</SYNOPSIS> code could be rewritten as: | |
package YourModule; | |
use strict; | |
use warnings; | |
our (@ISA, @EXPORT_OK); | |
BEGIN { | |
require Exporter; | |
@ISA = qw(Exporter); | |
@EXPORT_OK = qw(munge frobnicate); # symbols to export on request | |
} | |
The C<BEGIN> will assure that the loading of F<Exporter.pm> | |
and the assignments to C<@ISA> and C<@EXPORT_OK> happen | |
immediately, leaving no room for something to get awry | |
or just plain wrong. | |
With respect to loading C<Exporter> and inheriting, there | |
are alternatives with the use of modules like C<base> and C<parent>. | |
use base qw(Exporter); | |
# or | |
use parent qw(Exporter); | |
Any of these statements are nice replacements for | |
C<BEGIN { require Exporter; @ISA = qw(Exporter); }> | |
with the same compile-time effect. The basic difference | |
is that C<base> code interacts with declared C<fields> | |
while C<parent> is a streamlined version of the older | |
C<base> code to just establish the IS-A relationship. | |
For more details, see the documentation and code of | |
L<base> and L<parent>. | |
Another thorough remedy to that runtime | |
vs. compile-time trap is to use L<Exporter::Easy>, | |
which is a wrapper of Exporter that allows all | |
boilerplate code at a single gulp in the | |
use statement. | |
use Exporter::Easy ( | |
OK => [ qw(munge frobnicate) ], | |
); | |
# @ISA setup is automatic | |
# all assignments happen at compile time | |
=head2 What Not to Export | |
You have been warned already in L</Selecting What to Export> | |
to not export: | |
=over 4 | |
=item * | |
method names (because you don't need to | |
and that's likely to not do what you want), | |
=item * | |
anything by default (because you don't want to surprise your users... | |
badly) | |
=item * | |
anything you don't need to (because less is more) | |
=back | |
There's one more item to add to this list. Do B<not> | |
export variable names. Just because C<Exporter> lets you | |
do that, it does not mean you should. | |
@EXPORT_OK = qw($svar @avar %hvar); # DON'T! | |
Exporting variables is not a good idea. They can | |
change under the hood, provoking horrible | |
effects at-a-distance that are too hard to track | |
and to fix. Trust me: they are not worth it. | |
To provide the capability to set/get class-wide | |
settings, it is best instead to provide accessors | |
as subroutines or class methods instead. | |
=head1 SEE ALSO | |
C<Exporter> is definitely not the only module with | |
symbol exporter capabilities. At CPAN, you may find | |
a bunch of them. Some are lighter. Some | |
provide improved APIs and features. Pick the one | |
that fits your needs. The following is | |
a sample list of such modules. | |
Exporter::Easy | |
Exporter::Lite | |
Exporter::Renaming | |
Exporter::Tidy | |
Sub::Exporter / Sub::Installer | |
Perl6::Export / Perl6::Export::Attrs | |
=head1 LICENSE | |
This library is free software. You can redistribute it | |
and/or modify it under the same terms as Perl itself. | |
=cut | |
EXPORTER | |
$fatpacked{"Exporter/Heavy.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'EXPORTER_HEAVY'; | |
package Exporter::Heavy; | |
use strict; | |
no strict 'refs'; | |
# On one line so MakeMaker will see it. | |
require Exporter; our $VERSION = $Exporter::VERSION; | |
=head1 NAME | |
Exporter::Heavy - Exporter guts | |
=head1 SYNOPSIS | |
(internal use only) | |
=head1 DESCRIPTION | |
No user-serviceable parts inside. | |
=cut | |
# | |
# We go to a lot of trouble not to 'require Carp' at file scope, | |
# because Carp requires Exporter, and something has to give. | |
# | |
sub _rebuild_cache { | |
my ($pkg, $exports, $cache) = @_; | |
s/^&// foreach @$exports; | |
@{$cache}{@$exports} = (1) x @$exports; | |
my $ok = \@{"${pkg}::EXPORT_OK"}; | |
if (@$ok) { | |
s/^&// foreach @$ok; | |
@{$cache}{@$ok} = (1) x @$ok; | |
} | |
} | |
sub heavy_export { | |
# Save the old __WARN__ handler in case it was defined | |
my $oldwarn = $SIG{__WARN__}; | |
# First make import warnings look like they're coming from the "use". | |
local $SIG{__WARN__} = sub { | |
# restore it back so proper stacking occurs | |
local $SIG{__WARN__} = $oldwarn; | |
my $text = shift; | |
if ($text =~ s/ at \S*Exporter\S*.pm line \d+.*\n//) { | |
require Carp; | |
local $Carp::CarpLevel = 1; # ignore package calling us too. | |
Carp::carp($text); | |
} | |
else { | |
warn $text; | |
} | |
}; | |
local $SIG{__DIE__} = sub { | |
require Carp; | |
local $Carp::CarpLevel = 1; # ignore package calling us too. | |
Carp::croak("$_[0]Illegal null symbol in \@${1}::EXPORT") | |
if $_[0] =~ /^Unable to create sub named "(.*?)::"/; | |
}; | |
my($pkg, $callpkg, @imports) = @_; | |
my($type, $sym, $cache_is_current, $oops); | |
my($exports, $export_cache) = (\@{"${pkg}::EXPORT"}, | |
$Exporter::Cache{$pkg} ||= {}); | |
if (@imports) { | |
if (!%$export_cache) { | |
_rebuild_cache ($pkg, $exports, $export_cache); | |
$cache_is_current = 1; | |
} | |
if (grep m{^[/!:]}, @imports) { | |
my $tagsref = \%{"${pkg}::EXPORT_TAGS"}; | |
my $tagdata; | |
my %imports; | |
my($remove, $spec, @names, @allexports); | |
# negated first item implies starting with default set: | |
unshift @imports, ':DEFAULT' if $imports[0] =~ m/^!/; | |
foreach $spec (@imports){ | |
$remove = $spec =~ s/^!//; | |
if ($spec =~ s/^://){ | |
if ($spec eq 'DEFAULT'){ | |
@names = @$exports; | |
} | |
elsif ($tagdata = $tagsref->{$spec}) { | |
@names = @$tagdata; | |
} | |
else { | |
warn qq["$spec" is not defined in %${pkg}::EXPORT_TAGS]; | |
++$oops; | |
next; | |
} | |
} | |
elsif ($spec =~ m:^/(.*)/$:){ | |
my $patn = $1; | |
@allexports = keys %$export_cache unless @allexports; # only do keys once | |
@names = grep(/$patn/, @allexports); # not anchored by default | |
} | |
else { | |
@names = ($spec); # is a normal symbol name | |
} | |
warn "Import ".($remove ? "del":"add").": @names " | |
if $Exporter::Verbose; | |
if ($remove) { | |
foreach $sym (@names) { delete $imports{$sym} } | |
} | |
else { | |
@imports{@names} = (1) x @names; | |
} | |
} | |
@imports = keys %imports; | |
} | |
my @carp; | |
foreach $sym (@imports) { | |
if (!$export_cache->{$sym}) { | |
if ($sym =~ m/^\d/) { | |
$pkg->VERSION($sym); # inherit from UNIVERSAL | |
# If the version number was the only thing specified | |
# then we should act as if nothing was specified: | |
if (@imports == 1) { | |
@imports = @$exports; | |
last; | |
} | |
# We need a way to emulate 'use Foo ()' but still | |
# allow an easy version check: "use Foo 1.23, ''"; | |
if (@imports == 2 and !$imports[1]) { | |
@imports = (); | |
last; | |
} | |
} elsif ($sym !~ s/^&// || !$export_cache->{$sym}) { | |
# Last chance - see if they've updated EXPORT_OK since we | |
# cached it. | |
unless ($cache_is_current) { | |
%$export_cache = (); | |
_rebuild_cache ($pkg, $exports, $export_cache); | |
$cache_is_current = 1; | |
} | |
if (!$export_cache->{$sym}) { | |
# accumulate the non-exports | |
push @carp, | |
qq["$sym" is not exported by the $pkg module]; | |
$oops++; | |
} | |
} | |
} | |
} | |
if ($oops) { | |
require Carp; | |
Carp::croak(join("\n", @carp, "Can't continue after import errors")); | |
} | |
} | |
else { | |
@imports = @$exports; | |
} | |
my($fail, $fail_cache) = (\@{"${pkg}::EXPORT_FAIL"}, | |
$Exporter::FailCache{$pkg} ||= {}); | |
if (@$fail) { | |
if (!%$fail_cache) { | |
# Build cache of symbols. Optimise the lookup by adding | |
# barewords twice... both with and without a leading &. | |
# (Technique could be applied to $export_cache at cost of memory) | |
my @expanded = map { /^\w/ ? ($_, '&'.$_) : $_ } @$fail; | |
warn "${pkg}::EXPORT_FAIL cached: @expanded" if $Exporter::Verbose; | |
@{$fail_cache}{@expanded} = (1) x @expanded; | |
} | |
my @failed; | |
foreach $sym (@imports) { push(@failed, $sym) if $fail_cache->{$sym} } | |
if (@failed) { | |
@failed = $pkg->export_fail(@failed); | |
foreach $sym (@failed) { | |
require Carp; | |
Carp::carp(qq["$sym" is not implemented by the $pkg module ], | |
"on this architecture"); | |
} | |
if (@failed) { | |
require Carp; | |
Carp::croak("Can't continue after import errors"); | |
} | |
} | |
} | |
warn "Importing into $callpkg from $pkg: ", | |
join(", ",sort @imports) if $Exporter::Verbose; | |
foreach $sym (@imports) { | |
# shortcut for the common case of no type character | |
(*{"${callpkg}::$sym"} = \&{"${pkg}::$sym"}, next) | |
unless $sym =~ s/^(\W)//; | |
$type = $1; | |
no warnings 'once'; | |
*{"${callpkg}::$sym"} = | |
$type eq '&' ? \&{"${pkg}::$sym"} : | |
$type eq '$' ? \${"${pkg}::$sym"} : | |
$type eq '@' ? \@{"${pkg}::$sym"} : | |
$type eq '%' ? \%{"${pkg}::$sym"} : | |
$type eq '*' ? *{"${pkg}::$sym"} : | |
do { require Carp; Carp::croak("Can't export symbol: $type$sym") }; | |
} | |
} | |
sub heavy_export_to_level | |
{ | |
my $pkg = shift; | |
my $level = shift; | |
(undef) = shift; # XXX redundant arg | |
my $callpkg = caller($level); | |
$pkg->export($callpkg, @_); | |
} | |
# Utility functions | |
sub _push_tags { | |
my($pkg, $var, $syms) = @_; | |
my @nontag = (); | |
my $export_tags = \%{"${pkg}::EXPORT_TAGS"}; | |
push(@{"${pkg}::$var"}, | |
map { $export_tags->{$_} ? @{$export_tags->{$_}} | |
: scalar(push(@nontag,$_),$_) } | |
(@$syms) ? @$syms : keys %$export_tags); | |
if (@nontag and $^W) { | |
# This may change to a die one day | |
require Carp; | |
Carp::carp(join(", ", @nontag)." are not tags of $pkg"); | |
} | |
} | |
sub heavy_require_version { | |
my($self, $wanted) = @_; | |
my $pkg = ref $self || $self; | |
return ${pkg}->VERSION($wanted); | |
} | |
sub heavy_export_tags { | |
_push_tags((caller)[0], "EXPORT", \@_); | |
} | |
sub heavy_export_ok_tags { | |
_push_tags((caller)[0], "EXPORT_OK", \@_); | |
} | |
1; | |
EXPORTER_HEAVY | |
$fatpacked{"File/Path.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'FILE_PATH'; | |
package File::Path; | |
use 5.005_04; | |
use strict; | |
use Cwd 'getcwd'; | |
use File::Basename (); | |
use File::Spec (); | |
BEGIN { | |
if ( $] < 5.006 ) { | |
# can't say 'opendir my $dh, $dirname' | |
# need to initialise $dh | |
eval 'use Symbol'; | |
} | |
} | |
use Exporter (); | |
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); | |
$VERSION = '2.18'; | |
$VERSION = eval $VERSION; | |
@ISA = qw(Exporter); | |
@EXPORT = qw(mkpath rmtree); | |
@EXPORT_OK = qw(make_path remove_tree); | |
BEGIN { | |
for (qw(VMS MacOS MSWin32 os2)) { | |
no strict 'refs'; | |
*{"_IS_\U$_"} = $^O eq $_ ? sub () { 1 } : sub () { 0 }; | |
} | |
# These OSes complain if you want to remove a file that you have no | |
# write permission to: | |
*_FORCE_WRITABLE = ( | |
grep { $^O eq $_ } qw(amigaos dos epoc MSWin32 MacOS os2) | |
) ? sub () { 1 } : sub () { 0 }; | |
# Unix-like systems need to stat each directory in order to detect | |
# race condition. MS-Windows is immune to this particular attack. | |
*_NEED_STAT_CHECK = !(_IS_MSWIN32()) ? sub () { 1 } : sub () { 0 }; | |
} | |
sub _carp { | |
require Carp; | |
goto &Carp::carp; | |
} | |
sub _croak { | |
require Carp; | |
goto &Carp::croak; | |
} | |
sub _error { | |
my $arg = shift; | |
my $message = shift; | |
my $object = shift; | |
if ( $arg->{error} ) { | |
$object = '' unless defined $object; | |
$message .= ": $!" if $!; | |
push @{ ${ $arg->{error} } }, { $object => $message }; | |
} | |
else { | |
_carp( defined($object) ? "$message for $object: $!" : "$message: $!" ); | |
} | |
} | |
sub __is_arg { | |
my ($arg) = @_; | |
# If client code blessed an array ref to HASH, this will not work | |
# properly. We could have done $arg->isa() wrapped in eval, but | |
# that would be expensive. This implementation should suffice. | |
# We could have also used Scalar::Util:blessed, but we choose not | |
# to add this dependency | |
return ( ref $arg eq 'HASH' ); | |
} | |
sub make_path { | |
push @_, {} unless @_ and __is_arg( $_[-1] ); | |
goto &mkpath; | |
} | |
sub mkpath { | |
my $old_style = !( @_ and __is_arg( $_[-1] ) ); | |
my $data; | |
my $paths; | |
if ($old_style) { | |
my ( $verbose, $mode ); | |
( $paths, $verbose, $mode ) = @_; | |
$paths = [$paths] unless UNIVERSAL::isa( $paths, 'ARRAY' ); | |
$data->{verbose} = $verbose; | |
$data->{mode} = defined $mode ? $mode : oct '777'; | |
} | |
else { | |
my %args_permitted = map { $_ => 1 } ( qw| | |
chmod | |
error | |
group | |
mask | |
mode | |
owner | |
uid | |
user | |
verbose | |
| ); | |
my %not_on_win32_args = map { $_ => 1 } ( qw| | |
group | |
owner | |
uid | |
user | |
| ); | |
my @bad_args = (); | |
my @win32_implausible_args = (); | |
my $arg = pop @_; | |
for my $k (sort keys %{$arg}) { | |
if (! $args_permitted{$k}) { | |
push @bad_args, $k; | |
} | |
elsif ($not_on_win32_args{$k} and _IS_MSWIN32) { | |
push @win32_implausible_args, $k; | |
} | |
else { | |
$data->{$k} = $arg->{$k}; | |
} | |
} | |
_carp("Unrecognized option(s) passed to mkpath() or make_path(): @bad_args") | |
if @bad_args; | |
_carp("Option(s) implausible on Win32 passed to mkpath() or make_path(): @win32_implausible_args") | |
if @win32_implausible_args; | |
$data->{mode} = delete $data->{mask} if exists $data->{mask}; | |
$data->{mode} = oct '777' unless exists $data->{mode}; | |
${ $data->{error} } = [] if exists $data->{error}; | |
unless (@win32_implausible_args) { | |
$data->{owner} = delete $data->{user} if exists $data->{user}; | |
$data->{owner} = delete $data->{uid} if exists $data->{uid}; | |
if ( exists $data->{owner} and $data->{owner} =~ /\D/ ) { | |
my $uid = ( getpwnam $data->{owner} )[2]; | |
if ( defined $uid ) { | |
$data->{owner} = $uid; | |
} | |
else { | |
_error( $data, | |
"unable to map $data->{owner} to a uid, ownership not changed" | |
); | |
delete $data->{owner}; | |
} | |
} | |
if ( exists $data->{group} and $data->{group} =~ /\D/ ) { | |
my $gid = ( getgrnam $data->{group} )[2]; | |
if ( defined $gid ) { | |
$data->{group} = $gid; | |
} | |
else { | |
_error( $data, | |
"unable to map $data->{group} to a gid, group ownership not changed" | |
); | |
delete $data->{group}; | |
} | |
} | |
if ( exists $data->{owner} and not exists $data->{group} ) { | |
$data->{group} = -1; # chown will leave group unchanged | |
} | |
if ( exists $data->{group} and not exists $data->{owner} ) { | |
$data->{owner} = -1; # chown will leave owner unchanged | |
} | |
} | |
$paths = [@_]; | |
} | |
return _mkpath( $data, $paths ); | |
} | |
sub _mkpath { | |
my $data = shift; | |
my $paths = shift; | |
my ( @created ); | |
foreach my $path ( @{$paths} ) { | |
next unless defined($path) and length($path); | |
$path .= '/' if _IS_OS2 and $path =~ /^\w:\z/s; # feature of CRT | |
# Logic wants Unix paths, so go with the flow. | |
if (_IS_VMS) { | |
next if $path eq '/'; | |
$path = VMS::Filespec::unixify($path); | |
} | |
next if -d $path; | |
my $parent = File::Basename::dirname($path); | |
# Coverage note: It's not clear how we would test the condition: | |
# '-d $parent or $path eq $parent' | |
unless ( -d $parent or $path eq $parent ) { | |
push( @created, _mkpath( $data, [$parent] ) ); | |
} | |
print "mkdir $path\n" if $data->{verbose}; | |
if ( mkdir( $path, $data->{mode} ) ) { | |
push( @created, $path ); | |
if ( exists $data->{owner} ) { | |
# NB: $data->{group} guaranteed to be set during initialisation | |
if ( !chown $data->{owner}, $data->{group}, $path ) { | |
_error( $data, | |
"Cannot change ownership of $path to $data->{owner}:$data->{group}" | |
); | |
} | |
} | |
if ( exists $data->{chmod} ) { | |
# Coverage note: It's not clear how we would trigger the next | |
# 'if' block. Failure of 'chmod' might first result in a | |
# system error: "Permission denied". | |
if ( !chmod $data->{chmod}, $path ) { | |
_error( $data, | |
"Cannot change permissions of $path to $data->{chmod}" ); | |
} | |
} | |
} | |
else { | |
my $save_bang = $!; | |
# From 'perldoc perlvar': $EXTENDED_OS_ERROR ($^E) is documented | |
# as: | |
# Error information specific to the current operating system. At the | |
# moment, this differs from "$!" under only VMS, OS/2, and Win32 | |
# (and for MacPerl). On all other platforms, $^E is always just the | |
# same as $!. | |
my ( $e, $e1 ) = ( $save_bang, $^E ); | |
$e .= "; $e1" if $e ne $e1; | |
# allow for another process to have created it meanwhile | |
if ( ! -d $path ) { | |
$! = $save_bang; | |
if ( $data->{error} ) { | |
push @{ ${ $data->{error} } }, { $path => $e }; | |
} | |
else { | |
_croak("mkdir $path: $e"); | |
} | |
} | |
} | |
} | |
return @created; | |
} | |
sub remove_tree { | |
push @_, {} unless @_ and __is_arg( $_[-1] ); | |
goto &rmtree; | |
} | |
sub _is_subdir { | |
my ( $dir, $test ) = @_; | |
my ( $dv, $dd ) = File::Spec->splitpath( $dir, 1 ); | |
my ( $tv, $td ) = File::Spec->splitpath( $test, 1 ); | |
# not on same volume | |
return 0 if $dv ne $tv; | |
my @d = File::Spec->splitdir($dd); | |
my @t = File::Spec->splitdir($td); | |
# @t can't be a subdir if it's shorter than @d | |
return 0 if @t < @d; | |
return join( '/', @d ) eq join( '/', splice @t, 0, +@d ); | |
} | |
sub rmtree { | |
my $old_style = !( @_ and __is_arg( $_[-1] ) ); | |
my ($arg, $data, $paths); | |
if ($old_style) { | |
my ( $verbose, $safe ); | |
( $paths, $verbose, $safe ) = @_; | |
$data->{verbose} = $verbose; | |
$data->{safe} = defined $safe ? $safe : 0; | |
if ( defined($paths) and length($paths) ) { | |
$paths = [$paths] unless UNIVERSAL::isa( $paths, 'ARRAY' ); | |
} | |
else { | |
_carp("No root path(s) specified\n"); | |
return 0; | |
} | |
} | |
else { | |
my %args_permitted = map { $_ => 1 } ( qw| | |
error | |
keep_root | |
result | |
safe | |
verbose | |
| ); | |
my @bad_args = (); | |
my $arg = pop @_; | |
for my $k (sort keys %{$arg}) { | |
if (! $args_permitted{$k}) { | |
push @bad_args, $k; | |
} | |
else { | |
$data->{$k} = $arg->{$k}; | |
} | |
} | |
_carp("Unrecognized option(s) passed to remove_tree(): @bad_args") | |
if @bad_args; | |
${ $data->{error} } = [] if exists $data->{error}; | |
${ $data->{result} } = [] if exists $data->{result}; | |
# Wouldn't it make sense to do some validation on @_ before assigning | |
# to $paths here? | |
# In the $old_style case we guarantee that each path is both defined | |
# and non-empty. We don't check that here, which means we have to | |
# check it later in the first condition in this line: | |
# if ( $ortho_root_length && _is_subdir( $ortho_root, $ortho_cwd ) ) { | |
# Granted, that would be a change in behavior for the two | |
# non-old-style interfaces. | |
$paths = [@_]; | |
} | |
$data->{prefix} = ''; | |
$data->{depth} = 0; | |
my @clean_path; | |
$data->{cwd} = getcwd() or do { | |
_error( $data, "cannot fetch initial working directory" ); | |
return 0; | |
}; | |
for ( $data->{cwd} ) { /\A(.*)\Z/s; $_ = $1 } # untaint | |
for my $p (@$paths) { | |
# need to fixup case and map \ to / on Windows | |
my $ortho_root = _IS_MSWIN32 ? _slash_lc($p) : $p; | |
my $ortho_cwd = | |
_IS_MSWIN32 ? _slash_lc( $data->{cwd} ) : $data->{cwd}; | |
my $ortho_root_length = length($ortho_root); | |
$ortho_root_length-- if _IS_VMS; # don't compare '.' with ']' | |
if ( $ortho_root_length && _is_subdir( $ortho_root, $ortho_cwd ) ) { | |
local $! = 0; | |
_error( $data, "cannot remove path when cwd is $data->{cwd}", $p ); | |
next; | |
} | |
if (_IS_MACOS) { | |
$p = ":$p" unless $p =~ /:/; | |
$p .= ":" unless $p =~ /:\z/; | |
} | |
elsif ( _IS_MSWIN32 ) { | |
$p =~ s{[/\\]\z}{}; | |
} | |
else { | |
$p =~ s{/\z}{}; | |
} | |
push @clean_path, $p; | |
} | |
@{$data}{qw(device inode)} = ( lstat $data->{cwd} )[ 0, 1 ] or do { | |
_error( $data, "cannot stat initial working directory", $data->{cwd} ); | |
return 0; | |
}; | |
return _rmtree( $data, \@clean_path ); | |
} | |
sub _rmtree { | |
my $data = shift; | |
my $paths = shift; | |
my $count = 0; | |
my $curdir = File::Spec->curdir(); | |
my $updir = File::Spec->updir(); | |
my ( @files, $root ); | |
ROOT_DIR: | |
foreach my $root (@$paths) { | |
# since we chdir into each directory, it may not be obvious | |
# to figure out where we are if we generate a message about | |
# a file name. We therefore construct a semi-canonical | |
# filename, anchored from the directory being unlinked (as | |
# opposed to being truly canonical, anchored from the root (/). | |
my $canon = | |
$data->{prefix} | |
? File::Spec->catfile( $data->{prefix}, $root ) | |
: $root; | |
my ( $ldev, $lino, $perm ) = ( lstat $root )[ 0, 1, 2 ] | |
or next ROOT_DIR; | |
if ( -d _ ) { | |
$root = VMS::Filespec::vmspath( VMS::Filespec::pathify($root) ) | |
if _IS_VMS; | |
if ( !chdir($root) ) { | |
# see if we can escalate privileges to get in | |
# (e.g. funny protection mask such as -w- instead of rwx) | |
# This uses fchmod to avoid traversing outside of the proper | |
# location (CVE-2017-6512) | |
my $root_fh; | |
if (open($root_fh, '<', $root)) { | |
my ($fh_dev, $fh_inode) = (stat $root_fh )[0,1]; | |
$perm &= oct '7777'; | |
my $nperm = $perm | oct '700'; | |
local $@; | |
if ( | |
!( | |
$data->{safe} | |
or $nperm == $perm | |
or !-d _ | |
or $fh_dev ne $ldev | |
or $fh_inode ne $lino | |
or eval { chmod( $nperm, $root_fh ) } | |
) | |
) | |
{ | |
_error( $data, | |
"cannot make child directory read-write-exec", $canon ); | |
next ROOT_DIR; | |
} | |
close $root_fh; | |
} | |
if ( !chdir($root) ) { | |
_error( $data, "cannot chdir to child", $canon ); | |
next ROOT_DIR; | |
} | |
} | |
my ( $cur_dev, $cur_inode, $perm ) = ( stat $curdir )[ 0, 1, 2 ] | |
or do { | |
_error( $data, "cannot stat current working directory", $canon ); | |
next ROOT_DIR; | |
}; | |
if (_NEED_STAT_CHECK) { | |
( $ldev eq $cur_dev and $lino eq $cur_inode ) | |
or _croak( | |
"directory $canon changed before chdir, expected dev=$ldev ino=$lino, actual dev=$cur_dev ino=$cur_inode, aborting." | |
); | |
} | |
$perm &= oct '7777'; # don't forget setuid, setgid, sticky bits | |
my $nperm = $perm | oct '700'; | |
# notabene: 0700 is for making readable in the first place, | |
# it's also intended to change it to writable in case we have | |
# to recurse in which case we are better than rm -rf for | |
# subtrees with strange permissions | |
if ( | |
!( | |
$data->{safe} | |
or $nperm == $perm | |
or chmod( $nperm, $curdir ) | |
) | |
) | |
{ | |
_error( $data, "cannot make directory read+writeable", $canon ); | |
$nperm = $perm; | |
} | |
my $d; | |
$d = gensym() if $] < 5.006; | |
if ( !opendir $d, $curdir ) { | |
_error( $data, "cannot opendir", $canon ); | |
@files = (); | |
} | |
else { | |
if ( !defined ${^TAINT} or ${^TAINT} ) { | |
# Blindly untaint dir names if taint mode is active | |
@files = map { /\A(.*)\z/s; $1 } readdir $d; | |
} | |
else { | |
@files = readdir $d; | |
} | |
closedir $d; | |
} | |
if (_IS_VMS) { | |
# Deleting large numbers of files from VMS Files-11 | |
# filesystems is faster if done in reverse ASCIIbetical order. | |
# include '.' to '.;' from blead patch #31775 | |
@files = map { $_ eq '.' ? '.;' : $_ } reverse @files; | |
} | |
@files = grep { $_ ne $updir and $_ ne $curdir } @files; | |
if (@files) { | |
# remove the contained files before the directory itself | |
my $narg = {%$data}; | |
@{$narg}{qw(device inode cwd prefix depth)} = | |
( $cur_dev, $cur_inode, $updir, $canon, $data->{depth} + 1 ); | |
$count += _rmtree( $narg, \@files ); | |
} | |
# restore directory permissions of required now (in case the rmdir | |
# below fails), while we are still in the directory and may do so | |
# without a race via '.' | |
if ( $nperm != $perm and not chmod( $perm, $curdir ) ) { | |
_error( $data, "cannot reset chmod", $canon ); | |
} | |
# don't leave the client code in an unexpected directory | |
chdir( $data->{cwd} ) | |
or | |
_croak("cannot chdir to $data->{cwd} from $canon: $!, aborting."); | |
# ensure that a chdir upwards didn't take us somewhere other | |
# than we expected (see CVE-2002-0435) | |
( $cur_dev, $cur_inode ) = ( stat $curdir )[ 0, 1 ] | |
or _croak( | |
"cannot stat prior working directory $data->{cwd}: $!, aborting." | |
); | |
if (_NEED_STAT_CHECK) { | |
( $data->{device} eq $cur_dev and $data->{inode} eq $cur_inode ) | |
or _croak( "previous directory $data->{cwd} " | |
. "changed before entering $canon, " | |
. "expected dev=$ldev ino=$lino, " | |
. "actual dev=$cur_dev ino=$cur_inode, aborting." | |
); | |
} | |
if ( $data->{depth} or !$data->{keep_root} ) { | |
if ( $data->{safe} | |
&& ( _IS_VMS | |
? !&VMS::Filespec::candelete($root) | |
: !-w $root ) ) | |
{ | |
print "skipped $root\n" if $data->{verbose}; | |
next ROOT_DIR; | |
} | |
if ( _FORCE_WRITABLE and !chmod $perm | oct '700', $root ) { | |
_error( $data, "cannot make directory writeable", $canon ); | |
} | |
print "rmdir $root\n" if $data->{verbose}; | |
if ( rmdir $root ) { | |
push @{ ${ $data->{result} } }, $root if $data->{result}; | |
++$count; | |
} | |
else { | |
_error( $data, "cannot remove directory", $canon ); | |
if ( | |
_FORCE_WRITABLE | |
&& !chmod( $perm, | |
( _IS_VMS ? VMS::Filespec::fileify($root) : $root ) | |
) | |
) | |
{ | |
_error( | |
$data, | |
sprintf( "cannot restore permissions to 0%o", | |
$perm ), | |
$canon | |
); | |
} | |
} | |
} | |
} | |
else { | |
# not a directory | |
$root = VMS::Filespec::vmsify("./$root") | |
if _IS_VMS | |
&& !File::Spec->file_name_is_absolute($root) | |
&& ( $root !~ m/(?<!\^)[\]>]+/ ); # not already in VMS syntax | |
if ( | |
$data->{safe} | |
&& ( | |
_IS_VMS | |
? !&VMS::Filespec::candelete($root) | |
: !( -l $root || -w $root ) | |
) | |
) | |
{ | |
print "skipped $root\n" if $data->{verbose}; | |
next ROOT_DIR; | |
} | |
my $nperm = $perm & oct '7777' | oct '600'; | |
if ( _FORCE_WRITABLE | |
and $nperm != $perm | |
and not chmod $nperm, $root ) | |
{ | |
_error( $data, "cannot make file writeable", $canon ); | |
} | |
print "unlink $canon\n" if $data->{verbose}; | |
# delete all versions under VMS | |
for ( ; ; ) { | |
if ( unlink $root ) { | |
push @{ ${ $data->{result} } }, $root if $data->{result}; | |
} | |
else { | |
_error( $data, "cannot unlink file", $canon ); | |
_FORCE_WRITABLE and chmod( $perm, $root ) | |
or _error( $data, | |
sprintf( "cannot restore permissions to 0%o", $perm ), | |
$canon ); | |
last; | |
} | |
++$count; | |
last unless _IS_VMS && lstat $root; | |
} | |
} | |
} | |
return $count; | |
} | |
sub _slash_lc { | |
# fix up slashes and case on MSWin32 so that we can determine that | |
# c:\path\to\dir is underneath C:/Path/To | |
my $path = shift; | |
$path =~ tr{\\}{/}; | |
return lc($path); | |
} | |
1; | |
__END__ | |
=head1 NAME | |
File::Path - Create or remove directory trees | |
=head1 VERSION | |
2.18 - released November 4 2020. | |
=head1 SYNOPSIS | |
use File::Path qw(make_path remove_tree); | |
@created = make_path('foo/bar/baz', '/zug/zwang'); | |
@created = make_path('foo/bar/baz', '/zug/zwang', { | |
verbose => 1, | |
mode => 0711, | |
}); | |
make_path('foo/bar/baz', '/zug/zwang', { | |
chmod => 0777, | |
}); | |
$removed_count = remove_tree('foo/bar/baz', '/zug/zwang', { | |
verbose => 1, | |
error => \my $err_list, | |
safe => 1, | |
}); | |
# legacy (interface promoted before v2.00) | |
@created = mkpath('/foo/bar/baz'); | |
@created = mkpath('/foo/bar/baz', 1, 0711); | |
@created = mkpath(['/foo/bar/baz', 'blurfl/quux'], 1, 0711); | |
$removed_count = rmtree('foo/bar/baz', 1, 1); | |
$removed_count = rmtree(['foo/bar/baz', 'blurfl/quux'], 1, 1); | |
# legacy (interface promoted before v2.06) | |
@created = mkpath('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711 }); | |
$removed_count = rmtree('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711 }); | |
=head1 DESCRIPTION | |
This module provides a convenient way to create directories of | |
arbitrary depth and to delete an entire directory subtree from the | |
filesystem. | |
The following functions are provided: | |
=over | |
=item make_path( $dir1, $dir2, .... ) | |
=item make_path( $dir1, $dir2, ...., \%opts ) | |
The C<make_path> function creates the given directories if they don't | |
exist before, much like the Unix command C<mkdir -p>. | |
The function accepts a list of directories to be created. Its | |
behaviour may be tuned by an optional hashref appearing as the last | |
parameter on the call. | |
The function returns the list of directories actually created during | |
the call; in scalar context the number of directories created. | |
The following keys are recognised in the option hash: | |
=over | |
=item mode => $num | |
The numeric permissions mode to apply to each created directory | |
(defaults to C<0777>), to be modified by the current C<umask>. If the | |
directory already exists (and thus does not need to be created), | |
the permissions will not be modified. | |
C<mask> is recognised as an alias for this parameter. | |
=item chmod => $num | |
Takes a numeric mode to apply to each created directory (not | |
modified by the current C<umask>). If the directory already exists | |
(and thus does not need to be created), the permissions will | |
not be modified. | |
=item verbose => $bool | |
If present, will cause C<make_path> to print the name of each directory | |
as it is created. By default nothing is printed. | |
=item error => \$err | |
If present, it should be a reference to a scalar. | |
This scalar will be made to reference an array, which will | |
be used to store any errors that are encountered. See the L</"ERROR | |
HANDLING"> section for more information. | |
If this parameter is not used, certain error conditions may raise | |
a fatal error that will cause the program to halt, unless trapped | |
in an C<eval> block. | |
=item owner => $owner | |
=item user => $owner | |
=item uid => $owner | |
If present, will cause any created directory to be owned by C<$owner>. | |
If the value is numeric, it will be interpreted as a uid; otherwise a | |
username is assumed. An error will be issued if the username cannot be | |
mapped to a uid, the uid does not exist or the process lacks the | |
privileges to change ownership. | |
Ownership of directories that already exist will not be changed. | |
C<user> and C<uid> are aliases of C<owner>. | |
=item group => $group | |
If present, will cause any created directory to be owned by the group | |
C<$group>. If the value is numeric, it will be interpreted as a gid; | |
otherwise a group name is assumed. An error will be issued if the | |
group name cannot be mapped to a gid, the gid does not exist or the | |
process lacks the privileges to change group ownership. | |
Group ownership of directories that already exist will not be changed. | |
make_path '/var/tmp/webcache', {owner=>'nobody', group=>'nogroup'}; | |
=back | |
=item mkpath( $dir ) | |
=item mkpath( $dir, $verbose, $mode ) | |
=item mkpath( [$dir1, $dir2,...], $verbose, $mode ) | |
=item mkpath( $dir1, $dir2,..., \%opt ) | |
The C<mkpath()> function provide the legacy interface of | |
C<make_path()> with a different interpretation of the arguments | |
passed. The behaviour and return value of the function is otherwise | |
identical to C<make_path()>. | |
=item remove_tree( $dir1, $dir2, .... ) | |
=item remove_tree( $dir1, $dir2, ...., \%opts ) | |
The C<remove_tree> function deletes the given directories and any | |
files and subdirectories they might contain, much like the Unix | |
command C<rm -rf> or the Windows commands C<rmdir /s> and C<rd /s>. | |
The function accepts a list of directories to be removed. (In point of fact, | |
it will also accept filesystem entries which are not directories, such as | |
regular files and symlinks. But, as its name suggests, its intent is to | |
remove trees rather than individual files.) | |
C<remove_tree()>'s behaviour may be tuned by an optional hashref | |
appearing as the last parameter on the call. If an empty string is | |
passed to C<remove_tree>, an error will occur. | |
B<NOTE:> For security reasons, we strongly advise use of the | |
hashref-as-final-argument syntax -- specifically, with a setting of the C<safe> | |
element to a true value. | |
remove_tree( $dir1, $dir2, ...., | |
{ | |
safe => 1, | |
... # other key-value pairs | |
}, | |
); | |
The function returns the number of files successfully deleted. | |
The following keys are recognised in the option hash: | |
=over | |
=item verbose => $bool | |
If present, will cause C<remove_tree> to print the name of each file as | |
it is unlinked. By default nothing is printed. | |
=item safe => $bool | |
When set to a true value, will cause C<remove_tree> to skip the files | |
for which the process lacks the required privileges needed to delete | |
files, such as delete privileges on VMS. In other words, the code | |
will make no attempt to alter file permissions. Thus, if the process | |
is interrupted, no filesystem object will be left in a more | |
permissive mode. | |
=item keep_root => $bool | |
When set to a true value, will cause all files and subdirectories | |
to be removed, except the initially specified directories. This comes | |
in handy when cleaning out an application's scratch directory. | |
remove_tree( '/tmp', {keep_root => 1} ); | |
=item result => \$res | |
If present, it should be a reference to a scalar. | |
This scalar will be made to reference an array, which will | |
be used to store all files and directories unlinked | |
during the call. If nothing is unlinked, the array will be empty. | |
remove_tree( '/tmp', {result => \my $list} ); | |
print "unlinked $_\n" for @$list; | |
This is a useful alternative to the C<verbose> key. | |
=item error => \$err | |
If present, it should be a reference to a scalar. | |
This scalar will be made to reference an array, which will | |
be used to store any errors that are encountered. See the L</"ERROR | |
HANDLING"> section for more information. | |
Removing things is a much more dangerous proposition than | |
creating things. As such, there are certain conditions that | |
C<remove_tree> may encounter that are so dangerous that the only | |
sane action left is to kill the program. | |
Use C<error> to trap all that is reasonable (problems with | |
permissions and the like), and let it die if things get out | |
of hand. This is the safest course of action. | |
=back | |
=item rmtree( $dir ) | |
=item rmtree( $dir, $verbose, $safe ) | |
=item rmtree( [$dir1, $dir2,...], $verbose, $safe ) | |
=item rmtree( $dir1, $dir2,..., \%opt ) | |
The C<rmtree()> function provide the legacy interface of | |
C<remove_tree()> with a different interpretation of the arguments | |
passed. The behaviour and return value of the function is otherwise | |
identical to C<remove_tree()>. | |
B<NOTE:> For security reasons, we strongly advise use of the | |
hashref-as-final-argument syntax, specifically with a setting of the C<safe> | |
element to a true value. | |
rmtree( $dir1, $dir2, ...., | |
{ | |
safe => 1, | |
... # other key-value pairs | |
}, | |
); | |
=back | |
=head2 ERROR HANDLING | |
=over 4 | |
=item B<NOTE:> | |
The following error handling mechanism is consistent throughout all | |
code paths EXCEPT in cases where the ROOT node is nonexistent. In | |
version 2.11 the maintainers attempted to rectify this inconsistency | |
but too many downstream modules encountered problems. In such case, | |
if you require root node evaluation or error checking prior to calling | |
C<make_path> or C<remove_tree>, you should take additional precautions. | |
=back | |
If C<make_path> or C<remove_tree> encounters an error, a diagnostic | |
message will be printed to C<STDERR> via C<carp> (for non-fatal | |
errors) or via C<croak> (for fatal errors). | |
If this behaviour is not desirable, the C<error> attribute may be | |
used to hold a reference to a variable, which will be used to store | |
the diagnostics. The variable is made a reference to an array of hash | |
references. Each hash contain a single key/value pair where the key | |
is the name of the file, and the value is the error message (including | |
the contents of C<$!> when appropriate). If a general error is | |
encountered the diagnostic key will be empty. | |
An example usage looks like: | |
remove_tree( 'foo/bar', 'bar/rat', {error => \my $err} ); | |
if ($err && @$err) { | |
for my $diag (@$err) { | |
my ($file, $message) = %$diag; | |
if ($file eq '') { | |
print "general error: $message\n"; | |
} | |
else { | |
print "problem unlinking $file: $message\n"; | |
} | |
} | |
} | |
else { | |
print "No error encountered\n"; | |
} | |
Note that if no errors are encountered, C<$err> will reference an | |
empty array. This means that C<$err> will always end up TRUE; so you | |
need to test C<@$err> to determine if errors occurred. | |
=head2 NOTES | |
C<File::Path> blindly exports C<mkpath> and C<rmtree> into the | |
current namespace. These days, this is considered bad style, but | |
to change it now would break too much code. Nonetheless, you are | |
invited to specify what it is you are expecting to use: | |
use File::Path 'rmtree'; | |
The routines C<make_path> and C<remove_tree> are B<not> exported | |
by default. You must specify which ones you want to use. | |
use File::Path 'remove_tree'; | |
Note that a side-effect of the above is that C<mkpath> and C<rmtree> | |
are no longer exported at all. This is due to the way the C<Exporter> | |
module works. If you are migrating a codebase to use the new | |
interface, you will have to list everything explicitly. But that's | |
just good practice anyway. | |
use File::Path qw(remove_tree rmtree); | |
=head3 API CHANGES | |
The API was changed in the 2.0 branch. For a time, C<mkpath> and | |
C<rmtree> tried, unsuccessfully, to deal with the two different | |
calling mechanisms. This approach was considered a failure. | |
The new semantics are now only available with C<make_path> and | |
C<remove_tree>. The old semantics are only available through | |
C<mkpath> and C<rmtree>. Users are strongly encouraged to upgrade | |
to at least 2.08 in order to avoid surprises. | |
=head3 SECURITY CONSIDERATIONS | |
There were race conditions in the 1.x implementations of File::Path's | |
C<rmtree> function (although sometimes patched depending on the OS | |
distribution or platform). The 2.0 version contains code to avoid the | |
problem mentioned in CVE-2002-0435. | |
See the following pages for more information: | |
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=286905 | |
http://www.nntp.perl.org/group/perl.perl5.porters/2005/01/msg97623.html | |
http://www.debian.org/security/2005/dsa-696 | |
Additionally, unless the C<safe> parameter is set (or the | |
third parameter in the traditional interface is TRUE), should a | |
C<remove_tree> be interrupted, files that were originally in read-only | |
mode may now have their permissions set to a read-write (or "delete | |
OK") mode. | |
The following CVE reports were previously filed against File-Path and are | |
believed to have been addressed: | |
=over 4 | |
=item * L<http://cve.circl.lu/cve/CVE-2004-0452> | |
=item * L<http://cve.circl.lu/cve/CVE-2005-0448> | |
=back | |
In February 2017 the cPanel Security Team reported an additional vulnerability | |
in File-Path. The C<chmod()> logic to make directories traversable can be | |
abused to set the mode on an attacker-chosen file to an attacker-chosen value. | |
This is due to the time-of-check-to-time-of-use (TOCTTOU) race condition | |
(L<https://en.wikipedia.org/wiki/Time_of_check_to_time_of_use>) between the | |
C<stat()> that decides the inode is a directory and the C<chmod()> that tries | |
to make it user-rwx. CPAN versions 2.13 and later incorporate a patch | |
provided by John Lightsey to address this problem. This vulnerability has | |
been reported as CVE-2017-6512. | |
=head1 DIAGNOSTICS | |
FATAL errors will cause the program to halt (C<croak>), since the | |
problem is so severe that it would be dangerous to continue. (This | |
can always be trapped with C<eval>, but it's not a good idea. Under | |
the circumstances, dying is the best thing to do). | |
SEVERE errors may be trapped using the modern interface. If the | |
they are not trapped, or if the old interface is used, such an error | |
will cause the program will halt. | |
All other errors may be trapped using the modern interface, otherwise | |
they will be C<carp>ed about. Program execution will not be halted. | |
=over 4 | |
=item mkdir [path]: [errmsg] (SEVERE) | |
C<make_path> was unable to create the path. Probably some sort of | |
permissions error at the point of departure or insufficient resources | |
(such as free inodes on Unix). | |
=item No root path(s) specified | |
C<make_path> was not given any paths to create. This message is only | |
emitted if the routine is called with the traditional interface. | |
The modern interface will remain silent if given nothing to do. | |
=item No such file or directory | |
On Windows, if C<make_path> gives you this warning, it may mean that | |
you have exceeded your filesystem's maximum path length. | |
=item cannot fetch initial working directory: [errmsg] | |
C<remove_tree> attempted to determine the initial directory by calling | |
C<Cwd::getcwd>, but the call failed for some reason. No attempt | |
will be made to delete anything. | |
=item cannot stat initial working directory: [errmsg] | |
C<remove_tree> attempted to stat the initial directory (after having | |
successfully obtained its name via C<getcwd>), however, the call | |
failed for some reason. No attempt will be made to delete anything. | |
=item cannot chdir to [dir]: [errmsg] | |
C<remove_tree> attempted to set the working directory in order to | |
begin deleting the objects therein, but was unsuccessful. This is | |
usually a permissions issue. The routine will continue to delete | |
other things, but this directory will be left intact. | |
=item directory [dir] changed before chdir, expected dev=[n] ino=[n], actual dev=[n] ino=[n], aborting. (FATAL) | |
C<remove_tree> recorded the device and inode of a directory, and then | |
moved into it. It then performed a C<stat> on the current directory | |
and detected that the device and inode were no longer the same. As | |
this is at the heart of the race condition problem, the program | |
will die at this point. | |
=item cannot make directory [dir] read+writeable: [errmsg] | |
C<remove_tree> attempted to change the permissions on the current directory | |
to ensure that subsequent unlinkings would not run into problems, | |
but was unable to do so. The permissions remain as they were, and | |
the program will carry on, doing the best it can. | |
=item cannot read [dir]: [errmsg] | |
C<remove_tree> tried to read the contents of the directory in order | |
to acquire the names of the directory entries to be unlinked, but | |
was unsuccessful. This is usually a permissions issue. The | |
program will continue, but the files in this directory will remain | |
after the call. | |
=item cannot reset chmod [dir]: [errmsg] | |
C<remove_tree>, after having deleted everything in a directory, attempted | |
to restore its permissions to the original state but failed. The | |
directory may wind up being left behind. | |
=item cannot remove [dir] when cwd is [dir] | |
The current working directory of the program is F</some/path/to/here> | |
and you are attempting to remove an ancestor, such as F</some/path>. | |
The directory tree is left untouched. | |
The solution is to C<chdir> out of the child directory to a place | |
outside the directory tree to be removed. | |
=item cannot chdir to [parent-dir] from [child-dir]: [errmsg], aborting. (FATAL) | |
C<remove_tree>, after having deleted everything and restored the permissions | |
of a directory, was unable to chdir back to the parent. The program | |
halts to avoid a race condition from occurring. | |
=item cannot stat prior working directory [dir]: [errmsg], aborting. (FATAL) | |
C<remove_tree> was unable to stat the parent directory after having returned | |
from the child. Since there is no way of knowing if we returned to | |
where we think we should be (by comparing device and inode) the only | |
way out is to C<croak>. | |
=item previous directory [parent-dir] changed before entering [child-dir], expected dev=[n] ino=[n], actual dev=[n] ino=[n], aborting. (FATAL) | |
When C<remove_tree> returned from deleting files in a child directory, a | |
check revealed that the parent directory it returned to wasn't the one | |
it started out from. This is considered a sign of malicious activity. | |
=item cannot make directory [dir] writeable: [errmsg] | |
Just before removing a directory (after having successfully removed | |
everything it contained), C<remove_tree> attempted to set the permissions | |
on the directory to ensure it could be removed and failed. Program | |
execution continues, but the directory may possibly not be deleted. | |
=item cannot remove directory [dir]: [errmsg] | |
C<remove_tree> attempted to remove a directory, but failed. This may be because | |
some objects that were unable to be removed remain in the directory, or | |
it could be a permissions issue. The directory will be left behind. | |
=item cannot restore permissions of [dir] to [0nnn]: [errmsg] | |
After having failed to remove a directory, C<remove_tree> was unable to | |
restore its permissions from a permissive state back to a possibly | |
more restrictive setting. (Permissions given in octal). | |
=item cannot make file [file] writeable: [errmsg] | |
C<remove_tree> attempted to force the permissions of a file to ensure it | |
could be deleted, but failed to do so. It will, however, still attempt | |
to unlink the file. | |
=item cannot unlink file [file]: [errmsg] | |
C<remove_tree> failed to remove a file. Probably a permissions issue. | |
=item cannot restore permissions of [file] to [0nnn]: [errmsg] | |
After having failed to remove a file, C<remove_tree> was also unable | |
to restore the permissions on the file to a possibly less permissive | |
setting. (Permissions given in octal). | |
=item unable to map [owner] to a uid, ownership not changed"); | |
C<make_path> was instructed to give the ownership of created | |
directories to the symbolic name [owner], but C<getpwnam> did | |
not return the corresponding numeric uid. The directory will | |
be created, but ownership will not be changed. | |
=item unable to map [group] to a gid, group ownership not changed | |
C<make_path> was instructed to give the group ownership of created | |
directories to the symbolic name [group], but C<getgrnam> did | |
not return the corresponding numeric gid. The directory will | |
be created, but group ownership will not be changed. | |
=back | |
=head1 SEE ALSO | |
=over 4 | |
=item * | |
L<File::Remove> | |
Allows files and directories to be moved to the Trashcan/Recycle | |
Bin (where they may later be restored if necessary) if the operating | |
system supports such functionality. This feature may one day be | |
made available directly in C<File::Path>. | |
=item * | |
L<File::Find::Rule> | |
When removing directory trees, if you want to examine each file to | |
decide whether to delete it (and possibly leaving large swathes | |
alone), F<File::Find::Rule> offers a convenient and flexible approach | |
to examining directory trees. | |
=back | |
=head1 BUGS AND LIMITATIONS | |
The following describes F<File::Path> limitations and how to report bugs. | |
=head2 MULTITHREADED APPLICATIONS | |
F<File::Path> C<rmtree> and C<remove_tree> will not work with | |
multithreaded applications due to its use of C<chdir>. At this time, | |
no warning or error is generated in this situation. You will | |
certainly encounter unexpected results. | |
The implementation that surfaces this limitation will not be changed. See the | |
F<File::Path::Tiny> module for functionality similar to F<File::Path> but which does | |
not C<chdir>. | |
=head2 NFS Mount Points | |
F<File::Path> is not responsible for triggering the automounts, mirror mounts, | |
and the contents of network mounted filesystems. If your NFS implementation | |
requires an action to be performed on the filesystem in order for | |
F<File::Path> to perform operations, it is strongly suggested you assure | |
filesystem availability by reading the root of the mounted filesystem. | |
=head2 REPORTING BUGS | |
Please report all bugs on the RT queue, either via the web interface: | |
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Path> | |
or by email: | |
bug-File-Path@rt.cpan.org | |
In either case, please B<attach> patches to the bug report rather than | |
including them inline in the web post or the body of the email. | |
You can also send pull requests to the Github repository: | |
L<https://github.com/rpcme/File-Path> | |
=head1 ACKNOWLEDGEMENTS | |
Paul Szabo identified the race condition originally, and Brendan | |
O'Dea wrote an implementation for Debian that addressed the problem. | |
That code was used as a basis for the current code. Their efforts | |
are greatly appreciated. | |
Gisle Aas made a number of improvements to the documentation for | |
2.07 and his advice and assistance is also greatly appreciated. | |
=head1 AUTHORS | |
Prior authors and maintainers: Tim Bunce, Charles Bailey, and | |
David Landgren <F<david@landgren.net>>. | |
Current maintainers are Richard Elberger <F<riche@cpan.org>> and | |
James (Jim) Keenan <F<jkeenan@cpan.org>>. | |
=head1 CONTRIBUTORS | |
Contributors to File::Path, in alphabetical order by first name. | |
=over 1 | |
=item <F<bulkdd@cpan.org>> | |
=item Charlie Gonzalez <F<itcharlie@cpan.org>> | |
=item Craig A. Berry <F<craigberry@mac.com>> | |
=item James E Keenan <F<jkeenan@cpan.org>> | |
=item John Lightsey <F<john@perlsec.org>> | |
=item Nigel Horne <F<njh@bandsman.co.uk>> | |
=item Richard Elberger <F<riche@cpan.org>> | |
=item Ryan Yee <F<ryee@cpan.org>> | |
=item Skye Shaw <F<shaw@cpan.org>> | |
=item Tom Lutz <F<tommylutz@gmail.com>> | |
=item Will Sheppard <F<willsheppard@github>> | |
=back | |
=head1 COPYRIGHT | |
This module is copyright (C) Charles Bailey, Tim Bunce, David Landgren, | |
James Keenan and Richard Elberger 1995-2020. All rights reserved. | |
=head1 LICENSE | |
This library is free software; you can redistribute it and/or modify | |
it under the same terms as Perl itself. | |
=cut | |
FILE_PATH | |
$fatpacked{"OLE/Storage_Lite.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'OLE_STORAGE_LITE'; | |
# OLE::Storage_Lite | |
# by Kawai, Takanori (Hippo2000) 2000.11.4, 8, 14 | |
# This Program is Still ALPHA version. | |
#////////////////////////////////////////////////////////////////////////////// | |
# OLE::Storage_Lite::PPS Object | |
#////////////////////////////////////////////////////////////////////////////// | |
#============================================================================== | |
# OLE::Storage_Lite::PPS | |
#============================================================================== | |
package OLE::Storage_Lite::PPS; | |
require Exporter; | |
use strict; | |
use vars qw($VERSION @ISA); | |
@ISA = qw(Exporter); | |
$VERSION = '0.19'; | |
#------------------------------------------------------------------------------ | |
# new (OLE::Storage_Lite::PPS) | |
#------------------------------------------------------------------------------ | |
sub new ($$$$$$$$$$;$$) { | |
#1. Constructor for General Usage | |
my($sClass, $iNo, $sNm, $iType, $iPrev, $iNext, $iDir, | |
$raTime1st, $raTime2nd, $iStart, $iSize, $sData, $raChild) = @_; | |
if($iType == OLE::Storage_Lite::PpsType_File()) { #FILE | |
return OLE::Storage_Lite::PPS::File->_new | |
($iNo, $sNm, $iType, $iPrev, $iNext, $iDir, $raTime1st, $raTime2nd, | |
$iStart, $iSize, $sData, $raChild); | |
} | |
elsif($iType == OLE::Storage_Lite::PpsType_Dir()) { #DIRECTRY | |
return OLE::Storage_Lite::PPS::Dir->_new | |
($iNo, $sNm, $iType, $iPrev, $iNext, $iDir, $raTime1st, $raTime2nd, | |
$iStart, $iSize, $sData, $raChild); | |
} | |
elsif($iType == OLE::Storage_Lite::PpsType_Root()) { #ROOT | |
return OLE::Storage_Lite::PPS::Root->_new | |
($iNo, $sNm, $iType, $iPrev, $iNext, $iDir, $raTime1st, $raTime2nd, | |
$iStart, $iSize, $sData, $raChild); | |
} | |
else { | |
die "Error PPS:$iType $sNm\n"; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _new (OLE::Storage_Lite::PPS) | |
# for OLE::Storage_Lite | |
#------------------------------------------------------------------------------ | |
sub _new ($$$$$$$$$$$;$$) { | |
my($sClass, $iNo, $sNm, $iType, $iPrev, $iNext, $iDir, | |
$raTime1st, $raTime2nd, $iStart, $iSize, $sData, $raChild) = @_; | |
#1. Constructor for OLE::Storage_Lite | |
my $oThis = { | |
No => $iNo, | |
Name => $sNm, | |
Type => $iType, | |
PrevPps => $iPrev, | |
NextPps => $iNext, | |
DirPps => $iDir, | |
Time1st => $raTime1st, | |
Time2nd => $raTime2nd, | |
StartBlock => $iStart, | |
Size => $iSize, | |
Data => $sData, | |
Child => $raChild, | |
}; | |
bless $oThis, $sClass; | |
return $oThis; | |
} | |
#------------------------------------------------------------------------------ | |
# _DataLen (OLE::Storage_Lite::PPS) | |
# Check for update | |
#------------------------------------------------------------------------------ | |
sub _DataLen($) { | |
my($oSelf) =@_; | |
return 0 unless(defined($oSelf->{Data})); | |
return ($oSelf->{_PPS_FILE})? | |
($oSelf->{_PPS_FILE}->stat())[7] : length($oSelf->{Data}); | |
} | |
#------------------------------------------------------------------------------ | |
# _makeSmallData (OLE::Storage_Lite::PPS) | |
#------------------------------------------------------------------------------ | |
sub _makeSmallData($$$) { | |
my($oThis, $aList, $rhInfo) = @_; | |
my ($sRes); | |
my $FILE = $rhInfo->{_FILEH_}; | |
my $iSmBlk = 0; | |
foreach my $oPps (@$aList) { | |
#1. Make SBD, small data string | |
if($oPps->{Type}==OLE::Storage_Lite::PpsType_File()) { | |
next if($oPps->{Size}<=0); | |
if($oPps->{Size} < $rhInfo->{_SMALL_SIZE}) { | |
my $iSmbCnt = int($oPps->{Size} / $rhInfo->{_SMALL_BLOCK_SIZE}) | |
+ (($oPps->{Size} % $rhInfo->{_SMALL_BLOCK_SIZE})? 1: 0); | |
#1.1 Add to SBD | |
for (my $i = 0; $i<($iSmbCnt-1); $i++) { | |
print {$FILE} (pack("V", $i+$iSmBlk+1)); | |
} | |
print {$FILE} (pack("V", -2)); | |
#1.2 Add to Data String(this will be written for RootEntry) | |
#Check for update | |
if($oPps->{_PPS_FILE}) { | |
my $sBuff; | |
$oPps->{_PPS_FILE}->seek(0, 0); #To The Top | |
while($oPps->{_PPS_FILE}->read($sBuff, 4096)) { | |
$sRes .= $sBuff; | |
} | |
} | |
else { | |
$sRes .= $oPps->{Data}; | |
} | |
$sRes .= ("\x00" x | |
($rhInfo->{_SMALL_BLOCK_SIZE} - ($oPps->{Size}% $rhInfo->{_SMALL_BLOCK_SIZE}))) | |
if($oPps->{Size}% $rhInfo->{_SMALL_BLOCK_SIZE}); | |
#1.3 Set for PPS | |
$oPps->{StartBlock} = $iSmBlk; | |
$iSmBlk += $iSmbCnt; | |
} | |
} | |
} | |
my $iSbCnt = int($rhInfo->{_BIG_BLOCK_SIZE}/ OLE::Storage_Lite::LongIntSize()); | |
print {$FILE} (pack("V", -1) x ($iSbCnt - ($iSmBlk % $iSbCnt))) | |
if($iSmBlk % $iSbCnt); | |
#2. Write SBD with adjusting length for block | |
return $sRes; | |
} | |
#------------------------------------------------------------------------------ | |
# _savePpsWk (OLE::Storage_Lite::PPS) | |
#------------------------------------------------------------------------------ | |
sub _savePpsWk($$) | |
{ | |
my($oThis, $rhInfo) = @_; | |
#1. Write PPS | |
my $FILE = $rhInfo->{_FILEH_}; | |
print {$FILE} ( | |
$oThis->{Name} | |
. ("\x00" x (64 - length($oThis->{Name}))) #64 | |
, pack("v", length($oThis->{Name}) + 2) #66 | |
, pack("c", $oThis->{Type}) #67 | |
, pack("c", 0x00) #UK #68 | |
, pack("V", $oThis->{PrevPps}) #Prev #72 | |
, pack("V", $oThis->{NextPps}) #Next #76 | |
, pack("V", $oThis->{DirPps}) #Dir #80 | |
, "\x00\x09\x02\x00" #84 | |
, "\x00\x00\x00\x00" #88 | |
, "\xc0\x00\x00\x00" #92 | |
, "\x00\x00\x00\x46" #96 | |
, "\x00\x00\x00\x00" #100 | |
, OLE::Storage_Lite::LocalDate2OLE($oThis->{Time1st}) #108 | |
, OLE::Storage_Lite::LocalDate2OLE($oThis->{Time2nd}) #116 | |
, pack("V", defined($oThis->{StartBlock})? | |
$oThis->{StartBlock}:0) #116 | |
, pack("V", defined($oThis->{Size})? | |
$oThis->{Size} : 0) #124 | |
, pack("V", 0), #128 | |
); | |
} | |
#////////////////////////////////////////////////////////////////////////////// | |
# OLE::Storage_Lite::PPS::Root Object | |
#////////////////////////////////////////////////////////////////////////////// | |
#============================================================================== | |
# OLE::Storage_Lite::PPS::Root | |
#============================================================================== | |
package OLE::Storage_Lite::PPS::Root; | |
require Exporter; | |
use strict; | |
use IO::File; | |
use IO::Handle; | |
use Fcntl; | |
use vars qw($VERSION @ISA); | |
@ISA = qw(OLE::Storage_Lite::PPS Exporter); | |
$VERSION = '0.19'; | |
sub _savePpsSetPnt($$$); | |
sub _savePpsSetPnt2($$$); | |
#------------------------------------------------------------------------------ | |
# new (OLE::Storage_Lite::PPS::Root) | |
#------------------------------------------------------------------------------ | |
sub new ($;$$$) { | |
my($sClass, $raTime1st, $raTime2nd, $raChild) = @_; | |
OLE::Storage_Lite::PPS::_new( | |
$sClass, | |
undef, | |
OLE::Storage_Lite::Asc2Ucs('Root Entry'), | |
5, | |
undef, | |
undef, | |
undef, | |
$raTime1st, | |
$raTime2nd, | |
undef, | |
undef, | |
undef, | |
$raChild); | |
} | |
#------------------------------------------------------------------------------ | |
# save (OLE::Storage_Lite::PPS::Root) | |
#------------------------------------------------------------------------------ | |
sub save($$;$$) { | |
my($oThis, $sFile, $bNoAs, $rhInfo) = @_; | |
#0.Initial Setting for saving | |
$rhInfo = {} unless($rhInfo); | |
$rhInfo->{_BIG_BLOCK_SIZE} = 2** | |
(($rhInfo->{_BIG_BLOCK_SIZE})? | |
_adjust2($rhInfo->{_BIG_BLOCK_SIZE}) : 9); | |
$rhInfo->{_SMALL_BLOCK_SIZE}= 2 ** | |
(($rhInfo->{_SMALL_BLOCK_SIZE})? | |
_adjust2($rhInfo->{_SMALL_BLOCK_SIZE}): 6); | |
$rhInfo->{_SMALL_SIZE} = 0x1000; | |
$rhInfo->{_PPS_SIZE} = 0x80; | |
my $closeFile = 1; | |
#1.Open File | |
#1.1 $sFile is Ref of scalar | |
if(ref($sFile) eq 'SCALAR') { | |
require IO::Scalar; | |
my $oIo = new IO::Scalar $sFile, O_WRONLY; | |
$rhInfo->{_FILEH_} = $oIo; | |
} | |
#1.1.1 $sFile is a IO::Scalar object | |
# Now handled as a filehandle ref below. | |
#1.2 $sFile is a IO::Handle object | |
elsif(UNIVERSAL::isa($sFile, 'IO::Handle')) { | |
# Not all filehandles support binmode() so try it in an eval. | |
eval{ binmode $sFile }; | |
$rhInfo->{_FILEH_} = $sFile; | |
} | |
#1.3 $sFile is a simple filename string | |
elsif(!ref($sFile)) { | |
if($sFile ne '-') { | |
my $oIo = new IO::File; | |
$oIo->open(">$sFile") || return undef; | |
binmode($oIo); | |
$rhInfo->{_FILEH_} = $oIo; | |
} | |
else { | |
my $oIo = new IO::Handle; | |
$oIo->fdopen(fileno(STDOUT),"w") || return undef; | |
binmode($oIo); | |
$rhInfo->{_FILEH_} = $oIo; | |
} | |
} | |
#1.4 Assume that if $sFile is a ref then it is a valid filehandle | |
else { | |
# Not all filehandles support binmode() so try it in an eval. | |
eval{ binmode $sFile }; | |
$rhInfo->{_FILEH_} = $sFile; | |
# Caller controls filehandle closing | |
$closeFile = 0; | |
} | |
my $iBlk = 0; | |
#1. Make an array of PPS (for Save) | |
my @aList=(); | |
if($bNoAs) { | |
_savePpsSetPnt2([$oThis], \@aList, $rhInfo); | |
} | |
else { | |
_savePpsSetPnt([$oThis], \@aList, $rhInfo); | |
} | |
my ($iSBDcnt, $iBBcnt, $iPPScnt) = $oThis->_calcSize(\@aList, $rhInfo); | |
#2.Save Header | |
$oThis->_saveHeader($rhInfo, $iSBDcnt, $iBBcnt, $iPPScnt); | |
#3.Make Small Data string (write SBD) | |
my $sSmWk = $oThis->_makeSmallData(\@aList, $rhInfo); | |
$oThis->{Data} = $sSmWk; #Small Datas become RootEntry Data | |
#4. Write BB | |
my $iBBlk = $iSBDcnt; | |
$oThis->_saveBigData(\$iBBlk, \@aList, $rhInfo); | |
#5. Write PPS | |
$oThis->_savePps(\@aList, $rhInfo); | |
#6. Write BD and BDList and Adding Header informations | |
$oThis->_saveBbd($iSBDcnt, $iBBcnt, $iPPScnt, $rhInfo); | |
#7.Close File | |
return $rhInfo->{_FILEH_}->close if $closeFile; | |
} | |
#------------------------------------------------------------------------------ | |
# _calcSize (OLE::Storage_Lite::PPS) | |
#------------------------------------------------------------------------------ | |
sub _calcSize($$) | |
{ | |
my($oThis, $raList, $rhInfo) = @_; | |
#0. Calculate Basic Setting | |
my ($iSBDcnt, $iBBcnt, $iPPScnt) = (0,0,0); | |
my $iSmallLen = 0; | |
my $iSBcnt = 0; | |
foreach my $oPps (@$raList) { | |
if($oPps->{Type}==OLE::Storage_Lite::PpsType_File()) { | |
$oPps->{Size} = $oPps->_DataLen(); #Mod | |
if($oPps->{Size} < $rhInfo->{_SMALL_SIZE}) { | |
$iSBcnt += int($oPps->{Size} / $rhInfo->{_SMALL_BLOCK_SIZE}) | |
+ (($oPps->{Size} % $rhInfo->{_SMALL_BLOCK_SIZE})? 1: 0); | |
} | |
else { | |
$iBBcnt += | |
(int($oPps->{Size}/ $rhInfo->{_BIG_BLOCK_SIZE}) + | |
(($oPps->{Size}% $rhInfo->{_BIG_BLOCK_SIZE})? 1: 0)); | |
} | |
} | |
} | |
$iSmallLen = $iSBcnt * $rhInfo->{_SMALL_BLOCK_SIZE}; | |
my $iSlCnt = int($rhInfo->{_BIG_BLOCK_SIZE}/ OLE::Storage_Lite::LongIntSize()); | |
$iSBDcnt = int($iSBcnt / $iSlCnt)+ (($iSBcnt % $iSlCnt)? 1:0); | |
$iBBcnt += (int($iSmallLen/ $rhInfo->{_BIG_BLOCK_SIZE}) + | |
(( $iSmallLen% $rhInfo->{_BIG_BLOCK_SIZE})? 1: 0)); | |
my $iCnt = scalar(@$raList); | |
my $iBdCnt = $rhInfo->{_BIG_BLOCK_SIZE}/OLE::Storage_Lite::PpsSize(); | |
$iPPScnt = (int($iCnt/$iBdCnt) + (($iCnt % $iBdCnt)? 1: 0)); | |
return ($iSBDcnt, $iBBcnt, $iPPScnt); | |
} | |
#------------------------------------------------------------------------------ | |
# _adjust2 (OLE::Storage_Lite::PPS::Root) | |
#------------------------------------------------------------------------------ | |
sub _adjust2($) { | |
my($i2) = @_; | |
my $iWk; | |
$iWk = log($i2)/log(2); | |
return ($iWk > int($iWk))? int($iWk)+1:$iWk; | |
} | |
#------------------------------------------------------------------------------ | |
# _saveHeader (OLE::Storage_Lite::PPS::Root) | |
#------------------------------------------------------------------------------ | |
sub _saveHeader($$$$$) { | |
my($oThis, $rhInfo, $iSBDcnt, $iBBcnt, $iPPScnt) = @_; | |
my $FILE = $rhInfo->{_FILEH_}; | |
#0. Calculate Basic Setting | |
my $iBlCnt = $rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::LongIntSize(); | |
my $i1stBdL = int(($rhInfo->{_BIG_BLOCK_SIZE} - 0x4C) / OLE::Storage_Lite::LongIntSize()); | |
my $i1stBdMax = $i1stBdL * $iBlCnt - $i1stBdL; | |
my $iBdExL = 0; | |
my $iAll = $iBBcnt + $iPPScnt + $iSBDcnt; | |
my $iAllW = $iAll; | |
my $iBdCntW = int($iAllW / $iBlCnt) + (($iAllW % $iBlCnt)? 1: 0); | |
my $iBdCnt = int(($iAll + $iBdCntW) / $iBlCnt) + ((($iAllW+$iBdCntW) % $iBlCnt)? 1: 0); | |
my $i; | |
if ($iBdCnt > $i1stBdL) { | |
#0.1 Calculate BD count | |
$iBlCnt--; #the BlCnt is reduced in the count of the last sect is used for a pointer the next Bl | |
my $iBBleftover = $iAll - $i1stBdMax; | |
if ($iAll >$i1stBdMax) { | |
while(1) { | |
$iBdCnt = int(($iBBleftover) / $iBlCnt) + ((($iBBleftover) % $iBlCnt)? 1: 0); | |
$iBdExL = int(($iBdCnt) / $iBlCnt) + ((($iBdCnt) % $iBlCnt)? 1: 0); | |
$iBBleftover = $iBBleftover + $iBdExL; | |
last if($iBdCnt == (int(($iBBleftover) / $iBlCnt) + ((($iBBleftover) % $iBlCnt)? 1: 0))); | |
} | |
} | |
$iBdCnt += $i1stBdL; | |
#print "iBdCnt = $iBdCnt \n"; | |
} | |
#1.Save Header | |
print {$FILE} ( | |
"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" | |
, "\x00\x00\x00\x00" x 4 | |
, pack("v", 0x3b) | |
, pack("v", 0x03) | |
, pack("v", -2) | |
, pack("v", 9) | |
, pack("v", 6) | |
, pack("v", 0) | |
, "\x00\x00\x00\x00" x 2 | |
, pack("V", $iBdCnt), | |
, pack("V", $iBBcnt+$iSBDcnt), #ROOT START | |
, pack("V", 0) | |
, pack("V", 0x1000) | |
, pack("V", $iSBDcnt ? 0 : -2) #Small Block Depot | |
, pack("V", $iSBDcnt) | |
); | |
#2. Extra BDList Start, Count | |
if($iAll <= $i1stBdMax) { | |
print {$FILE} ( | |
pack("V", -2), #Extra BDList Start | |
pack("V", 0), #Extra BDList Count | |
); | |
} | |
else { | |
print {$FILE} ( | |
pack("V", $iAll+$iBdCnt), | |
pack("V", $iBdExL), | |
); | |
} | |
#3. BDList | |
for($i=0; $i<$i1stBdL and $i < $iBdCnt; $i++) { | |
print {$FILE} (pack("V", $iAll+$i)); | |
} | |
print {$FILE} ((pack("V", -1)) x($i1stBdL-$i)) if($i<$i1stBdL); | |
} | |
#------------------------------------------------------------------------------ | |
# _saveBigData (OLE::Storage_Lite::PPS) | |
#------------------------------------------------------------------------------ | |
sub _saveBigData($$$$) { | |
my($oThis, $iStBlk, $raList, $rhInfo) = @_; | |
my $iRes = 0; | |
my $FILE = $rhInfo->{_FILEH_}; | |
#1.Write Big (ge 0x1000) Data into Block | |
foreach my $oPps (@$raList) { | |
if($oPps->{Type}!=OLE::Storage_Lite::PpsType_Dir()) { | |
#print "PPS: $oPps DEF:", defined($oPps->{Data}), "\n"; | |
$oPps->{Size} = $oPps->_DataLen(); #Mod | |
if(($oPps->{Size} >= $rhInfo->{_SMALL_SIZE}) || | |
(($oPps->{Type} == OLE::Storage_Lite::PpsType_Root()) && defined($oPps->{Data}))) { | |
#1.1 Write Data | |
#Check for update | |
if($oPps->{_PPS_FILE}) { | |
my $sBuff; | |
my $iLen = 0; | |
$oPps->{_PPS_FILE}->seek(0, 0); #To The Top | |
while($oPps->{_PPS_FILE}->read($sBuff, 4096)) { | |
$iLen += length($sBuff); | |
print {$FILE} ($sBuff); #Check for update | |
} | |
} | |
else { | |
print {$FILE} ($oPps->{Data}); | |
} | |
print {$FILE} ( | |
"\x00" x | |
($rhInfo->{_BIG_BLOCK_SIZE} - | |
($oPps->{Size} % $rhInfo->{_BIG_BLOCK_SIZE})) | |
) if ($oPps->{Size} % $rhInfo->{_BIG_BLOCK_SIZE}); | |
#1.2 Set For PPS | |
$oPps->{StartBlock} = $$iStBlk; | |
$$iStBlk += | |
(int($oPps->{Size}/ $rhInfo->{_BIG_BLOCK_SIZE}) + | |
(($oPps->{Size}% $rhInfo->{_BIG_BLOCK_SIZE})? 1: 0)); | |
} | |
} | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _savePps (OLE::Storage_Lite::PPS::Root) | |
#------------------------------------------------------------------------------ | |
sub _savePps($$$) | |
{ | |
my($oThis, $raList, $rhInfo) = @_; | |
#0. Initial | |
my $FILE = $rhInfo->{_FILEH_}; | |
#2. Save PPS | |
foreach my $oItem (@$raList) { | |
$oItem->_savePpsWk($rhInfo); | |
} | |
#3. Adjust for Block | |
my $iCnt = scalar(@$raList); | |
my $iBCnt = $rhInfo->{_BIG_BLOCK_SIZE} / $rhInfo->{_PPS_SIZE}; | |
print {$FILE} ("\x00" x (($iBCnt - ($iCnt % $iBCnt)) * $rhInfo->{_PPS_SIZE})) | |
if($iCnt % $iBCnt); | |
return int($iCnt / $iBCnt) + (($iCnt % $iBCnt)? 1: 0); | |
} | |
#------------------------------------------------------------------------------ | |
# _savePpsSetPnt2 (OLE::Storage_Lite::PPS::Root) | |
# For Test | |
#------------------------------------------------------------------------------ | |
sub _savePpsSetPnt2($$$) | |
{ | |
my($aThis, $raList, $rhInfo) = @_; | |
#1. make Array as Children-Relations | |
#1.1 if No Children | |
if($#$aThis < 0) { | |
return 0xFFFFFFFF; | |
} | |
elsif($#$aThis == 0) { | |
#1.2 Just Only one | |
push @$raList, $aThis->[0]; | |
$aThis->[0]->{No} = $#$raList; | |
$aThis->[0]->{PrevPps} = 0xFFFFFFFF; | |
$aThis->[0]->{NextPps} = 0xFFFFFFFF; | |
$aThis->[0]->{DirPps} = _savePpsSetPnt2($aThis->[0]->{Child}, $raList, $rhInfo); | |
return $aThis->[0]->{No}; | |
} | |
else { | |
#1.3 Array | |
my $iCnt = $#$aThis + 1; | |
#1.3.1 Define Center | |
my $iPos = 0; #int($iCnt/ 2); #$iCnt | |
my @aWk = @$aThis; | |
my @aPrev = ($#$aThis > 1)? splice(@aWk, 1, 1) : (); #$iPos); | |
my @aNext = splice(@aWk, 1); #, $iCnt - $iPos -1); | |
$aThis->[$iPos]->{PrevPps} = _savePpsSetPnt2( | |
\@aPrev, $raList, $rhInfo); | |
push @$raList, $aThis->[$iPos]; | |
$aThis->[$iPos]->{No} = $#$raList; | |
#1.3.2 Devide a array into Previous,Next | |
$aThis->[$iPos]->{NextPps} = _savePpsSetPnt2( | |
\@aNext, $raList, $rhInfo); | |
$aThis->[$iPos]->{DirPps} = _savePpsSetPnt2($aThis->[$iPos]->{Child}, $raList, $rhInfo); | |
return $aThis->[$iPos]->{No}; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _savePpsSetPnt2 (OLE::Storage_Lite::PPS::Root) | |
# For Test | |
#------------------------------------------------------------------------------ | |
sub _savePpsSetPnt2s($$$) | |
{ | |
my($aThis, $raList, $rhInfo) = @_; | |
#1. make Array as Children-Relations | |
#1.1 if No Children | |
if($#$aThis < 0) { | |
return 0xFFFFFFFF; | |
} | |
elsif($#$aThis == 0) { | |
#1.2 Just Only one | |
push @$raList, $aThis->[0]; | |
$aThis->[0]->{No} = $#$raList; | |
$aThis->[0]->{PrevPps} = 0xFFFFFFFF; | |
$aThis->[0]->{NextPps} = 0xFFFFFFFF; | |
$aThis->[0]->{DirPps} = _savePpsSetPnt2($aThis->[0]->{Child}, $raList, $rhInfo); | |
return $aThis->[0]->{No}; | |
} | |
else { | |
#1.3 Array | |
my $iCnt = $#$aThis + 1; | |
#1.3.1 Define Center | |
my $iPos = 0; #int($iCnt/ 2); #$iCnt | |
push @$raList, $aThis->[$iPos]; | |
$aThis->[$iPos]->{No} = $#$raList; | |
my @aWk = @$aThis; | |
#1.3.2 Devide a array into Previous,Next | |
my @aPrev = splice(@aWk, 0, $iPos); | |
my @aNext = splice(@aWk, 1, $iCnt - $iPos -1); | |
$aThis->[$iPos]->{PrevPps} = _savePpsSetPnt2( | |
\@aPrev, $raList, $rhInfo); | |
$aThis->[$iPos]->{NextPps} = _savePpsSetPnt2( | |
\@aNext, $raList, $rhInfo); | |
$aThis->[$iPos]->{DirPps} = _savePpsSetPnt2($aThis->[$iPos]->{Child}, $raList, $rhInfo); | |
return $aThis->[$iPos]->{No}; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _savePpsSetPnt (OLE::Storage_Lite::PPS::Root) | |
#------------------------------------------------------------------------------ | |
sub _savePpsSetPnt($$$) | |
{ | |
my($aThis, $raList, $rhInfo) = @_; | |
#1. make Array as Children-Relations | |
#1.1 if No Children | |
if($#$aThis < 0) { | |
return 0xFFFFFFFF; | |
} | |
elsif($#$aThis == 0) { | |
#1.2 Just Only one | |
push @$raList, $aThis->[0]; | |
$aThis->[0]->{No} = $#$raList; | |
$aThis->[0]->{PrevPps} = 0xFFFFFFFF; | |
$aThis->[0]->{NextPps} = 0xFFFFFFFF; | |
$aThis->[0]->{DirPps} = _savePpsSetPnt($aThis->[0]->{Child}, $raList, $rhInfo); | |
return $aThis->[0]->{No}; | |
} | |
else { | |
#1.3 Array | |
my $iCnt = $#$aThis + 1; | |
#1.3.1 Define Center | |
my $iPos = int($iCnt/ 2); #$iCnt | |
push @$raList, $aThis->[$iPos]; | |
$aThis->[$iPos]->{No} = $#$raList; | |
my @aWk = @$aThis; | |
#1.3.2 Devide a array into Previous,Next | |
my @aPrev = splice(@aWk, 0, $iPos); | |
my @aNext = splice(@aWk, 1, $iCnt - $iPos -1); | |
$aThis->[$iPos]->{PrevPps} = _savePpsSetPnt( | |
\@aPrev, $raList, $rhInfo); | |
$aThis->[$iPos]->{NextPps} = _savePpsSetPnt( | |
\@aNext, $raList, $rhInfo); | |
$aThis->[$iPos]->{DirPps} = _savePpsSetPnt($aThis->[$iPos]->{Child}, $raList, $rhInfo); | |
return $aThis->[$iPos]->{No}; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _savePpsSetPnt (OLE::Storage_Lite::PPS::Root) | |
#------------------------------------------------------------------------------ | |
sub _savePpsSetPnt1($$$) | |
{ | |
my($aThis, $raList, $rhInfo) = @_; | |
#1. make Array as Children-Relations | |
#1.1 if No Children | |
if($#$aThis < 0) { | |
return 0xFFFFFFFF; | |
} | |
elsif($#$aThis == 0) { | |
#1.2 Just Only one | |
push @$raList, $aThis->[0]; | |
$aThis->[0]->{No} = $#$raList; | |
$aThis->[0]->{PrevPps} = 0xFFFFFFFF; | |
$aThis->[0]->{NextPps} = 0xFFFFFFFF; | |
$aThis->[0]->{DirPps} = _savePpsSetPnt($aThis->[0]->{Child}, $raList, $rhInfo); | |
return $aThis->[0]->{No}; | |
} | |
else { | |
#1.3 Array | |
my $iCnt = $#$aThis + 1; | |
#1.3.1 Define Center | |
my $iPos = int($iCnt/ 2); #$iCnt | |
push @$raList, $aThis->[$iPos]; | |
$aThis->[$iPos]->{No} = $#$raList; | |
my @aWk = @$aThis; | |
#1.3.2 Devide a array into Previous,Next | |
my @aPrev = splice(@aWk, 0, $iPos); | |
my @aNext = splice(@aWk, 1, $iCnt - $iPos -1); | |
$aThis->[$iPos]->{PrevPps} = _savePpsSetPnt( | |
\@aPrev, $raList, $rhInfo); | |
$aThis->[$iPos]->{NextPps} = _savePpsSetPnt( | |
\@aNext, $raList, $rhInfo); | |
$aThis->[$iPos]->{DirPps} = _savePpsSetPnt($aThis->[$iPos]->{Child}, $raList, $rhInfo); | |
return $aThis->[$iPos]->{No}; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _saveBbd (OLE::Storage_Lite) | |
#------------------------------------------------------------------------------ | |
sub _saveBbd($$$$) | |
{ | |
my($oThis, $iSbdSize, $iBsize, $iPpsCnt, $rhInfo) = @_; | |
my $FILE = $rhInfo->{_FILEH_}; | |
#0. Calculate Basic Setting | |
my $iBbCnt = $rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::LongIntSize(); | |
my $iBlCnt = $iBbCnt - 1; | |
my $i1stBdL = int(($rhInfo->{_BIG_BLOCK_SIZE} - 0x4C) / OLE::Storage_Lite::LongIntSize()); | |
my $i1stBdMax = $i1stBdL * $iBbCnt - $i1stBdL; | |
my $iBdExL = 0; | |
my $iAll = $iBsize + $iPpsCnt + $iSbdSize; | |
my $iAllW = $iAll; | |
my $iBdCntW = int($iAllW / $iBbCnt) + (($iAllW % $iBbCnt)? 1: 0); | |
my $iBdCnt = 0; | |
my $i; | |
#0.1 Calculate BD count | |
my $iBBleftover = $iAll - $i1stBdMax; | |
if ($iAll >$i1stBdMax) { | |
while(1) { | |
$iBdCnt = int(($iBBleftover) / $iBlCnt) + ((($iBBleftover) % $iBlCnt)? 1: 0); | |
$iBdExL = int(($iBdCnt) / $iBlCnt) + ((($iBdCnt) % $iBlCnt)? 1: 0); | |
$iBBleftover = $iBBleftover + $iBdExL; | |
last if($iBdCnt == (int(($iBBleftover) / $iBlCnt) + ((($iBBleftover) % $iBlCnt)? 1: 0))); | |
} | |
} | |
$iAllW += $iBdExL; | |
$iBdCnt += $i1stBdL; | |
#print "iBdCnt = $iBdCnt \n"; | |
#1. Making BD | |
#1.1 Set for SBD | |
if($iSbdSize > 0) { | |
for ($i = 0; $i<($iSbdSize-1); $i++) { | |
print {$FILE} (pack("V", $i+1)); | |
} | |
print {$FILE} (pack("V", -2)); | |
} | |
#1.2 Set for B | |
for ($i = 0; $i<($iBsize-1); $i++) { | |
print {$FILE} (pack("V", $i+$iSbdSize+1)); | |
} | |
print {$FILE} (pack("V", -2)); | |
#1.3 Set for PPS | |
for ($i = 0; $i<($iPpsCnt-1); $i++) { | |
print {$FILE} (pack("V", $i+$iSbdSize+$iBsize+1)); | |
} | |
print {$FILE} (pack("V", -2)); | |
#1.4 Set for BBD itself ( 0xFFFFFFFD : BBD) | |
for($i=0; $i<$iBdCnt;$i++) { | |
print {$FILE} (pack("V", 0xFFFFFFFD)); | |
} | |
#1.5 Set for ExtraBDList | |
for($i=0; $i<$iBdExL;$i++) { | |
print {$FILE} (pack("V", 0xFFFFFFFC)); | |
} | |
#1.6 Adjust for Block | |
print {$FILE} (pack("V", -1) x ($iBbCnt - (($iAllW + $iBdCnt) % $iBbCnt))) | |
if(($iAllW + $iBdCnt) % $iBbCnt); | |
#2.Extra BDList | |
if($iBdCnt > $i1stBdL) { | |
my $iN=0; | |
my $iNb=0; | |
for($i=$i1stBdL;$i<$iBdCnt; $i++, $iN++) { | |
if($iN>=($iBbCnt-1)) { | |
$iN = 0; | |
$iNb++; | |
print {$FILE} (pack("V", $iAll+$iBdCnt+$iNb)); | |
} | |
print {$FILE} (pack("V", $iBsize+$iSbdSize+$iPpsCnt+$i)); | |
} | |
print {$FILE} (pack("V", -1) x (($iBbCnt-1) - (($iBdCnt-$i1stBdL) % ($iBbCnt-1)))) | |
if(($iBdCnt-$i1stBdL) % ($iBbCnt-1)); | |
print {$FILE} (pack("V", -2)); | |
} | |
} | |
#////////////////////////////////////////////////////////////////////////////// | |
# OLE::Storage_Lite::PPS::File Object | |
#////////////////////////////////////////////////////////////////////////////// | |
#============================================================================== | |
# OLE::Storage_Lite::PPS::File | |
#============================================================================== | |
package OLE::Storage_Lite::PPS::File; | |
require Exporter; | |
use strict; | |
use vars qw($VERSION @ISA); | |
@ISA = qw(OLE::Storage_Lite::PPS Exporter); | |
$VERSION = '0.19'; | |
#------------------------------------------------------------------------------ | |
# new (OLE::Storage_Lite::PPS::File) | |
#------------------------------------------------------------------------------ | |
sub new ($$$) { | |
my($sClass, $sNm, $sData) = @_; | |
OLE::Storage_Lite::PPS::_new( | |
$sClass, | |
undef, | |
$sNm, | |
2, | |
undef, | |
undef, | |
undef, | |
undef, | |
undef, | |
undef, | |
undef, | |
$sData, | |
undef); | |
} | |
#------------------------------------------------------------------------------ | |
# newFile (OLE::Storage_Lite::PPS::File) | |
#------------------------------------------------------------------------------ | |
sub newFile ($$;$) { | |
my($sClass, $sNm, $sFile) = @_; | |
my $oSelf = | |
OLE::Storage_Lite::PPS::_new( | |
$sClass, | |
undef, | |
$sNm, | |
2, | |
undef, | |
undef, | |
undef, | |
undef, | |
undef, | |
undef, | |
undef, | |
'', | |
undef); | |
# | |
if((!defined($sFile)) or ($sFile eq '')) { | |
$oSelf->{_PPS_FILE} = IO::File->new_tmpfile(); | |
} | |
elsif(UNIVERSAL::isa($sFile, 'IO::Handle')) { | |
$oSelf->{_PPS_FILE} = $sFile; | |
} | |
elsif(!ref($sFile)) { | |
#File Name | |
$oSelf->{_PPS_FILE} = new IO::File; | |
return undef unless($oSelf->{_PPS_FILE}); | |
$oSelf->{_PPS_FILE}->open("$sFile", "r+") || return undef; | |
} | |
else { | |
return undef; | |
} | |
if($oSelf->{_PPS_FILE}) { | |
$oSelf->{_PPS_FILE}->seek(0, 2); | |
binmode($oSelf->{_PPS_FILE}); | |
$oSelf->{_PPS_FILE}->autoflush(1); | |
} | |
return $oSelf; | |
} | |
#------------------------------------------------------------------------------ | |
# append (OLE::Storage_Lite::PPS::File) | |
#------------------------------------------------------------------------------ | |
sub append ($$) { | |
my($oSelf, $sData) = @_; | |
if($oSelf->{_PPS_FILE}) { | |
print {$oSelf->{_PPS_FILE}} $sData; | |
} | |
else { | |
$oSelf->{Data} .= $sData; | |
} | |
} | |
#////////////////////////////////////////////////////////////////////////////// | |
# OLE::Storage_Lite::PPS::Dir Object | |
#////////////////////////////////////////////////////////////////////////////// | |
#------------------------------------------------------------------------------ | |
# new (OLE::Storage_Lite::PPS::Dir) | |
#------------------------------------------------------------------------------ | |
package OLE::Storage_Lite::PPS::Dir; | |
require Exporter; | |
use strict; | |
use vars qw($VERSION @ISA); | |
@ISA = qw(OLE::Storage_Lite::PPS Exporter); | |
$VERSION = '0.19'; | |
sub new ($$;$$$) { | |
my($sClass, $sName, $raTime1st, $raTime2nd, $raChild) = @_; | |
OLE::Storage_Lite::PPS::_new( | |
$sClass, | |
undef, | |
$sName, | |
1, | |
undef, | |
undef, | |
undef, | |
$raTime1st, | |
$raTime2nd, | |
undef, | |
undef, | |
undef, | |
$raChild); | |
} | |
#============================================================================== | |
# OLE::Storage_Lite | |
#============================================================================== | |
package OLE::Storage_Lite; | |
require Exporter; | |
use strict; | |
use IO::File; | |
use Time::Local 'timegm'; | |
use vars qw($VERSION @ISA @EXPORT); | |
@ISA = qw(Exporter); | |
$VERSION = '0.19'; | |
sub _getPpsSearch($$$$$;$); | |
sub _getPpsTree($$$;$); | |
#------------------------------------------------------------------------------ | |
# Const for OLE::Storage_Lite | |
#------------------------------------------------------------------------------ | |
#0. Constants | |
sub PpsType_Root {5}; | |
sub PpsType_Dir {1}; | |
sub PpsType_File {2}; | |
sub DataSizeSmall{0x1000}; | |
sub LongIntSize {4}; | |
sub PpsSize {0x80}; | |
#------------------------------------------------------------------------------ | |
# new OLE::Storage_Lite | |
#------------------------------------------------------------------------------ | |
sub new($$) { | |
my($sClass, $sFile) = @_; | |
my $oThis = { | |
_FILE => $sFile, | |
}; | |
bless $oThis; | |
return $oThis; | |
} | |
#------------------------------------------------------------------------------ | |
# getPpsTree: OLE::Storage_Lite | |
#------------------------------------------------------------------------------ | |
sub getPpsTree($;$) | |
{ | |
my($oThis, $bData) = @_; | |
#0.Init | |
my $rhInfo = _initParse($oThis->{_FILE}); | |
return undef unless($rhInfo); | |
#1. Get Data | |
my ($oPps) = _getPpsTree(0, $rhInfo, $bData); | |
close(IN); | |
return $oPps; | |
} | |
#------------------------------------------------------------------------------ | |
# getSearch: OLE::Storage_Lite | |
#------------------------------------------------------------------------------ | |
sub getPpsSearch($$;$$) | |
{ | |
my($oThis, $raName, $bData, $iCase) = @_; | |
#0.Init | |
my $rhInfo = _initParse($oThis->{_FILE}); | |
return undef unless($rhInfo); | |
#1. Get Data | |
my @aList = _getPpsSearch(0, $rhInfo, $raName, $bData, $iCase); | |
close(IN); | |
return @aList; | |
} | |
#------------------------------------------------------------------------------ | |
# getNthPps: OLE::Storage_Lite | |
#------------------------------------------------------------------------------ | |
sub getNthPps($$;$) | |
{ | |
my($oThis, $iNo, $bData) = @_; | |
#0.Init | |
my $rhInfo = _initParse($oThis->{_FILE}); | |
return undef unless($rhInfo); | |
#1. Get Data | |
my $oPps = _getNthPps($iNo, $rhInfo, $bData); | |
close IN; | |
return $oPps; | |
} | |
#------------------------------------------------------------------------------ | |
# _initParse: OLE::Storage_Lite | |
#------------------------------------------------------------------------------ | |
sub _initParse($) { | |
my($sFile)=@_; | |
my $oIo; | |
#1. $sFile is Ref of scalar | |
if(ref($sFile) eq 'SCALAR') { | |
require IO::Scalar; | |
$oIo = new IO::Scalar; | |
$oIo->open($sFile); | |
} | |
#2. $sFile is a IO::Handle object | |
elsif(UNIVERSAL::isa($sFile, 'IO::Handle')) { | |
$oIo = $sFile; | |
binmode($oIo); | |
} | |
#3. $sFile is a simple filename string | |
elsif(!ref($sFile)) { | |
$oIo = new IO::File; | |
$oIo->open("<$sFile") || return undef; | |
binmode($oIo); | |
} | |
#4 Assume that if $sFile is a ref then it is a valid filehandle | |
else { | |
$oIo = $sFile; | |
# Not all filehandles support binmode() so try it in an eval. | |
eval{ binmode $oIo }; | |
} | |
return _getHeaderInfo($oIo); | |
} | |
#------------------------------------------------------------------------------ | |
# _getPpsTree: OLE::Storage_Lite | |
#------------------------------------------------------------------------------ | |
sub _getPpsTree($$$;$) { | |
my($iNo, $rhInfo, $bData, $raDone) = @_; | |
if(defined($raDone)) { | |
return () if(grep {$_ ==$iNo} @$raDone); | |
} | |
else { | |
$raDone=[]; | |
} | |
push @$raDone, $iNo; | |
my $iRootBlock = $rhInfo->{_ROOT_START} ; | |
#1. Get Information about itself | |
my $oPps = _getNthPps($iNo, $rhInfo, $bData); | |
#2. Child | |
if($oPps->{DirPps} != 0xFFFFFFFF) { | |
my @aChildL = _getPpsTree($oPps->{DirPps}, $rhInfo, $bData, $raDone); | |
$oPps->{Child} = \@aChildL; | |
} | |
else { | |
$oPps->{Child} = undef; | |
} | |
#3. Previous,Next PPSs | |
my @aList = (); | |
push @aList, _getPpsTree($oPps->{PrevPps}, $rhInfo, $bData, $raDone) | |
if($oPps->{PrevPps} != 0xFFFFFFFF); | |
push @aList, $oPps; | |
push @aList, _getPpsTree($oPps->{NextPps}, $rhInfo, $bData, $raDone) | |
if($oPps->{NextPps} != 0xFFFFFFFF); | |
return @aList; | |
} | |
#------------------------------------------------------------------------------ | |
# _getPpsSearch: OLE::Storage_Lite | |
#------------------------------------------------------------------------------ | |
sub _getPpsSearch($$$$$;$) { | |
my($iNo, $rhInfo, $raName, $bData, $iCase, $raDone) = @_; | |
my $iRootBlock = $rhInfo->{_ROOT_START} ; | |
my @aRes; | |
#1. Check it self | |
if(defined($raDone)) { | |
return () if(grep {$_==$iNo} @$raDone); | |
} | |
else { | |
$raDone=[]; | |
} | |
push @$raDone, $iNo; | |
my $oPps = _getNthPps($iNo, $rhInfo, undef); | |
# if(grep($_ eq $oPps->{Name}, @$raName)) { | |
if(($iCase && (grep(/^\Q$oPps->{Name}\E$/i, @$raName))) || | |
(grep($_ eq $oPps->{Name}, @$raName))) { | |
$oPps = _getNthPps($iNo, $rhInfo, $bData) if ($bData); | |
@aRes = ($oPps); | |
} | |
else { | |
@aRes = (); | |
} | |
#2. Check Child, Previous, Next PPSs | |
push @aRes, _getPpsSearch($oPps->{DirPps}, $rhInfo, $raName, $bData, $iCase, $raDone) | |
if($oPps->{DirPps} != 0xFFFFFFFF) ; | |
push @aRes, _getPpsSearch($oPps->{PrevPps}, $rhInfo, $raName, $bData, $iCase, $raDone) | |
if($oPps->{PrevPps} != 0xFFFFFFFF ); | |
push @aRes, _getPpsSearch($oPps->{NextPps}, $rhInfo, $raName, $bData, $iCase, $raDone) | |
if($oPps->{NextPps} != 0xFFFFFFFF); | |
return @aRes; | |
} | |
#=================================================================== | |
# Get Header Info (BASE Informain about that file) | |
#=================================================================== | |
sub _getHeaderInfo($){ | |
my($FILE) = @_; | |
my($iWk); | |
my $rhInfo = {}; | |
$rhInfo->{_FILEH_} = $FILE; | |
my $sWk; | |
#0. Check ID | |
$rhInfo->{_FILEH_}->seek(0, 0); | |
$rhInfo->{_FILEH_}->read($sWk, 8); | |
return undef unless($sWk eq "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"); | |
#BIG BLOCK SIZE | |
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x1E, 2, "v"); | |
return undef unless(defined($iWk)); | |
$rhInfo->{_BIG_BLOCK_SIZE} = 2 ** $iWk; | |
#SMALL BLOCK SIZE | |
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x20, 2, "v"); | |
return undef unless(defined($iWk)); | |
$rhInfo->{_SMALL_BLOCK_SIZE} = 2 ** $iWk; | |
#BDB Count | |
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x2C, 4, "V"); | |
return undef unless(defined($iWk)); | |
$rhInfo->{_BDB_COUNT} = $iWk; | |
#START BLOCK | |
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x30, 4, "V"); | |
return undef unless(defined($iWk)); | |
$rhInfo->{_ROOT_START} = $iWk; | |
#MIN SIZE OF BB | |
# $iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x38, 4, "V"); | |
# return undef unless(defined($iWk)); | |
# $rhInfo->{_MIN_SIZE_BB} = $iWk; | |
#SMALL BD START | |
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x3C, 4, "V"); | |
return undef unless(defined($iWk)); | |
$rhInfo->{_SBD_START} = $iWk; | |
#SMALL BD COUNT | |
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x40, 4, "V"); | |
return undef unless(defined($iWk)); | |
$rhInfo->{_SBD_COUNT} = $iWk; | |
#EXTRA BBD START | |
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x44, 4, "V"); | |
return undef unless(defined($iWk)); | |
$rhInfo->{_EXTRA_BBD_START} = $iWk; | |
#EXTRA BD COUNT | |
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x48, 4, "V"); | |
return undef unless(defined($iWk)); | |
$rhInfo->{_EXTRA_BBD_COUNT} = $iWk; | |
#GET BBD INFO | |
$rhInfo->{_BBD_INFO}= _getBbdInfo($rhInfo); | |
#GET ROOT PPS | |
my $oRoot = _getNthPps(0, $rhInfo, undef); | |
$rhInfo->{_SB_START} = $oRoot->{StartBlock}; | |
$rhInfo->{_SB_SIZE} = $oRoot->{Size}; | |
return $rhInfo; | |
} | |
#------------------------------------------------------------------------------ | |
# _getInfoFromFile | |
#------------------------------------------------------------------------------ | |
sub _getInfoFromFile($$$$) { | |
my($FILE, $iPos, $iLen, $sFmt) =@_; | |
my($sWk); | |
return undef unless($FILE); | |
return undef if($FILE->seek($iPos, 0)==0); | |
return undef if($FILE->read($sWk, $iLen)!=$iLen); | |
return unpack($sFmt, $sWk); | |
} | |
#------------------------------------------------------------------------------ | |
# _getBbdInfo | |
#------------------------------------------------------------------------------ | |
sub _getBbdInfo($) { | |
my($rhInfo) =@_; | |
my @aBdList = (); | |
my $iBdbCnt = $rhInfo->{_BDB_COUNT}; | |
my $iGetCnt; | |
my $sWk; | |
my $i1stCnt = int(($rhInfo->{_BIG_BLOCK_SIZE} - 0x4C) / OLE::Storage_Lite::LongIntSize()); | |
my $iBdlCnt = int($rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::LongIntSize()) - 1; | |
#1. 1st BDlist | |
$rhInfo->{_FILEH_}->seek(0x4C, 0); | |
$iGetCnt = ($iBdbCnt < $i1stCnt)? $iBdbCnt: $i1stCnt; | |
$rhInfo->{_FILEH_}->read($sWk, OLE::Storage_Lite::LongIntSize()*$iGetCnt); | |
push @aBdList, unpack("V$iGetCnt", $sWk); | |
$iBdbCnt -= $iGetCnt; | |
#2. Extra BDList | |
my $iBlock = $rhInfo->{_EXTRA_BBD_START}; | |
while(($iBdbCnt> 0) && _isNormalBlock($iBlock)){ | |
_setFilePos($iBlock, 0, $rhInfo); | |
$iGetCnt= ($iBdbCnt < $iBdlCnt)? $iBdbCnt: $iBdlCnt; | |
$rhInfo->{_FILEH_}->read($sWk, OLE::Storage_Lite::LongIntSize()*$iGetCnt); | |
push @aBdList, unpack("V$iGetCnt", $sWk); | |
$iBdbCnt -= $iGetCnt; | |
$rhInfo->{_FILEH_}->read($sWk, OLE::Storage_Lite::LongIntSize()); | |
$iBlock = unpack("V", $sWk); | |
} | |
#3.Get BDs | |
my @aWk; | |
my %hBd; | |
my $iBlkNo = 0; | |
my $iBdL; | |
my $i; | |
my $iBdCnt = int($rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::LongIntSize()); | |
foreach $iBdL (@aBdList) { | |
_setFilePos($iBdL, 0, $rhInfo); | |
$rhInfo->{_FILEH_}->read($sWk, $rhInfo->{_BIG_BLOCK_SIZE}); | |
@aWk = unpack("V$iBdCnt", $sWk); | |
for($i=0;$i<$iBdCnt;$i++, $iBlkNo++) { | |
if($aWk[$i] != ($iBlkNo+1)){ | |
$hBd{$iBlkNo} = $aWk[$i]; | |
} | |
} | |
} | |
return \%hBd; | |
} | |
#------------------------------------------------------------------------------ | |
# getNthPps (OLE::Storage_Lite) | |
#------------------------------------------------------------------------------ | |
sub _getNthPps($$$){ | |
my($iPos, $rhInfo, $bData) = @_; | |
my($iPpsStart) = ($rhInfo->{_ROOT_START}); | |
my($iPpsBlock, $iPpsPos); | |
my $sWk; | |
my $iBlock; | |
my $iBaseCnt = $rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::PpsSize(); | |
$iPpsBlock = int($iPos / $iBaseCnt); | |
$iPpsPos = $iPos % $iBaseCnt; | |
$iBlock = _getNthBlockNo($iPpsStart, $iPpsBlock, $rhInfo); | |
return undef unless(defined($iBlock)); | |
_setFilePos($iBlock, OLE::Storage_Lite::PpsSize()*$iPpsPos, $rhInfo); | |
$rhInfo->{_FILEH_}->read($sWk, OLE::Storage_Lite::PpsSize()); | |
return undef unless($sWk); | |
my $iNmSize = unpack("v", substr($sWk, 0x40, 2)); | |
$iNmSize = ($iNmSize > 2)? $iNmSize - 2 : $iNmSize; | |
my $sNm= substr($sWk, 0, $iNmSize); | |
my $iType = unpack("C", substr($sWk, 0x42, 2)); | |
my $lPpsPrev = unpack("V", substr($sWk, 0x44, OLE::Storage_Lite::LongIntSize())); | |
my $lPpsNext = unpack("V", substr($sWk, 0x48, OLE::Storage_Lite::LongIntSize())); | |
my $lDirPps = unpack("V", substr($sWk, 0x4C, OLE::Storage_Lite::LongIntSize())); | |
my @raTime1st = | |
(($iType == OLE::Storage_Lite::PpsType_Root()) or ($iType == OLE::Storage_Lite::PpsType_Dir()))? | |
OLEDate2Local(substr($sWk, 0x64, 8)) : undef , | |
my @raTime2nd = | |
(($iType == OLE::Storage_Lite::PpsType_Root()) or ($iType == OLE::Storage_Lite::PpsType_Dir()))? | |
OLEDate2Local(substr($sWk, 0x6C, 8)) : undef, | |
my($iStart, $iSize) = unpack("VV", substr($sWk, 0x74, 8)); | |
if($bData) { | |
my $sData = _getData($iType, $iStart, $iSize, $rhInfo); | |
return OLE::Storage_Lite::PPS->new( | |
$iPos, $sNm, $iType, $lPpsPrev, $lPpsNext, $lDirPps, | |
\@raTime1st, \@raTime2nd, $iStart, $iSize, $sData, undef); | |
} | |
else { | |
return OLE::Storage_Lite::PPS->new( | |
$iPos, $sNm, $iType, $lPpsPrev, $lPpsNext, $lDirPps, | |
\@raTime1st, \@raTime2nd, $iStart, $iSize, undef, undef); | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _setFilePos (OLE::Storage_Lite) | |
#------------------------------------------------------------------------------ | |
sub _setFilePos($$$){ | |
my($iBlock, $iPos, $rhInfo) = @_; | |
$rhInfo->{_FILEH_}->seek(($iBlock+1)*$rhInfo->{_BIG_BLOCK_SIZE}+$iPos, 0); | |
} | |
#------------------------------------------------------------------------------ | |
# _getNthBlockNo (OLE::Storage_Lite) | |
#------------------------------------------------------------------------------ | |
sub _getNthBlockNo($$$){ | |
my($iStBlock, $iNth, $rhInfo) = @_; | |
my $iSv; | |
my $iNext = $iStBlock; | |
for(my $i =0; $i<$iNth; $i++) { | |
$iSv = $iNext; | |
$iNext = _getNextBlockNo($iSv, $rhInfo); | |
return undef unless _isNormalBlock($iNext); | |
} | |
return $iNext; | |
} | |
#------------------------------------------------------------------------------ | |
# _getData (OLE::Storage_Lite) | |
#------------------------------------------------------------------------------ | |
sub _getData($$$$) | |
{ | |
my($iType, $iBlock, $iSize, $rhInfo) = @_; | |
if ($iType == OLE::Storage_Lite::PpsType_File()) { | |
if($iSize < OLE::Storage_Lite::DataSizeSmall()) { | |
return _getSmallData($iBlock, $iSize, $rhInfo); | |
} | |
else { | |
return _getBigData($iBlock, $iSize, $rhInfo); | |
} | |
} | |
elsif($iType == OLE::Storage_Lite::PpsType_Root()) { #Root | |
return _getBigData($iBlock, $iSize, $rhInfo); | |
} | |
elsif($iType == OLE::Storage_Lite::PpsType_Dir()) { # Directory | |
return undef; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _getBigData (OLE::Storage_Lite) | |
#------------------------------------------------------------------------------ | |
sub _getBigData($$$) | |
{ | |
my($iBlock, $iSize, $rhInfo) = @_; | |
my($iRest, $sWk, $sRes); | |
return '' unless(_isNormalBlock($iBlock)); | |
$iRest = $iSize; | |
my($i, $iGetSize, $iNext); | |
$sRes = ''; | |
my @aKeys= sort({$a<=>$b} keys(%{$rhInfo->{_BBD_INFO}})); | |
while ($iRest > 0) { | |
my @aRes = grep($_ >= $iBlock, @aKeys); | |
my $iNKey = $aRes[0]; | |
$i = $iNKey - $iBlock; | |
$iNext = $rhInfo->{_BBD_INFO}{$iNKey}; | |
_setFilePos($iBlock, 0, $rhInfo); | |
my $iGetSize = ($rhInfo->{_BIG_BLOCK_SIZE} * ($i+1)); | |
$iGetSize = $iRest if($iRest < $iGetSize); | |
$rhInfo->{_FILEH_}->read( $sWk, $iGetSize); | |
$sRes .= $sWk; | |
$iRest -= $iGetSize; | |
$iBlock= $iNext; | |
} | |
return $sRes; | |
} | |
#------------------------------------------------------------------------------ | |
# _getNextBlockNo (OLE::Storage_Lite) | |
#------------------------------------------------------------------------------ | |
sub _getNextBlockNo($$){ | |
my($iBlockNo, $rhInfo) = @_; | |
my $iRes = $rhInfo->{_BBD_INFO}->{$iBlockNo}; | |
return defined($iRes)? $iRes: $iBlockNo+1; | |
} | |
#------------------------------------------------------------------------------ | |
# _isNormalBlock (OLE::Storage_Lite) | |
# 0xFFFFFFFC : BDList, 0xFFFFFFFD : BBD, | |
# 0xFFFFFFFE: End of Chain 0xFFFFFFFF : unused | |
#------------------------------------------------------------------------------ | |
sub _isNormalBlock($){ | |
my($iBlock) = @_; | |
return ($iBlock < 0xFFFFFFFC)? 1: undef; | |
} | |
#------------------------------------------------------------------------------ | |
# _getSmallData (OLE::Storage_Lite) | |
#------------------------------------------------------------------------------ | |
sub _getSmallData($$$) | |
{ | |
my($iSmBlock, $iSize, $rhInfo) = @_; | |
my($sRes, $sWk); | |
my $iRest = $iSize; | |
$sRes = ''; | |
while ($iRest > 0) { | |
_setFilePosSmall($iSmBlock, $rhInfo); | |
$rhInfo->{_FILEH_}->read($sWk, | |
($iRest >= $rhInfo->{_SMALL_BLOCK_SIZE})? | |
$rhInfo->{_SMALL_BLOCK_SIZE}: $iRest); | |
$sRes .= $sWk; | |
$iRest -= $rhInfo->{_SMALL_BLOCK_SIZE}; | |
$iSmBlock= _getNextSmallBlockNo($iSmBlock, $rhInfo); | |
} | |
return $sRes; | |
} | |
#------------------------------------------------------------------------------ | |
# _setFilePosSmall(OLE::Storage_Lite) | |
#------------------------------------------------------------------------------ | |
sub _setFilePosSmall($$) | |
{ | |
my($iSmBlock, $rhInfo) = @_; | |
my $iSmStart = $rhInfo->{_SB_START}; | |
my $iBaseCnt = $rhInfo->{_BIG_BLOCK_SIZE} / $rhInfo->{_SMALL_BLOCK_SIZE}; | |
my $iNth = int($iSmBlock/$iBaseCnt); | |
my $iPos = $iSmBlock % $iBaseCnt; | |
my $iBlk = _getNthBlockNo($iSmStart, $iNth, $rhInfo); | |
_setFilePos($iBlk, $iPos * $rhInfo->{_SMALL_BLOCK_SIZE}, $rhInfo); | |
} | |
#------------------------------------------------------------------------------ | |
# _getNextSmallBlockNo (OLE::Storage_Lite) | |
#------------------------------------------------------------------------------ | |
sub _getNextSmallBlockNo($$) | |
{ | |
my($iSmBlock, $rhInfo) = @_; | |
my($sWk); | |
my $iBaseCnt = $rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::LongIntSize(); | |
my $iNth = int($iSmBlock/$iBaseCnt); | |
my $iPos = $iSmBlock % $iBaseCnt; | |
my $iBlk = _getNthBlockNo($rhInfo->{_SBD_START}, $iNth, $rhInfo); | |
_setFilePos($iBlk, $iPos * OLE::Storage_Lite::LongIntSize(), $rhInfo); | |
$rhInfo->{_FILEH_}->read($sWk, OLE::Storage_Lite::LongIntSize()); | |
return unpack("V", $sWk); | |
} | |
#------------------------------------------------------------------------------ | |
# Asc2Ucs: OLE::Storage_Lite | |
#------------------------------------------------------------------------------ | |
sub Asc2Ucs($) | |
{ | |
my($sAsc) = @_; | |
return join("\x00", split //, $sAsc) . "\x00"; | |
} | |
#------------------------------------------------------------------------------ | |
# Ucs2Asc: OLE::Storage_Lite | |
#------------------------------------------------------------------------------ | |
sub Ucs2Asc($) | |
{ | |
my($sUcs) = @_; | |
return join('', map(pack('c', $_), unpack('v*', $sUcs))); | |
} | |
#------------------------------------------------------------------------------ | |
# OLEDate2Local() | |
# | |
# Convert from a Window FILETIME structure to a localtime array. FILETIME is | |
# a 64-bit value representing the number of 100-nanosecond intervals since | |
# January 1 1601. | |
# | |
# We first convert the FILETIME to seconds and then subtract the difference | |
# between the 1601 epoch and the 1970 Unix epoch. | |
# | |
sub OLEDate2Local { | |
my $oletime = shift; | |
# Unpack the FILETIME into high and low longs. | |
my ( $lo, $hi ) = unpack 'V2', $oletime; | |
# Convert the longs to a double. | |
my $nanoseconds = $hi * 2**32 + $lo; | |
# Convert the 100 nanosecond units into seconds. | |
my $time = $nanoseconds / 1e7; | |
# Subtract the number of seconds between the 1601 and 1970 epochs. | |
$time -= 11644473600; | |
# Convert to a localtime (actually gmtime) structure. | |
my @localtime = gmtime($time); | |
return @localtime; | |
} | |
#------------------------------------------------------------------------------ | |
# LocalDate2OLE() | |
# | |
# Convert from a a localtime array to a Window FILETIME structure. FILETIME is | |
# a 64-bit value representing the number of 100-nanosecond intervals since | |
# January 1 1601. | |
# | |
# We first convert the localtime (actually gmtime) to seconds and then add the | |
# difference between the 1601 epoch and the 1970 Unix epoch. We convert that to | |
# 100 nanosecond units, divide it into high and low longs and return it as a | |
# packed 64bit structure. | |
# | |
sub LocalDate2OLE { | |
my $localtime = shift; | |
return "\x00" x 8 unless $localtime; | |
# Convert from localtime (actually gmtime) to seconds. | |
my @localtimecopy = @{$localtime}; | |
$localtimecopy[5] += 1900 unless $localtimecopy[5] > 99; | |
my $time = timegm( @localtimecopy ); | |
# Add the number of seconds between the 1601 and 1970 epochs. | |
$time += 11644473600; | |
# The FILETIME seconds are in units of 100 nanoseconds. | |
my $nanoseconds = $time * 1E7; | |
use POSIX 'fmod'; | |
# Pack the total nanoseconds into 64 bits... | |
my $hi = int( $nanoseconds / 2**32 ); | |
my $lo = fmod($nanoseconds, 2**32); | |
my $oletime = pack "VV", $lo, $hi; | |
return $oletime; | |
} | |
1; | |
__END__ | |
=head1 NAME | |
OLE::Storage_Lite - Simple Class for OLE document interface. | |
=head1 SYNOPSIS | |
use OLE::Storage_Lite; | |
# Initialize. | |
# From a file | |
my $oOl = OLE::Storage_Lite->new("some.xls"); | |
# From a filehandle object | |
use IO::File; | |
my $oIo = new IO::File; | |
$oIo->open("<iofile.xls"); | |
binmode($oIo); | |
my $oOl = OLE::Storage_Lite->new($oFile); | |
# Read data | |
my $oPps = $oOl->getPpsTree(1); | |
# Save Data | |
# To a File | |
$oPps->save("kaba.xls"); #kaba.xls | |
$oPps->save('-'); #STDOUT | |
# To a filehandle object | |
my $oIo = new IO::File; | |
$oIo->open(">iofile.xls"); | |
bimode($oIo); | |
$oPps->save($oIo); | |
=head1 DESCRIPTION | |
OLE::Storage_Lite allows you to read and write an OLE structured file. | |
OLE::Storage_Lite::PPS is a class representing PPS. OLE::Storage_Lite::PPS::Root, OLE::Storage_Lite::PPS::File and OLE::Storage_Lite::PPS::Dir | |
are subclasses of OLE::Storage_Lite::PPS. | |
=head2 new() | |
Constructor. | |
$oOle = OLE::Storage_Lite->new($sFile); | |
Creates a OLE::Storage_Lite object for C<$sFile>. C<$sFile> must be a correct file name. | |
The C<new()> constructor also accepts a valid filehandle. Remember to C<binmode()> the filehandle first. | |
=head2 getPpsTree() | |
$oPpsRoot = $oOle->getPpsTree([$bData]); | |
Returns PPS as an OLE::Storage_Lite::PPS::Root object. | |
Other PPS objects will be included as its children. | |
If C<$bData> is true, the objects will have data in the file. | |
=head2 getPpsSearch() | |
$oPpsRoot = $oOle->getPpsTree($raName [, $bData][, $iCase] ); | |
Returns PPSs as OLE::Storage_Lite::PPS objects that has the name specified in C<$raName> array. | |
If C<$bData> is true, the objects will have data in the file. | |
If C<$iCase> is true, search is case insensitive. | |
=head2 getNthPps() | |
$oPpsRoot = $oOle->getNthPps($iNth [, $bData]); | |
Returns PPS as C<OLE::Storage_Lite::PPS> object specified number C<$iNth>. | |
If C<$bData> is true, the objects will have data in the file. | |
=head2 Asc2Ucs() | |
$sUcs2 = OLE::Storage_Lite::Asc2Ucs($sAsc>); | |
Utility function. Just adds 0x00 after every characters in C<$sAsc>. | |
=head2 Ucs2Asc() | |
$sAsc = OLE::Storage_Lite::Ucs2Asc($sUcs2); | |
Utility function. Just deletes 0x00 after words in C<$sUcs>. | |
=head1 OLE::Storage_Lite::PPS | |
OLE::Storage_Lite::PPS has these properties: | |
=over 4 | |
=item No | |
Order number in saving. | |
=item Name | |
Its name in UCS2 (a.k.a Unicode). | |
=item Type | |
Its type (1:Dir, 2:File (Data), 5: Root) | |
=item PrevPps | |
Previous pps (as No) | |
=item NextPps | |
Next pps (as No) | |
=item DirPps | |
Dir pps (as No). | |
=item Time1st | |
Timestamp 1st in array ref as similar fomat of localtime. | |
=item Time2nd | |
Timestamp 2nd in array ref as similar fomat of localtime. | |
=item StartBlock | |
Start block number | |
=item Size | |
Size of the pps | |
=item Data | |
Its data | |
=item Child | |
Its child PPSs in array ref | |
=back | |
=head1 OLE::Storage_Lite::PPS::Root | |
OLE::Storage_Lite::PPS::Root has 2 methods. | |
=head2 new() | |
$oRoot = OLE::Storage_Lite::PPS::Root->new( | |
$raTime1st, | |
$raTime2nd, | |
$raChild); | |
Constructor. | |
C<$raTime1st>, C<$raTime2nd> are array refs with ($iSec, $iMin, $iHour, $iDay, $iMon, $iYear). | |
$iSec means seconds, $iMin means minutes. $iHour means hours. | |
$iDay means day. $iMon is month -1. $iYear is year - 1900. | |
C<$raChild> is a array ref of children PPSs. | |
=head2 save() | |
$oRoot = $oRoot>->save( | |
$sFile, | |
$bNoAs); | |
Saves information into C<$sFile>. If C<$sFile> is '-', this will use STDOUT. | |
The C<new()> constructor also accepts a valid filehandle. Remember to C<binmode()> the filehandle first. | |
If C<$bNoAs> is defined, this function will use the No of PPSs for saving order. | |
If C<$bNoAs> is undefined, this will calculate PPS saving order. | |
=head1 OLE::Storage_Lite::PPS::Dir | |
OLE::Storage_Lite::PPS::Dir has 1 method. | |
=head2 new() | |
$oRoot = OLE::Storage_Lite::PPS::Dir->new( | |
$sName, | |
[, $raTime1st] | |
[, $raTime2nd] | |
[, $raChild>]); | |
Constructor. | |
C<$sName> is a name of the PPS. | |
C<$raTime1st>, C<$raTime2nd> is a array ref as | |
($iSec, $iMin, $iHour, $iDay, $iMon, $iYear). | |
$iSec means seconds, $iMin means minutes. $iHour means hours. | |
$iDay means day. $iMon is month -1. $iYear is year - 1900. | |
C<$raChild> is a array ref of children PPSs. | |
=head1 OLE::Storage_Lite::PPS::File | |
OLE::Storage_Lite::PPS::File has 3 method. | |
=head2 new | |
$oRoot = OLE::Storage_Lite::PPS::File->new($sName, $sData); | |
C<$sName> is name of the PPS. | |
C<$sData> is data of the PPS. | |
=head2 newFile() | |
$oRoot = OLE::Storage_Lite::PPS::File->newFile($sName, $sFile); | |
This function makes to use file handle for geting and storing data. | |
C<$sName> is name of the PPS. | |
If C<$sFile> is scalar, it assumes that is a filename. | |
If C<$sFile> is an IO::Handle object, it uses that specified handle. | |
If C<$sFile> is undef or '', it uses temporary file. | |
CAUTION: Take care C<$sFile> will be updated by C<append> method. | |
So if you want to use IO::Handle and append a data to it, | |
you should open the handle with "r+". | |
=head2 append() | |
$oRoot = $oPps->append($sData); | |
appends specified data to that PPS. | |
C<$sData> is appending data for that PPS. | |
=head1 CAUTION | |
A saved file with VBA (a.k.a Macros) by this module will not work correctly. | |
However modules can get the same information from the file, | |
the file occurs a error in application(Word, Excel ...). | |
=head1 DEPRECATED FEATURES | |
Older version of C<OLE::Storage_Lite> autovivified a scalar ref in the C<new()> constructors into a scalar filehandle. This functionality is still there for backwards compatibility but it is highly recommended that you do not use it. Instead create a filehandle (scalar or otherwise) and pass that in. | |
=head1 COPYRIGHT | |
The OLE::Storage_Lite module is Copyright (c) 2000,2001 Kawai Takanori. Japan. | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public | |
License or the Artistic License, as specified in the Perl README file. | |
=head1 ACKNOWLEDGEMENTS | |
First of all, I would like to acknowledge to Martin Schwartz and his module OLE::Storage. | |
=head1 AUTHOR | |
Kawai Takanori kwitknr@cpan.org | |
This module is currently maintained by John McNamara jmcnamara@cpan.org | |
=head1 SEE ALSO | |
OLE::Storage | |
Documentation for the OLE Compound document has been released by Microsoft under the I<Open Specification Promise>. See http://www.microsoft.com/interop/docs/supportingtechnologies.mspx | |
The Digital Imaging Group have also detailed the OLE format in the JPEG2000 specification: see Appendix A of http://www.i3a.org/pdf/wg1n1017.pdf | |
=cut | |
OLE_STORAGE_LITE | |
$fatpacked{"Spreadsheet/ParseExcel.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL'; | |
package Spreadsheet::ParseExcel; | |
############################################################################## | |
# | |
# Spreadsheet::ParseExcel - Extract information from an Excel file. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2008 Takanori Kawai | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use 5.008; | |
use OLE::Storage_Lite; | |
use File::Basename qw(fileparse); | |
use IO::File; | |
use Config; | |
use Crypt::RC4; | |
use Digest::Perl::MD5; | |
our $VERSION = '0.65'; | |
use Spreadsheet::ParseExcel::Workbook; | |
use Spreadsheet::ParseExcel::Worksheet; | |
use Spreadsheet::ParseExcel::Font; | |
use Spreadsheet::ParseExcel::Format; | |
use Spreadsheet::ParseExcel::Cell; | |
use Spreadsheet::ParseExcel::FmtDefault; | |
my $currentbook; | |
my @aColor = ( | |
'000000', # 0x00 | |
'FFFFFF', 'FFFFFF', 'FFFFFF', 'FFFFFF', | |
'FFFFFF', 'FFFFFF', 'FFFFFF', '000000', # 0x08 | |
'FFFFFF', 'FF0000', '00FF00', '0000FF', | |
'FFFF00', 'FF00FF', '00FFFF', '800000', # 0x10 | |
'008000', '000080', '808000', '800080', | |
'008080', 'C0C0C0', '808080', '9999FF', # 0x18 | |
'993366', 'FFFFCC', 'CCFFFF', '660066', | |
'FF8080', '0066CC', 'CCCCFF', '000080', # 0x20 | |
'FF00FF', 'FFFF00', '00FFFF', '800080', | |
'800000', '008080', '0000FF', '00CCFF', # 0x28 | |
'CCFFFF', 'CCFFCC', 'FFFF99', '99CCFF', | |
'FF99CC', 'CC99FF', 'FFCC99', '3366FF', # 0x30 | |
'33CCCC', '99CC00', 'FFCC00', 'FF9900', | |
'FF6600', '666699', '969696', '003366', # 0x38 | |
'339966', '003300', '333300', '993300', | |
'993366', '333399', '333333', '000000' # 0x40 | |
); | |
use constant verExcel95 => 0x500; | |
use constant verExcel97 => 0x600; | |
use constant verBIFF2 => 0x00; | |
use constant verBIFF3 => 0x02; | |
use constant verBIFF4 => 0x04; | |
use constant verBIFF5 => 0x08; | |
use constant verBIFF8 => 0x18; | |
use constant MS_BIFF_CRYPTO_NONE => 0; | |
use constant MS_BIFF_CRYPTO_XOR => 1; | |
use constant MS_BIFF_CRYPTO_RC4 => 2; | |
use constant sizeof_BIFF_8_FILEPASS => ( 6 + 3 * 16 ); | |
use constant REKEY_BLOCK => 0x400; | |
# Error code for some of the common parsing errors. | |
use constant ErrorNone => 0; | |
use constant ErrorNoFile => 1; | |
use constant ErrorNoExcelData => 2; | |
use constant ErrorFileEncrypted => 3; | |
# Color index for the 'auto' color | |
use constant AutoColor => 64; | |
our %error_strings = ( | |
ErrorNone, '', # 0 | |
ErrorNoFile, 'File not found', # 1 | |
ErrorNoExcelData, 'No Excel data found in file', # 2 | |
ErrorFileEncrypted, 'File is encrypted', # 3 | |
); | |
our %ProcTbl = ( | |
#Develpers' Kit P291 | |
0x14 => \&_subHeader, # Header | |
0x15 => \&_subFooter, # Footer | |
0x18 => \&_subName, # NAME(?) | |
0x1A => \&_subVPageBreak, # Vertical Page Break | |
0x1B => \&_subHPageBreak, # Horizontal Page Break | |
0x22 => \&_subFlg1904, # 1904 Flag | |
0x26 => \&_subMargin, # Left Margin | |
0x27 => \&_subMargin, # Right Margin | |
0x28 => \&_subMargin, # Top Margin | |
0x29 => \&_subMargin, # Bottom Margin | |
0x2A => \&_subPrintHeaders, # Print Headers | |
0x2B => \&_subPrintGridlines, # Print Gridlines | |
0x3C => \&_subContinue, # Continue | |
0x3D => \&_subWindow1, # Window1 | |
0x43 => \&_subXF, # XF for Excel < 4. | |
0x0443 => \&_subXF, # XF for Excel = 4. | |
0x862 => \&_subSheetLayout, # Sheet Layout | |
0x1B8 => \&_subHyperlink, # HYPERLINK | |
#Develpers' Kit P292 | |
0x55 => \&_subDefColWidth, # Consider | |
0x5C => \&_subWriteAccess, # WRITEACCESS | |
0x7D => \&_subColInfo, # Colinfo | |
0x7E => \&_subRK, # RK | |
0x81 => \&_subWSBOOL, # WSBOOL | |
0x83 => \&_subHcenter, # HCENTER | |
0x84 => \&_subVcenter, # VCENTER | |
0x85 => \&_subBoundSheet, # BoundSheet | |
0x92 => \&_subPalette, # Palette, fgp | |
0x99 => \&_subStandardWidth, # Standard Col | |
#Develpers' Kit P293 | |
0xA1 => \&_subSETUP, # SETUP | |
0xBD => \&_subMulRK, # MULRK | |
0xBE => \&_subMulBlank, # MULBLANK | |
0xD6 => \&_subRString, # RString | |
#Develpers' Kit P294 | |
0xE0 => \&_subXF, # ExTended Format | |
0xE5 => \&_subMergeArea, # MergeArea (Not Documented) | |
0xFC => \&_subSST, # Shared String Table | |
0xFD => \&_subLabelSST, # Label SST | |
#Develpers' Kit P295 | |
0x201 => \&_subBlank, # Blank | |
0x202 => \&_subInteger, # Integer(Not Documented) | |
0x203 => \&_subNumber, # Number | |
0x204 => \&_subLabel, # Label | |
0x205 => \&_subBoolErr, # BoolErr | |
0x207 => \&_subString, # STRING | |
0x208 => \&_subRow, # RowData | |
0x221 => \&_subArray, # Array (Consider) | |
0x225 => \&_subDefaultRowHeight, # Consider | |
0x31 => \&_subFont, # Font | |
0x231 => \&_subFont, # Font | |
0x27E => \&_subRK, # RK | |
0x41E => \&_subFormat, # Format | |
0x06 => \&_subFormula, # Formula | |
0x406 => \&_subFormula, # Formula | |
0x009 => \&_subBOF, # BOF(BIFF2) | |
0x209 => \&_subBOF, # BOF(BIFF3) | |
0x409 => \&_subBOF, # BOF(BIFF4) | |
0x809 => \&_subBOF, # BOF(BIFF5-8) | |
); | |
our $BIGENDIAN; | |
our $PREFUNC; | |
our $_use_perlio; | |
#------------------------------------------------------------------------------ | |
# Spreadsheet::ParseExcel->new | |
#------------------------------------------------------------------------------ | |
sub new { | |
my ( $class, %hParam ) = @_; | |
if ( not defined $_use_perlio ) { | |
if ( exists $Config{useperlio} | |
&& defined $Config{useperlio} | |
&& $Config{useperlio} eq "define" ) | |
{ | |
$_use_perlio = 1; | |
} | |
else { | |
$_use_perlio = 0; | |
require IO::Scalar; | |
import IO::Scalar; | |
} | |
} | |
# Check ENDIAN(Little: Intel etc. BIG: Sparc etc) | |
$BIGENDIAN = | |
( defined $hParam{Endian} ) ? $hParam{Endian} | |
: ( unpack( "H08", pack( "L", 2 ) ) eq '02000000' ) ? 0 | |
: 1; | |
my $self = {}; | |
bless $self, $class; | |
$self->{GetContent} = \&_subGetContent; | |
if ( $hParam{EventHandlers} ) { | |
$self->SetEventHandlers( $hParam{EventHandlers} ); | |
} | |
else { | |
$self->SetEventHandlers( \%ProcTbl ); | |
} | |
if ( $hParam{AddHandlers} ) { | |
foreach my $sKey ( keys( %{ $hParam{AddHandlers} } ) ) { | |
$self->SetEventHandler( $sKey, $hParam{AddHandlers}->{$sKey} ); | |
} | |
} | |
$self->{CellHandler} = $hParam{CellHandler}; | |
$self->{NotSetCell} = $hParam{NotSetCell}; | |
$self->{Object} = $hParam{Object}; | |
if ( defined $hParam{Password} ) { | |
$self->{Password} = $hParam{Password}; | |
} | |
else { | |
$self->{Password} = 'VelvetSweatshop'; | |
} | |
$self->{_error_status} = ErrorNone; | |
return $self; | |
} | |
#------------------------------------------------------------------------------ | |
# Spreadsheet::ParseExcel->SetEventHandler | |
#------------------------------------------------------------------------------ | |
sub SetEventHandler { | |
my ( $self, $key, $sub_ref ) = @_; | |
$self->{FuncTbl}->{$key} = $sub_ref; | |
} | |
#------------------------------------------------------------------------------ | |
# Spreadsheet::ParseExcel->SetEventHandlers | |
#------------------------------------------------------------------------------ | |
sub SetEventHandlers { | |
my ( $self, $rhTbl ) = @_; | |
$self->{FuncTbl} = undef; | |
foreach my $sKey ( keys %$rhTbl ) { | |
$self->{FuncTbl}->{$sKey} = $rhTbl->{$sKey}; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# Decryption routines | |
# based on sources of gnumeric (ms-biff.c ms-excel-read.c) | |
#------------------------------------------------------------------------------ | |
sub md5state { | |
my ( $md5 ) = @_; | |
my $s = ''; | |
for ( my $i = 0 ; $i < 4 ; $i++ ) { | |
my $v = $md5->{_state}[$i]; | |
$s .= chr( $v & 0xff ); | |
$s .= chr( ( $v >> 8 ) & 0xff ); | |
$s .= chr( ( $v >> 16 ) & 0xff ); | |
$s .= chr( ( $v >> 24 ) & 0xff ); | |
} | |
return $s; | |
} | |
sub MakeKey { | |
my ( $block, $key, $valContext ) = @_; | |
my $pwarray = "\0" x 64; | |
substr( $pwarray, 0, 5 ) = substr( $valContext, 0, 5 ); | |
substr( $pwarray, 5, 1 ) = chr( $block & 0xff ); | |
substr( $pwarray, 6, 1 ) = chr( ( $block >> 8 ) & 0xff ); | |
substr( $pwarray, 7, 1 ) = chr( ( $block >> 16 ) & 0xff ); | |
substr( $pwarray, 8, 1 ) = chr( ( $block >> 24 ) & 0xff ); | |
substr( $pwarray, 9, 1 ) = "\x80"; | |
substr( $pwarray, 56, 1 ) = "\x48"; | |
my $md5 = Digest::Perl::MD5->new(); | |
$md5->add( $pwarray ); | |
my $s = md5state( $md5 ); | |
${$key} = Crypt::RC4->new( $s ); | |
} | |
sub VerifyPassword { | |
my ( $password, $docid, $salt_data, $hashedsalt_data, $valContext ) = @_; | |
my $pwarray = "\0" x 64; | |
my $i; | |
my $md5 = Digest::Perl::MD5->new(); | |
for ( $i = 0 ; $i < length( $password ) ; $i++ ) { | |
my $o = ord( substr( $password, $i, 1 ) ); | |
substr( $pwarray, 2 * $i, 1 ) = chr( $o & 0xff ); | |
substr( $pwarray, 2 * $i + 1, 1 ) = chr( ( $o >> 8 ) & 0xff ); | |
} | |
substr( $pwarray, 2 * $i, 1 ) = chr( 0x80 ); | |
substr( $pwarray, 56, 1 ) = chr( ( $i << 4 ) & 0xff ); | |
$md5->add( $pwarray ); | |
my $mdContext1 = md5state( $md5 ); | |
my $offset = 0; | |
my $keyoffset = 0; | |
my $tocopy = 5; | |
$md5->reset; | |
while ( $offset != 16 ) { | |
if ( ( 64 - $offset ) < 5 ) { | |
$tocopy = 64 - $offset; | |
} | |
substr( $pwarray, $offset, $tocopy ) = | |
substr( $mdContext1, $keyoffset, $tocopy ); | |
$offset += $tocopy; | |
if ( $offset == 64 ) { | |
$md5->add( $pwarray ); | |
$keyoffset = $tocopy; | |
$tocopy = 5 - $tocopy; | |
$offset = 0; | |
next; | |
} | |
$keyoffset = 0; | |
$tocopy = 5; | |
substr( $pwarray, $offset, 16 ) = $docid; | |
$offset += 16; | |
} | |
substr( $pwarray, 16, 1 ) = "\x80"; | |
substr( $pwarray, 17, 47 ) = "\0" x 47; | |
substr( $pwarray, 56, 1 ) = "\x80"; | |
substr( $pwarray, 57, 1 ) = "\x0a"; | |
$md5->add( $pwarray ); | |
${$valContext} = md5state( $md5 ); | |
my $key; | |
MakeKey( 0, \$key, ${$valContext} ); | |
my $salt = $key->RC4( $salt_data ); | |
my $hashedsalt = $key->RC4( $hashedsalt_data ); | |
$salt .= "\x80" . "\0" x 47; | |
substr( $salt, 56, 1 ) = "\x80"; | |
$md5->reset; | |
$md5->add( $salt ); | |
my $mdContext2 = md5state( $md5 ); | |
return ( $mdContext2 eq $hashedsalt ); | |
} | |
sub SkipBytes { | |
my ( $q, $start, $count ) = @_; | |
my $scratch = "\0" x REKEY_BLOCK; | |
my $block; | |
$block = int( ( $start + $count ) / REKEY_BLOCK ); | |
if ( $block != $q->{block} ) { | |
MakeKey( $q->{block} = $block, \$q->{rc4_key}, $q->{md5_ctxt} ); | |
$count = ( $start + $count ) % REKEY_BLOCK; | |
} | |
$q->{rc4_key}->RC4( substr( $scratch, 0, $count ) ); | |
return 1; | |
} | |
sub SetDecrypt { | |
my ( $q, $version, $password ) = @_; | |
if ( $q->{opcode} != 0x2f ) { | |
return 0; | |
} | |
if ( $password eq '' ) { | |
return 0; | |
} | |
# TODO old versions decryption | |
#if (version < MS_BIFF_V8 || q->data[0] == 0) | |
# return ms_biff_pre_biff8_query_set_decrypt (q, password); | |
if ( $q->{length} != sizeof_BIFF_8_FILEPASS ) { | |
return 0; | |
} | |
unless ( | |
VerifyPassword( | |
$password, | |
substr( $q->{data}, 6, 16 ), | |
substr( $q->{data}, 22, 16 ), | |
substr( $q->{data}, 38, 16 ), | |
\$q->{md5_ctxt} | |
) | |
) | |
{ | |
return 0; | |
} | |
$q->{encryption} = MS_BIFF_CRYPTO_RC4; | |
$q->{block} = -1; | |
# The first record after FILEPASS seems to be unencrypted | |
$q->{dont_decrypt_next_record} = 1; | |
# Pretend to decrypt the entire stream up till this point, it was | |
# encrypted, but do it anyway to keep the rc4 state in sync | |
SkipBytes( $q, 0, $q->{streamPos} ); | |
return 1; | |
} | |
sub InitStream { | |
my ( $stream_data ) = @_; | |
my %q; | |
$q{opcode} = 0; | |
$q{length} = 0; | |
$q{data} = ''; | |
$q{stream} = $stream_data; # data stream | |
$q{streamLen} = length( $stream_data ); # stream length | |
$q{streamPos} = 0; # stream position | |
$q{encryption} = 0; | |
$q{xor_key} = ''; | |
$q{rc4_key} = ''; | |
$q{md5_ctxt} = ''; | |
$q{block} = 0; | |
$q{dont_decrypt_next_record} = 0; | |
return \%q; | |
} | |
sub QueryNext { | |
my ( $q ) = @_; | |
if ( $q->{streamPos} + 4 >= $q->{streamLen} ) { | |
return 0; | |
} | |
my $data = substr( $q->{stream}, $q->{streamPos}, 4 ); | |
( $q->{opcode}, $q->{length} ) = unpack( 'v2', $data ); | |
# No biff record should be larger than around 20,000. | |
if ( $q->{length} >= 20000 ) { | |
return 0; | |
} | |
if ( $q->{length} > 0 ) { | |
$q->{data} = substr( $q->{stream}, $q->{streamPos} + 4, $q->{length} ); | |
} | |
else { | |
$q->{data} = undef; | |
$q->{dont_decrypt_next_record} = 1; | |
} | |
if ( $q->{encryption} == MS_BIFF_CRYPTO_RC4 ) { | |
if ( $q->{dont_decrypt_next_record} ) { | |
SkipBytes( $q, $q->{streamPos}, 4 + $q->{length} ); | |
$q->{dont_decrypt_next_record} = 0; | |
} | |
else { | |
my $pos = $q->{streamPos}; | |
my $data = $q->{data}; | |
my $len = $q->{length}; | |
my $res = ''; | |
# Pretend to decrypt header. | |
SkipBytes( $q, $pos, 4 ); | |
$pos += 4; | |
while ( $q->{block} != int( ( $pos + $len ) / REKEY_BLOCK ) ) { | |
my $step = REKEY_BLOCK - ( $pos % REKEY_BLOCK ); | |
$res .= $q->{rc4_key}->RC4( substr( $data, 0, $step ) ); | |
$data = substr( $data, $step ); | |
$pos += $step; | |
$len -= $step; | |
MakeKey( ++$q->{block}, \$q->{rc4_key}, $q->{md5_ctxt} ); | |
} | |
$res .= $q->{rc4_key}->RC4( substr( $data, 0, $len ) ); | |
$q->{data} = $res; | |
} | |
} | |
elsif ( $q->{encryption} == MS_BIFF_CRYPTO_XOR ) { | |
# not implemented | |
return 0; | |
} | |
elsif ( $q->{encryption} == MS_BIFF_CRYPTO_NONE ) { | |
} | |
$q->{streamPos} += 4 + $q->{length}; | |
return 1; | |
} | |
############################################################################### | |
# | |
# Parse() | |
# | |
# Parse the Excel file and convert it into a tree of objects.. | |
# | |
sub parse { | |
my ( $self, $source, $formatter ) = @_; | |
my $workbook = Spreadsheet::ParseExcel::Workbook->new(); | |
$currentbook = $workbook; | |
$workbook->{SheetCount} = 0; | |
$workbook->{CellHandler} = $self->{CellHandler}; | |
$workbook->{NotSetCell} = $self->{NotSetCell}; | |
$workbook->{Object} = $self->{Object}; | |
$workbook->{aColor} = [ @aColor ]; | |
my ( $biff_data, $data_length ) = $self->_get_content( $source, $workbook ); | |
return undef if not $biff_data; | |
if ( $formatter ) { | |
$workbook->{FmtClass} = $formatter; | |
} | |
else { | |
$workbook->{FmtClass} = Spreadsheet::ParseExcel::FmtDefault->new(); | |
} | |
# Parse the BIFF data. | |
my $stream = InitStream( $biff_data ); | |
while ( QueryNext( $stream ) ) { | |
my $record = $stream->{opcode}; | |
my $record_length = $stream->{length}; | |
my $record_header = $stream->{data}; | |
# If the file contains a FILEPASS record we assume that it is encrypted | |
# and cannot be parsed. | |
if ( $record == 0x002F ) { | |
unless ( SetDecrypt( $stream, '', $self->{Password} ) ) { | |
$self->{_error_status} = ErrorFileEncrypted; | |
return undef; | |
} | |
} | |
# Special case of a formula String with no string. | |
if ( $workbook->{_PrevPos} | |
&& ( defined $self->{FuncTbl}->{$record} ) | |
&& ( $record != 0x207 ) ) | |
{ | |
my $iPos = $workbook->{_PrevPos}; | |
$workbook->{_PrevPos} = undef; | |
my ( $row, $col, $format_index ) = @$iPos; | |
_NewCell( | |
$workbook, $row, $col, | |
Kind => 'Formula String', | |
Val => '', | |
FormatNo => $format_index, | |
Format => $workbook->{Format}[$format_index], | |
Numeric => 0, | |
Code => undef, | |
Book => $workbook, | |
); | |
} | |
# If the BIFF record matches 0x0*09 then it is a BOF record. | |
# We reset the _skip_chart flag to ensure we check the sheet type. | |
if ( ( $record & 0xF0FF ) == 0x09 ) { | |
$workbook->{_skip_chart} = 0; | |
} | |
if ( defined $self->{FuncTbl}->{$record} && !$workbook->{_skip_chart} ) | |
{ | |
$self->{FuncTbl}->{$record} | |
->( $workbook, $record, $record_length, $record_header ); | |
} | |
$PREFUNC = $record if ( $record != 0x3C ); #Not Continue | |
last if defined $workbook->{_ParseAbort}; | |
} | |
foreach my $worksheet (@{$workbook->{Worksheet}} ) { | |
# Install hyperlinks into each cell | |
# Range is undocumented for user; allows reuse of data | |
if ($worksheet->{HyperLinks}) { | |
foreach my $link (@{$worksheet->{HyperLinks}}) { | |
for( my $row = $link->[3]; $row <= $link->[4]; $row++ ) { | |
for( my $col = $link->[5]; $col <= $link->[6]; $col++ ) { | |
$worksheet->{Cells}[$row][$col]{Hyperlink} = $link; | |
} | |
} | |
} | |
} | |
} | |
return $workbook; | |
} | |
############################################################################### | |
# | |
# _get_content() | |
# | |
# Get the Excel BIFF content from the file or filehandle. | |
# | |
sub _get_content { | |
my ( $self, $source, $workbook ) = @_; | |
my ( $biff_data, $data_length ); | |
# Reset the error status in case method is called more than once. | |
$self->{_error_status} = ErrorNone; | |
my $ref = ref($source); | |
if ( $ref ) { | |
if ( $ref eq 'SCALAR' ) { | |
# Specified by a scalar buffer. | |
( $biff_data, $data_length ) = $self->{GetContent}->( $source ); | |
} | |
elsif ( $ref eq 'ARRAY' ) { | |
# Specified by file content | |
$workbook->{File} = undef; | |
my $sData = join( '', @$source ); | |
( $biff_data, $data_length ) = $self->{GetContent}->( \$sData ); | |
} | |
else { | |
# Assume filehandle | |
# For CGI.pm (Light FileHandle) | |
my $sBuff = ''; | |
if ( eval { binmode( $source ) } ) { | |
my $sWk; | |
while ( read( $source, $sWk, 4096 ) ) { | |
$sBuff .= $sWk; | |
} | |
} | |
else { | |
# Assume IO::Wrap or some other filehandle-like OO-only object | |
my $sWk; | |
# IO::Wrap does not implement binmode | |
eval { $source->binmode() }; | |
while ( $source->read( $sWk, 4096 ) ) { | |
$sBuff .= $sWk; | |
} | |
} | |
( $biff_data, $data_length ) = $self->{GetContent}->( \$sBuff ); | |
} | |
} | |
else { | |
# Specified by filename . | |
$workbook->{File} = $source; | |
if ( !-e $source ) { | |
$self->{_error_status} = ErrorNoFile; | |
return undef; | |
} | |
( $biff_data, $data_length ) = $self->{GetContent}->( $source ); | |
} | |
# If the read was successful return the data. | |
if ( $data_length ) { | |
return ( $biff_data, $data_length ); | |
} | |
else { | |
$self->{_error_status} = ErrorNoExcelData; | |
return undef; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subGetContent (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _subGetContent { | |
my ( $sFile ) = @_; | |
my $oOl = OLE::Storage_Lite->new( $sFile ); | |
return ( undef, undef ) unless ( $oOl ); | |
my @aRes = $oOl->getPpsSearch( | |
[ | |
OLE::Storage_Lite::Asc2Ucs( 'Book' ), | |
OLE::Storage_Lite::Asc2Ucs( 'Workbook' ) | |
], | |
1, 1 | |
); | |
return ( undef, undef ) if ( $#aRes < 0 ); | |
#Hack from Herbert | |
if ( $aRes[0]->{Data} ) { | |
return ( $aRes[0]->{Data}, length( $aRes[0]->{Data} ) ); | |
} | |
#Same as OLE::Storage_Lite | |
my $oIo; | |
#1. $sFile is Ref of scalar | |
if ( ref( $sFile ) eq 'SCALAR' ) { | |
if ( $_use_perlio ) { | |
open $oIo, "<", \$sFile; | |
} | |
else { | |
$oIo = IO::Scalar->new; | |
$oIo->open( $sFile ); | |
} | |
} | |
#2. $sFile is a IO::Handle object | |
elsif ( UNIVERSAL::isa( $sFile, 'IO::Handle' ) ) { | |
$oIo = $sFile; | |
binmode( $oIo ); | |
} | |
#3. $sFile is a simple filename string | |
elsif ( !ref( $sFile ) ) { | |
$oIo = IO::File->new; | |
$oIo->open( "<$sFile" ) || return undef; | |
binmode( $oIo ); | |
} | |
my $sWk; | |
my $sBuff = ''; | |
while ( $oIo->read( $sWk, 4096 ) ) { #4_096 has no special meanings | |
$sBuff .= $sWk; | |
} | |
$oIo->close(); | |
#Not Excel file (simple method) | |
return ( undef, undef ) if ( substr( $sBuff, 0, 1 ) ne "\x09" ); | |
return ( $sBuff, length( $sBuff ) ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subBOF (for Spreadsheet::ParseExcel) Developers' Kit : P303 | |
#------------------------------------------------------------------------------ | |
sub _subBOF { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iVer, $iDt ) = unpack( "v2", $sWk ); | |
#Workbook Global | |
if ( $iDt == 0x0005 ) { | |
$oBook->{Version} = unpack( "v", $sWk ); | |
$oBook->{BIFFVersion} = | |
( $oBook->{Version} == verExcel95 ) ? verBIFF5 : verBIFF8; | |
$oBook->{_CurSheet} = undef; | |
$oBook->{_CurSheet_} = -1; | |
} | |
#Worksheet or Dialogsheet | |
elsif ( $iDt != 0x0020 ) { #if($iDt == 0x0010) | |
if ( defined $oBook->{_CurSheet_} ) { | |
$oBook->{_CurSheet} = $oBook->{_CurSheet_} + 1; | |
$oBook->{_CurSheet_}++; | |
( | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{SheetVersion}, | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{SheetType}, | |
) | |
= unpack( "v2", $sWk ) | |
if ( length( $sWk ) > 4 ); | |
} | |
else { | |
$oBook->{BIFFVersion} = int( $bOp / 0x100 ); | |
if ( ( $oBook->{BIFFVersion} == verBIFF2 ) | |
|| ( $oBook->{BIFFVersion} == verBIFF3 ) | |
|| ( $oBook->{BIFFVersion} == verBIFF4 ) ) | |
{ | |
$oBook->{Version} = $oBook->{BIFFVersion}; | |
$oBook->{_CurSheet} = 0; | |
$oBook->{Worksheet}[ $oBook->{SheetCount} ] = | |
Spreadsheet::ParseExcel::Worksheet->new( | |
_Name => '', | |
Name => '', | |
_Book => $oBook, | |
_SheetNo => $oBook->{SheetCount}, | |
); | |
$oBook->{SheetCount}++; | |
} | |
} | |
} | |
else { | |
# Set flag to ignore all chart records until we reach another BOF. | |
$oBook->{_skip_chart} = 1; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subBlank (for Spreadsheet::ParseExcel) DK:P303 | |
#------------------------------------------------------------------------------ | |
sub _subBlank { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'BLANK', | |
Val => '', | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => undef, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subInteger (for Spreadsheet::ParseExcel) Not in DK | |
#------------------------------------------------------------------------------ | |
sub _subInteger { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF, $sTxt, $sDum ); | |
( $iR, $iC, $iF, $sDum, $sTxt ) = unpack( "v3cv", $sWk ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'INTEGER', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => undef, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subNumber (for Spreadsheet::ParseExcel) : DK: P354 | |
#------------------------------------------------------------------------------ | |
sub _subNumber { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk ); | |
my $dVal = _convDval( substr( $sWk, 6, 8 ) ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'Number', | |
Val => $dVal, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 1, | |
Code => undef, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _convDval (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _convDval { | |
my ( $sWk ) = @_; | |
return | |
unpack( "d", | |
( $BIGENDIAN ) ? pack( "c8", reverse( unpack( "c8", $sWk ) ) ) : $sWk ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subRString (for Spreadsheet::ParseExcel) DK:P405 | |
#------------------------------------------------------------------------------ | |
sub _subRString { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF, $iL, $sTxt ); | |
( $iR, $iC, $iF, $iL ) = unpack( "v4", $sWk ); | |
$sTxt = substr( $sWk, 8, $iL ); | |
#Has STRUN | |
if ( length( $sWk ) > ( 8 + $iL ) ) { | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'RString', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => '_native_', #undef, | |
Book => $oBook, | |
Rich => substr( $sWk, ( 8 + $iL ) + 1 ), | |
); | |
} | |
else { | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'RString', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => '_native_', | |
Book => $oBook, | |
); | |
} | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subBoolErr (for Spreadsheet::ParseExcel) DK:P306 | |
#------------------------------------------------------------------------------ | |
sub _subBoolErr { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk ); | |
my ( $iVal, $iFlg ) = unpack( "cc", substr( $sWk, 6, 2 ) ); | |
my $sTxt = DecodeBoolErr( $iVal, $iFlg ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'BoolError', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => undef, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
############################################################################### | |
# | |
# _subRK() | |
# | |
# Decode the RK BIFF record. | |
# | |
sub _subRK { | |
my ( $workbook, $biff_number, $length, $data ) = @_; | |
my ( $row, $col, $format_index, $rk_number ) = unpack( 'vvvV', $data ); | |
my $number = _decode_rk_number( $rk_number ); | |
_NewCell( | |
$workbook, $row, $col, | |
Kind => 'RK', | |
Val => $number, | |
FormatNo => $format_index, | |
Format => $workbook->{Format}->[$format_index], | |
Numeric => 1, | |
Code => undef, | |
Book => $workbook, | |
); | |
# Store the max and min row/col values. | |
_SetDimension( $workbook, $row, $col, $col ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subArray (for Spreadsheet::ParseExcel) DK:P297 | |
#------------------------------------------------------------------------------ | |
sub _subArray { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iBR, $iER, $iBC, $iEC ) = unpack( "v2c2", $sWk ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subFormula (for Spreadsheet::ParseExcel) DK:P336 | |
#------------------------------------------------------------------------------ | |
sub _subFormula { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk ); | |
my ( $iFlg ) = unpack( "v", substr( $sWk, 12, 2 ) ); | |
if ( $iFlg == 0xFFFF ) { | |
my ( $iKind ) = unpack( "c", substr( $sWk, 6, 1 ) ); | |
my ( $iVal ) = unpack( "c", substr( $sWk, 8, 1 ) ); | |
if ( ( $iKind == 1 ) or ( $iKind == 2 ) ) { | |
my $sTxt = | |
( $iKind == 1 ) | |
? DecodeBoolErr( $iVal, 0 ) | |
: DecodeBoolErr( $iVal, 1 ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'Formula Bool', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => undef, | |
Book => $oBook, | |
); | |
} | |
else { # Result (Reserve Only) | |
$oBook->{_PrevPos} = [ $iR, $iC, $iF ]; | |
} | |
} | |
else { | |
my $dVal = _convDval( substr( $sWk, 6, 8 ) ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'Formula Number', | |
Val => $dVal, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 1, | |
Code => undef, | |
Book => $oBook, | |
); | |
} | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subString (for Spreadsheet::ParseExcel) DK:P414 | |
#------------------------------------------------------------------------------ | |
sub _subString { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
#Position (not enough for ARRAY) | |
my $iPos = $oBook->{_PrevPos}; | |
return undef unless ( $iPos ); | |
$oBook->{_PrevPos} = undef; | |
my ( $iR, $iC, $iF ) = @$iPos; | |
my ( $iLen, $sTxt, $sCode ); | |
if ( $oBook->{BIFFVersion} == verBIFF8 ) { | |
my ( $raBuff, $iLen ) = _convBIFF8String( $oBook, $sWk, 1 ); | |
$sTxt = $raBuff->[0]; | |
$sCode = ( $raBuff->[1] ) ? 'ucs2' : undef; | |
} | |
elsif ( $oBook->{BIFFVersion} == verBIFF5 ) { | |
$sCode = '_native_'; | |
$iLen = unpack( "v", $sWk ); | |
$sTxt = substr( $sWk, 2, $iLen ); | |
} | |
else { | |
$sCode = '_native_'; | |
$iLen = unpack( "c", $sWk ); | |
$sTxt = substr( $sWk, 1, $iLen ); | |
} | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'String', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => $sCode, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subLabel (for Spreadsheet::ParseExcel) DK:P344 | |
#------------------------------------------------------------------------------ | |
sub _subLabel { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk ); | |
my ( $sLbl, $sCode ); | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
my ( $raBuff, $iLen, $iStPos, $iLenS ) = | |
_convBIFF8String( $oBook, substr( $sWk, 6 ), 1 ); | |
$sLbl = $raBuff->[0]; | |
$sCode = ( $raBuff->[1] ) ? 'ucs2' : undef; | |
} | |
#Before BIFF8 | |
else { | |
$sLbl = substr( $sWk, 8 ); | |
$sCode = '_native_'; | |
} | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'Label', | |
Val => $sLbl, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => $sCode, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
############################################################################### | |
# | |
# _subMulRK() | |
# | |
# Decode the Multiple RK BIFF record. | |
# | |
sub _subMulRK { | |
my ( $workbook, $biff_number, $length, $data ) = @_; | |
# JMN: I don't know why this is here. | |
return if $workbook->{SheetCount} <= 0; | |
my ( $row, $first_col ) = unpack( "v2", $data ); | |
my $last_col = unpack( "v", substr( $data, length( $data ) - 2, 2 ) ); | |
# Iterate over the RK array and decode the data. | |
my $pos = 4; | |
for my $col ( $first_col .. $last_col ) { | |
my $data = substr( $data, $pos, 6 ); | |
my ( $format_index, $rk_number ) = unpack 'vV', $data; | |
my $number = _decode_rk_number( $rk_number ); | |
_NewCell( | |
$workbook, $row, $col, | |
Kind => 'MulRK', | |
Val => $number, | |
FormatNo => $format_index, | |
Format => $workbook->{Format}->[$format_index], | |
Numeric => 1, | |
Code => undef, | |
Book => $workbook, | |
); | |
$pos += 6; | |
} | |
# Store the max and min row/col values. | |
_SetDimension( $workbook, $row, $first_col, $last_col ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subMulBlank (for Spreadsheet::ParseExcel) DK:P349 | |
#------------------------------------------------------------------------------ | |
sub _subMulBlank { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iSc ) = unpack( "v2", $sWk ); | |
my $iEc = unpack( "v", substr( $sWk, length( $sWk ) - 2, 2 ) ); | |
my $iPos = 4; | |
for ( my $iC = $iSc ; $iC <= $iEc ; $iC++ ) { | |
my $iF = unpack( 'v', substr( $sWk, $iPos, 2 ) ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'MulBlank', | |
Val => '', | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => undef, | |
Book => $oBook, | |
); | |
$iPos += 2; | |
} | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iSc, $iEc ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subLabelSST (for Spreadsheet::ParseExcel) DK: P345 | |
#------------------------------------------------------------------------------ | |
sub _subLabelSST { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF, $iIdx ) = unpack( 'v3V', $sWk ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'PackedIdx', | |
Val => $oBook->{PkgStr}[$iIdx]->{Text}, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => ( $oBook->{PkgStr}[$iIdx]->{Unicode} ) ? 'ucs2' : undef, | |
Book => $oBook, | |
Rich => $oBook->{PkgStr}[$iIdx]->{Rich}, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subFlg1904 (for Spreadsheet::ParseExcel) DK:P296 | |
#------------------------------------------------------------------------------ | |
sub _subFlg1904 { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
$oBook->{Flg1904} = unpack( "v", $sWk ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subRow (for Spreadsheet::ParseExcel) DK:P403 | |
#------------------------------------------------------------------------------ | |
sub _subRow { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
#0. Get Worksheet info (MaxRow, MaxCol, MinRow, MinCol) | |
my ( $iR, $iSc, $iEc, $iHght, $undef1, $undef2, $iGr, $iXf ) = | |
unpack( "v8", $sWk ); | |
$iEc--; | |
if ( $iGr & 0x20 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RowHidden}[$iR] = 1; | |
} | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RowHeight}[$iR] = $iHght / 20; | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iSc, $iEc ); | |
} | |
#------------------------------------------------------------------------------ | |
# _SetDimension (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _SetDimension { | |
my ( $oBook, $iR, $iSc, $iEc ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
#2.1 MinRow | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} = $iR | |
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} ) | |
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} <= $iR ); | |
#2.2 MaxRow | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} = $iR | |
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} ) | |
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} > $iR ); | |
#2.3 MinCol | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} = $iSc | |
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} ) | |
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} <= $iSc ); | |
#2.4 MaxCol | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} = $iEc | |
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} ) | |
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} > $iEc ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subDefaultRowHeight (for Spreadsheet::ParseExcel) DK: P318 | |
#------------------------------------------------------------------------------ | |
sub _subDefaultRowHeight { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
#1. RowHeight | |
my ( $iDum, $iHght ) = unpack( "v2", $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{DefRowHeight} = $iHght / 20; | |
} | |
#------------------------------------------------------------------------------ | |
# _subStandardWidth(for Spreadsheet::ParseExcel) DK:P413 | |
#------------------------------------------------------------------------------ | |
sub _subStandardWidth { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my $iW = unpack( "v", $sWk ); | |
$oBook->{StandardWidth} = _convert_col_width( $oBook, $iW ); | |
} | |
############################################################################### | |
# | |
# _subDefColWidth() | |
# | |
# Read the DEFCOLWIDTH Biff record. This gives the width in terms of chars | |
# and is different from the width in the COLINFO record. | |
# | |
sub _subDefColWidth { | |
my ( $self, $record, $length, $data ) = @_; | |
my $width = unpack 'v', $data; | |
# Adjustment for default Arial 10 width. | |
$width = 8.43 if $width == 8; | |
$self->{Worksheet}->[ $self->{_CurSheet} ]->{DefColWidth} = $width; | |
} | |
############################################################################### | |
# | |
# _convert_col_width() | |
# | |
# Converts from the internal Excel column width units to user units seen in the | |
# interface. It is first necessary to convert the internal width to pixels and | |
# then to user units. The conversion is specific to a default font of Arial 10. | |
# TODO, the conversion should be extended to other fonts and sizes. | |
# | |
sub _convert_col_width { | |
my $self = shift; | |
my $excel_width = shift; | |
# Convert from Excel units to pixels (rounded up). | |
my $pixels = int( 0.5 + $excel_width * 7 / 256 ); | |
# Convert from pixels to user units. | |
# The conversion is different for columns <= 1 user unit (12 pixels). | |
my $user_width; | |
if ( $pixels <= 12 ) { | |
$user_width = $pixels / 12; | |
} | |
else { | |
$user_width = ( $pixels - 5 ) / 7; | |
} | |
# Round up to 2 decimal places. | |
$user_width = int( $user_width * 100 + 0.5 ) / 100; | |
return $user_width; | |
} | |
#------------------------------------------------------------------------------ | |
# _subColInfo (for Spreadsheet::ParseExcel) DK:P309 | |
#------------------------------------------------------------------------------ | |
sub _subColInfo { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless defined $oBook->{_CurSheet}; | |
my ( $iSc, $iEc, $iW, $iXF, $iGr ) = unpack( "v5", $sWk ); | |
for ( my $i = $iSc ; $i <= $iEc ; $i++ ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColWidth}[$i] = | |
_convert_col_width( $oBook, $iW ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColFmtNo}[$i] = $iXF; | |
if ( $iGr & 0x01 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColHidden}[$i] = 1; | |
} | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subWindow1 Window information P 273 | |
#------------------------------------------------------------------------------ | |
sub _subWindow1 { | |
my ( $workbook, $op, $len, $wk ) = @_; | |
return if ( $workbook->{BIFFVersion} <= verBIFF4() ); | |
my ( | |
$hpos, $vpos, $width, | |
$height, $options, $active, | |
$firsttab, $numselected, $tabbarwidth | |
) = unpack( "v9", $wk ); | |
$workbook->{ActiveSheet} = $active; | |
} | |
#------------------------------------------------------------------------------ | |
# _subSheetLayout OpenOffice 5.96 (P207) | |
#------------------------------------------------------------------------------ | |
sub _subSheetLayout { | |
my ( $workbook, $op, $len, $wk ) = @_; | |
my @unused; | |
( | |
my $rc, | |
@unused[ 1 .. 10 ], | |
@unused[ 11 .. 14 ], | |
my $color, @unused[ 15, 16 ] | |
) = unpack( "vC10C4vC2", $wk ); | |
return unless ( $rc == 0x0862 ); | |
$workbook->{Worksheet}[ $workbook->{_CurSheet} ]->{TabColor} = $color; | |
} | |
#------------------------------------------------------------------------------ | |
# _subHyperlink OpenOffice 5.96 (P182) | |
# | |
# Also see: http://msdn.microsoft.com/en-us/library/gg615407(v=office.14).aspx | |
#------------------------------------------------------------------------------ | |
# Helper: Extract a GID, returns as text string | |
sub _getguid { | |
my( $wk ) = @_; | |
my( $text, $guidl, $guids1, $guids2, @guidb ); | |
( $guidl, $guids1, $guids2, @guidb[0..7] ) = unpack( 'Vv2C8', $wk ); | |
$text = sprintf( '%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X', $guidl, $guids1, $guids2, @guidb); | |
return $text; | |
} | |
# Helper: Extract a counted (16-bit) unicode string, returns string, | |
# updates $offset | |
# $zterm == 1 if string is null-terminated. | |
# $bc if length is in bytes (not chars) | |
sub _getustr { | |
my( $wk, $offset, $zterm, $bc ) = @_; | |
my $len = unpack( 'V', substr( $wk, $offset ) ); | |
$offset += 4; | |
if( $bc ) { | |
$len /= 2; | |
} | |
$len -= $zterm; | |
my $text = join( '', map { chr $_ } unpack( "v$len", substr( $wk, $offset ) ) ); | |
$text =~ s/\0.*\z// if( $zterm ); | |
$_[1] = ( $offset += ($len + $zterm) *2 ); | |
return $text; | |
} | |
# HYPERLINK record | |
sub _subHyperlink { | |
my ( $workbook, $op, $len, $wk ) = @_; | |
# REF | |
my( $srow, $erow, $scol, $ecol ) = unpack( 'v4', $wk ); | |
my $guid = _getguid( substr( $wk, 8 ) ); | |
return unless( $guid eq '79EAC9D0-BAF9-11CE-8C82-00AA004BA90B' ); | |
my( $stmvers, $flags ) = unpack( 'VV', substr( $wk, 24 ) ); | |
return if( $flags & 0x60 || $stmvers != 2 ); | |
my $offset = 32; | |
my( $desc,$frame, $link, $mark ); | |
if( ($flags & 0x14) == 0x14 ) { | |
$desc = _getustr( $wk, $offset, 1, 0 ); | |
} | |
if( $flags & 0x80 ) { | |
$frame = _getustr( $wk, $offset, 1, 0 ); | |
} | |
$link = ''; | |
if( $flags & 0x100 ) { | |
# UNC path | |
$link = 'file:///' . _getustr( $wk, $offset, 1, 0 ); | |
} elsif( $flags & 0x1 ) { | |
# Has link (URI) | |
$guid = _getguid( substr( $wk, $offset ) ); | |
$offset += 16; | |
if( $guid eq '79EAC9E0-BAF9-11CE-8C82-00AA004BA90B' ) { | |
# URI | |
$link = _getustr( $wk, $offset, 1, 1 ); | |
} elsif( $guid eq '00000303-0000-0000-C000-000000000046' ) { | |
# Local file | |
$link = 'file:///'; | |
# !($flags & 2) = 'relative path' | |
if( !($flags & 0x2) ) { | |
my $file = $workbook->{File}; | |
if( defined $file && length $file ) { | |
$link .= (fileparse($file))[1]; | |
} | |
else { | |
$link .= '%REL%' | |
} | |
} | |
my $dirn = unpack( 'v', substr( $wk, $offset ) ); | |
$offset += 2; | |
$link .= '..\\' x $dirn; | |
my $namelen = unpack( 'V', substr( $wk, $offset ) ); | |
$offset += 4; | |
my $name = unpack( 'Z*', substr( $wk, $offset ) ); | |
$offset += $namelen; | |
$offset += 24; | |
my $size = unpack( 'V', substr( $wk, $offset ) ); | |
$offset += 4; | |
if( $size ) { | |
my $xlen = unpack( 'V', substr( $wk, $offset ) ) / 2; | |
$name = join( '', map { chr $_} unpack( "v$xlen", substr( $wk, $offset+4+2) ) ); | |
$offset += $size; | |
} | |
$link .= $name; | |
} else { | |
return; | |
} | |
} | |
# Text mark (Fragment identifier) | |
if( $flags & 0x8 ) { | |
# Cellrefs contain reserved characters, so url-encode | |
my $fragment = _getustr( $wk, $offset, 1 ); | |
$fragment =~ s/([^\w.~-])/sprintf( '%%%02X', ord( $1 ) )/gems; | |
$link .= '#' . $fragment; | |
} | |
# Update loop at end of parse() if this changes | |
push @{ $workbook->{Worksheet}[ $workbook->{_CurSheet} ]->{HyperLinks} }, [ | |
$desc, $link, $frame, $srow, $erow, $scol, $ecol ]; | |
} | |
#------------------------------------------------------------------------------ | |
# _subSST (for Spreadsheet::ParseExcel) DK:P413 | |
#------------------------------------------------------------------------------ | |
sub _subSST { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
_subStrWk( $oBook, substr( $sWk, 8 ) ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subContinue (for Spreadsheet::ParseExcel) DK:P311 | |
#------------------------------------------------------------------------------ | |
sub _subContinue { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
#if(defined $self->{FuncTbl}->{$bOp}) { | |
# $self->{FuncTbl}->{$PREFUNC}->($oBook, $bOp, $bLen, $sWk); | |
#} | |
_subStrWk( $oBook, $sWk, 1 ) if ( $PREFUNC == 0xFC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subWriteAccess (for Spreadsheet::ParseExcel) DK:P451 | |
#------------------------------------------------------------------------------ | |
sub _subWriteAccess { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return if ( defined $oBook->{_Author} ); | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
$oBook->{Author} = _convBIFF8String( $oBook, $sWk ); | |
} | |
#Before BIFF8 | |
else { | |
my ( $iLen ) = unpack( "c", $sWk ); | |
$oBook->{Author} = | |
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' ); | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _convBIFF8String (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _convBIFF8String { | |
my ( $oBook, $sWk, $iCnvFlg ) = @_; | |
my ( $iLen, $iFlg ) = unpack( "vc", $sWk ); | |
my ( $iHigh, $iExt, $iRich ) = ( $iFlg & 0x01, $iFlg & 0x04, $iFlg & 0x08 ); | |
my ( $iStPos, $iExtCnt, $iRichCnt, $sStr ); | |
#2. Rich and Ext | |
if ( $iRich && $iExt ) { | |
$iStPos = 9; | |
( $iRichCnt, $iExtCnt ) = unpack( 'vV', substr( $sWk, 3, 6 ) ); | |
} | |
elsif ( $iRich ) { #Only Rich | |
$iStPos = 5; | |
$iRichCnt = unpack( 'v', substr( $sWk, 3, 2 ) ); | |
$iExtCnt = 0; | |
} | |
elsif ( $iExt ) { #Only Ext | |
$iStPos = 7; | |
$iRichCnt = 0; | |
$iExtCnt = unpack( 'V', substr( $sWk, 3, 4 ) ); | |
} | |
else { #Nothing Special | |
$iStPos = 3; | |
$iExtCnt = 0; | |
$iRichCnt = 0; | |
} | |
#3.Get String | |
if ( $iHigh ) { #Compressed | |
$iLen *= 2; | |
$sStr = substr( $sWk, $iStPos, $iLen ); | |
_SwapForUnicode( \$sStr ); | |
$sStr = $oBook->{FmtClass}->TextFmt( $sStr, 'ucs2' ) | |
unless ( $iCnvFlg ); | |
} | |
else { #Not Compressed | |
$sStr = substr( $sWk, $iStPos, $iLen ); | |
$sStr = $oBook->{FmtClass}->TextFmt( $sStr, undef ) unless ( $iCnvFlg ); | |
} | |
#4. return | |
if ( wantarray ) { | |
#4.1 Get Rich and Ext | |
if ( length( $sWk ) < $iStPos + $iLen + $iRichCnt * 4 + $iExtCnt ) { | |
return ( | |
[ undef, $iHigh, undef, undef ], | |
$iStPos + $iLen + $iRichCnt * 4 + $iExtCnt, | |
$iStPos, $iLen | |
); | |
} | |
else { | |
return ( | |
[ | |
$sStr, | |
$iHigh, | |
substr( $sWk, $iStPos + $iLen, $iRichCnt * 4 ), | |
substr( $sWk, $iStPos + $iLen + $iRichCnt * 4, $iExtCnt ) | |
], | |
$iStPos + $iLen + $iRichCnt * 4 + $iExtCnt, | |
$iStPos, $iLen | |
); | |
} | |
} | |
else { | |
return $sStr; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subXF (for Spreadsheet::ParseExcel) DK:P453 | |
#------------------------------------------------------------------------------ | |
sub _subXF { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iFnt, $iIdx ); | |
my ( | |
$iLock, $iHidden, $iStyle, $i123, $iAlH, $iWrap, | |
$iAlV, $iJustL, $iRotate, $iInd, $iShrink, $iMerge, | |
$iReadDir, $iBdrD, $iBdrSL, $iBdrSR, $iBdrST, $iBdrSB, | |
$iBdrSD, $iBdrCL, $iBdrCR, $iBdrCT, $iBdrCB, $iBdrCD, | |
$iFillP, $iFillCF, $iFillCB | |
); | |
if ( $oBook->{BIFFVersion} == verBIFF4 ) { | |
# Minimal support for Excel 4. We just get the font and format indices | |
# so that the cell data value can be formatted. | |
( $iFnt, $iIdx, ) = unpack( "CC", $sWk ); | |
} | |
elsif ( $oBook->{BIFFVersion} == verBIFF8 ) { | |
my ( $iGen, $iAlign, $iGen2, $iBdr1, $iBdr2, $iBdr3, $iPtn ); | |
( $iFnt, $iIdx, $iGen, $iAlign, $iGen2, $iBdr1, $iBdr2, $iBdr3, $iPtn ) | |
= unpack( "v7Vv", $sWk ); | |
$iLock = ( $iGen & 0x01 ) ? 1 : 0; | |
$iHidden = ( $iGen & 0x02 ) ? 1 : 0; | |
$iStyle = ( $iGen & 0x04 ) ? 1 : 0; | |
$i123 = ( $iGen & 0x08 ) ? 1 : 0; | |
$iAlH = ( $iAlign & 0x07 ); | |
$iWrap = ( $iAlign & 0x08 ) ? 1 : 0; | |
$iAlV = ( $iAlign & 0x70 ) / 0x10; | |
$iJustL = ( $iAlign & 0x80 ) ? 1 : 0; | |
$iRotate = ( ( $iAlign & 0xFF00 ) / 0x100 ) & 0x00FF; | |
$iRotate = 90 if ( $iRotate == 255 ); | |
$iRotate = 90 - $iRotate if ( $iRotate > 90 ); | |
$iInd = ( $iGen2 & 0x0F ); | |
$iShrink = ( $iGen2 & 0x10 ) ? 1 : 0; | |
$iMerge = ( $iGen2 & 0x20 ) ? 1 : 0; | |
$iReadDir = ( ( $iGen2 & 0xC0 ) / 0x40 ) & 0x03; | |
$iBdrSL = $iBdr1 & 0x0F; | |
$iBdrSR = ( ( $iBdr1 & 0xF0 ) / 0x10 ) & 0x0F; | |
$iBdrST = ( ( $iBdr1 & 0xF00 ) / 0x100 ) & 0x0F; | |
$iBdrSB = ( ( $iBdr1 & 0xF000 ) / 0x1000 ) & 0x0F; | |
$iBdrCL = ( ( $iBdr2 & 0x7F ) ) & 0x7F; | |
$iBdrCR = ( ( $iBdr2 & 0x3F80 ) / 0x80 ) & 0x7F; | |
$iBdrD = ( ( $iBdr2 & 0xC000 ) / 0x4000 ) & 0x3; | |
$iBdrCT = ( ( $iBdr3 & 0x7F ) ) & 0x7F; | |
$iBdrCB = ( ( $iBdr3 & 0x3F80 ) / 0x80 ) & 0x7F; | |
$iBdrCD = ( ( $iBdr3 & 0x1FC000 ) / 0x4000 ) & 0x7F; | |
$iBdrSD = ( ( $iBdr3 & 0x1E00000 ) / 0x200000 ) & 0xF; | |
$iFillP = ( ( $iBdr3 & 0xFC000000 ) / 0x4000000 ) & 0x3F; | |
$iFillCF = ( $iPtn & 0x7F ); | |
$iFillCB = ( ( $iPtn & 0x3F80 ) / 0x80 ) & 0x7F; | |
} | |
else { | |
my ( $iGen, $iAlign, $iPtn, $iPtn2, $iBdr1, $iBdr2 ); | |
( $iFnt, $iIdx, $iGen, $iAlign, $iPtn, $iPtn2, $iBdr1, $iBdr2 ) = | |
unpack( "v8", $sWk ); | |
$iLock = ( $iGen & 0x01 ) ? 1 : 0; | |
$iHidden = ( $iGen & 0x02 ) ? 1 : 0; | |
$iStyle = ( $iGen & 0x04 ) ? 1 : 0; | |
$i123 = ( $iGen & 0x08 ) ? 1 : 0; | |
$iAlH = ( $iAlign & 0x07 ); | |
$iWrap = ( $iAlign & 0x08 ) ? 1 : 0; | |
$iAlV = ( $iAlign & 0x70 ) / 0x10; | |
$iJustL = ( $iAlign & 0x80 ) ? 1 : 0; | |
$iRotate = ( ( $iAlign & 0x300 ) / 0x100 ) & 0x3; | |
$iFillCF = ( $iPtn & 0x7F ); | |
$iFillCB = ( ( $iPtn & 0x1F80 ) / 0x80 ) & 0x7F; | |
$iFillP = ( $iPtn2 & 0x3F ); | |
$iBdrSB = ( ( $iPtn2 & 0x1C0 ) / 0x40 ) & 0x7; | |
$iBdrCB = ( ( $iPtn2 & 0xFE00 ) / 0x200 ) & 0x7F; | |
$iBdrST = ( $iBdr1 & 0x07 ); | |
$iBdrSL = ( ( $iBdr1 & 0x38 ) / 0x8 ) & 0x07; | |
$iBdrSR = ( ( $iBdr1 & 0x1C0 ) / 0x40 ) & 0x07; | |
$iBdrCT = ( ( $iBdr1 & 0xFE00 ) / 0x200 ) & 0x7F; | |
$iBdrCL = ( $iBdr2 & 0x7F ) & 0x7F; | |
$iBdrCR = ( ( $iBdr2 & 0x3F80 ) / 0x80 ) & 0x7F; | |
} | |
push @{ $oBook->{Format} }, Spreadsheet::ParseExcel::Format->new( | |
FontNo => $iFnt, | |
Font => $oBook->{Font}[$iFnt], | |
FmtIdx => $iIdx, | |
Lock => $iLock, | |
Hidden => $iHidden, | |
Style => $iStyle, | |
Key123 => $i123, | |
AlignH => $iAlH, | |
Wrap => $iWrap, | |
AlignV => $iAlV, | |
JustLast => $iJustL, | |
Rotate => $iRotate, | |
Indent => $iInd, | |
Shrink => $iShrink, | |
Merge => $iMerge, | |
ReadDir => $iReadDir, | |
BdrStyle => [ $iBdrSL, $iBdrSR, $iBdrST, $iBdrSB ], | |
BdrColor => [ $iBdrCL, $iBdrCR, $iBdrCT, $iBdrCB ], | |
BdrDiag => [ $iBdrD, $iBdrSD, $iBdrCD ], | |
Fill => [ $iFillP, $iFillCF, $iFillCB ], | |
); | |
} | |
#------------------------------------------------------------------------------ | |
# _subFormat (for Spreadsheet::ParseExcel) DK: P336 | |
#------------------------------------------------------------------------------ | |
sub _subFormat { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my $sFmt; | |
if ( $oBook->{BIFFVersion} <= verBIFF5 ) { | |
$sFmt = substr( $sWk, 3, unpack( 'c', substr( $sWk, 2, 1 ) ) ); | |
$sFmt = $oBook->{FmtClass}->TextFmt( $sFmt, '_native_' ); | |
} | |
else { | |
$sFmt = _convBIFF8String( $oBook, substr( $sWk, 2 ) ); | |
} | |
my $format_index = unpack( 'v', substr( $sWk, 0, 2 ) ); | |
# Excel 4 and earlier used an index of 0 to indicate that a built-in format | |
# that was stored implicitly. | |
if ( $oBook->{BIFFVersion} <= verBIFF4 && $format_index == 0 ) { | |
$format_index = keys %{ $oBook->{FormatStr} }; | |
} | |
$oBook->{FormatStr}->{$format_index} = $sFmt; | |
} | |
#------------------------------------------------------------------------------ | |
# _subPalette (for Spreadsheet::ParseExcel) DK: P393 | |
#------------------------------------------------------------------------------ | |
sub _subPalette { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
for ( my $i = 0 ; $i < unpack( 'v', $sWk ) ; $i++ ) { | |
# push @aColor, unpack('H6', substr($sWk, $i*4+2)); | |
$oBook->{aColor}[ $i + 8 ] = unpack( 'H6', substr( $sWk, $i * 4 + 2 ) ); | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subFont (for Spreadsheet::ParseExcel) DK:P333 | |
#------------------------------------------------------------------------------ | |
sub _subFont { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline, $sFntName ); | |
my ( $bBold, $bItalic, $bUnderline, $bStrikeout ); | |
if ( $oBook->{BIFFVersion} == verBIFF8 ) { | |
( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline ) = | |
unpack( "v5c", $sWk ); | |
my ( $iSize, $iHigh ) = unpack( 'cc', substr( $sWk, 14, 2 ) ); | |
if ( $iHigh ) { | |
$sFntName = substr( $sWk, 16, $iSize * 2 ); | |
_SwapForUnicode( \$sFntName ); | |
$sFntName = $oBook->{FmtClass}->TextFmt( $sFntName, 'ucs2' ); | |
} | |
else { | |
$sFntName = substr( $sWk, 16, $iSize ); | |
$sFntName = $oBook->{FmtClass}->TextFmt( $sFntName, '_native_' ); | |
} | |
$bBold = ( $iBold >= 0x2BC ) ? 1 : 0; | |
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0; | |
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0; | |
$bUnderline = ( $iUnderline ) ? 1 : 0; | |
} | |
elsif ( $oBook->{BIFFVersion} == verBIFF5 ) { | |
( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline ) = | |
unpack( "v5c", $sWk ); | |
$sFntName = | |
$oBook->{FmtClass} | |
->TextFmt( substr( $sWk, 15, unpack( "c", substr( $sWk, 14, 1 ) ) ), | |
'_native_' ); | |
$bBold = ( $iBold >= 0x2BC ) ? 1 : 0; | |
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0; | |
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0; | |
$bUnderline = ( $iUnderline ) ? 1 : 0; | |
} | |
else { | |
( $iHeight, $iAttr ) = unpack( "v2", $sWk ); | |
$iCIdx = undef; | |
$iSuper = 0; | |
$bBold = ( $iAttr & 0x01 ) ? 1 : 0; | |
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0; | |
$bUnderline = ( $iAttr & 0x04 ) ? 1 : 0; | |
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0; | |
$sFntName = substr( $sWk, 5, unpack( "c", substr( $sWk, 4, 1 ) ) ); | |
} | |
push @{ $oBook->{Font} }, Spreadsheet::ParseExcel::Font->new( | |
Height => $iHeight / 20.0, | |
Attr => $iAttr, | |
Color => $iCIdx, | |
Super => $iSuper, | |
UnderlineStyle => $iUnderline, | |
Name => $sFntName, | |
Bold => $bBold, | |
Italic => $bItalic, | |
Underline => $bUnderline, | |
Strikeout => $bStrikeout, | |
); | |
#Skip Font[4] | |
push @{ $oBook->{Font} }, {} if ( scalar( @{ $oBook->{Font} } ) == 4 ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subBoundSheet (for Spreadsheet::ParseExcel): DK: P307 | |
#------------------------------------------------------------------------------ | |
sub _subBoundSheet { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iPos, $iGr, $iKind ) = unpack( "Lc2", $sWk ); | |
$iKind &= 0x0F; | |
return if ( ( $iKind != 0x00 ) && ( $iKind != 0x01 ) ); | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
my ( $iSize, $iUni ) = unpack( "cc", substr( $sWk, 6, 2 ) ); | |
my $sWsName = substr( $sWk, 8 ); | |
if ( $iUni & 0x01 ) { | |
_SwapForUnicode( \$sWsName ); | |
$sWsName = $oBook->{FmtClass}->TextFmt( $sWsName, 'ucs2' ); | |
} | |
$oBook->{Worksheet}[ $oBook->{SheetCount} ] = | |
Spreadsheet::ParseExcel::Worksheet->new( | |
Name => $sWsName, | |
Kind => $iKind, | |
_Pos => $iPos, | |
_Book => $oBook, | |
_SheetNo => $oBook->{SheetCount}, | |
SheetHidden => $iGr & 0x03 | |
); | |
} | |
else { | |
$oBook->{Worksheet}[ $oBook->{SheetCount} ] = | |
Spreadsheet::ParseExcel::Worksheet->new( | |
Name => | |
$oBook->{FmtClass}->TextFmt( substr( $sWk, 7 ), '_native_' ), | |
Kind => $iKind, | |
_Pos => $iPos, | |
_Book => $oBook, | |
_SheetNo => $oBook->{SheetCount}, | |
SheetHidden => $iGr & 0x03 | |
); | |
} | |
$oBook->{SheetCount}++; | |
} | |
#------------------------------------------------------------------------------ | |
# _subHeader (for Spreadsheet::ParseExcel) DK: P340 | |
#------------------------------------------------------------------------------ | |
sub _subHeader { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $sW; | |
if ( !defined $sWk ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} = undef; | |
return; | |
} | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
$sW = _convBIFF8String( $oBook, $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} = | |
( $sW eq "\x00" ) ? undef : $sW; | |
} | |
#Before BIFF8 | |
else { | |
my ( $iLen ) = unpack( "c", $sWk ); | |
$sW = | |
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} = | |
( $sW eq "\x00\x00\x00" ) ? undef : $sW; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subFooter (for Spreadsheet::ParseExcel) DK: P335 | |
#------------------------------------------------------------------------------ | |
sub _subFooter { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $sW; | |
if ( !defined $sWk ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} = undef; | |
return; | |
} | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
$sW = _convBIFF8String( $oBook, $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} = | |
( $sW eq "\x00" ) ? undef : $sW; | |
} | |
#Before BIFF8 | |
else { | |
my ( $iLen ) = unpack( "c", $sWk ); | |
$sW = | |
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} = | |
( $sW eq "\x00\x00\x00" ) ? undef : $sW; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subHPageBreak (for Spreadsheet::ParseExcel) DK: P341 | |
#------------------------------------------------------------------------------ | |
sub _subHPageBreak { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my @aBreak; | |
my $iCnt = unpack( "v", $sWk ); | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
for ( my $i = 0 ; $i < $iCnt ; $i++ ) { | |
my ( $iRow, $iColB, $iColE ) = | |
unpack( 'v3', substr( $sWk, 2 + $i * 6, 6 ) ); | |
# push @aBreak, [$iRow, $iColB, $iColE]; | |
push @aBreak, $iRow; | |
} | |
} | |
#Before BIFF8 | |
else { | |
for ( my $i = 0 ; $i < $iCnt ; $i++ ) { | |
my ( $iRow ) = unpack( 'v', substr( $sWk, 2 + $i * 2, 2 ) ); | |
push @aBreak, $iRow; | |
# push @aBreak, [$iRow, 0, 255]; | |
} | |
} | |
@aBreak = sort { $a <=> $b } @aBreak; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{HPageBreak} = \@aBreak; | |
} | |
#------------------------------------------------------------------------------ | |
# _subVPageBreak (for Spreadsheet::ParseExcel) DK: P447 | |
#------------------------------------------------------------------------------ | |
sub _subVPageBreak { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my @aBreak; | |
my $iCnt = unpack( "v", $sWk ); | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
for ( my $i = 0 ; $i < $iCnt ; $i++ ) { | |
my ( $iCol, $iRowB, $iRowE ) = | |
unpack( 'v3', substr( $sWk, 2 + $i * 6, 6 ) ); | |
push @aBreak, $iCol; | |
# push @aBreak, [$iCol, $iRowB, $iRowE]; | |
} | |
} | |
#Before BIFF8 | |
else { | |
for ( my $i = 0 ; $i < $iCnt ; $i++ ) { | |
my ( $iCol ) = unpack( 'v', substr( $sWk, 2 + $i * 2, 2 ) ); | |
push @aBreak, $iCol; | |
# push @aBreak, [$iCol, 0, 65535]; | |
} | |
} | |
@aBreak = sort { $a <=> $b } @aBreak; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{VPageBreak} = \@aBreak; | |
} | |
#------------------------------------------------------------------------------ | |
# _subMargin (for Spreadsheet::ParseExcel) DK: P306, 345, 400, 440 | |
#------------------------------------------------------------------------------ | |
sub _subMargin { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
# The "Mergin" options are a workaround for a backward compatible typo. | |
my $dWk = _convDval( substr( $sWk, 0, 8 ) ); | |
if ( $bOp == 0x26 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{LeftMergin} = $dWk; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{LeftMargin} = $dWk; | |
} | |
elsif ( $bOp == 0x27 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RightMergin} = $dWk; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RightMargin} = $dWk; | |
} | |
elsif ( $bOp == 0x28 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{TopMergin} = $dWk; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{TopMargin} = $dWk; | |
} | |
elsif ( $bOp == 0x29 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{BottomMergin} = $dWk; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{BottomMargin} = $dWk; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subHcenter (for Spreadsheet::ParseExcel) DK: P340 | |
#------------------------------------------------------------------------------ | |
sub _subHcenter { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $iWk = unpack( "v", $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{HCenter} = $iWk; | |
} | |
#------------------------------------------------------------------------------ | |
# _subVcenter (for Spreadsheet::ParseExcel) DK: P447 | |
#------------------------------------------------------------------------------ | |
sub _subVcenter { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $iWk = unpack( "v", $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{VCenter} = $iWk; | |
} | |
#------------------------------------------------------------------------------ | |
# _subPrintGridlines (for Spreadsheet::ParseExcel) DK: P397 | |
#------------------------------------------------------------------------------ | |
sub _subPrintGridlines { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $iWk = unpack( "v", $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PrintGrid} = $iWk; | |
} | |
#------------------------------------------------------------------------------ | |
# _subPrintHeaders (for Spreadsheet::ParseExcel) DK: P397 | |
#------------------------------------------------------------------------------ | |
sub _subPrintHeaders { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $iWk = unpack( "v", $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PrintHeaders} = $iWk; | |
} | |
#------------------------------------------------------------------------------ | |
# _subSETUP (for Spreadsheet::ParseExcel) DK: P409 | |
#------------------------------------------------------------------------------ | |
sub _subSETUP { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
# Workaround for some apps and older Excels that don't write a | |
# complete SETUP record. | |
return undef if $bLen != 34; | |
my $oWkS = $oBook->{Worksheet}[ $oBook->{_CurSheet} ]; | |
my $iGrBit; | |
( | |
$oWkS->{PaperSize}, $oWkS->{Scale}, $oWkS->{PageStart}, | |
$oWkS->{FitWidth}, $oWkS->{FitHeight}, $iGrBit, | |
$oWkS->{Res}, $oWkS->{VRes}, | |
) = unpack( 'v8', $sWk ); | |
$oWkS->{HeaderMargin} = _convDval( substr( $sWk, 16, 8 ) ); | |
$oWkS->{FooterMargin} = _convDval( substr( $sWk, 24, 8 ) ); | |
$oWkS->{Copis} = unpack( 'v2', substr( $sWk, 32, 2 ) ); | |
$oWkS->{LeftToRight} = ( ( $iGrBit & 0x01 ) ? 1 : 0 ); | |
$oWkS->{Landscape} = ( ( $iGrBit & 0x02 ) ? 1 : 0 ); | |
$oWkS->{NoPls} = ( ( $iGrBit & 0x04 ) ? 1 : 0 ); | |
$oWkS->{NoColor} = ( ( $iGrBit & 0x08 ) ? 1 : 0 ); | |
$oWkS->{Draft} = ( ( $iGrBit & 0x10 ) ? 1 : 0 ); | |
$oWkS->{Notes} = ( ( $iGrBit & 0x20 ) ? 1 : 0 ); | |
$oWkS->{NoOrient} = ( ( $iGrBit & 0x40 ) ? 1 : 0 ); | |
$oWkS->{UsePage} = ( ( $iGrBit & 0x80 ) ? 1 : 0 ); | |
# The NoPls flag indicates that the values have not been taken from an | |
# actual printer and thus may not be accurate. | |
# Set default scale if NoPls otherwise it may be an invalid value of 0XFF. | |
$oWkS->{Scale} = 100 if $oWkS->{NoPls}; | |
# Workaround for a backward compatible typo. | |
$oWkS->{HeaderMergin} = $oWkS->{HeaderMargin}; | |
$oWkS->{FooterMergin} = $oWkS->{FooterMargin}; | |
} | |
#------------------------------------------------------------------------------ | |
# _subName (for Spreadsheet::ParseExcel) DK: P350 | |
#------------------------------------------------------------------------------ | |
sub _subName { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( | |
$iGrBit, $cKey, $cCh, $iCce, $ixAls, | |
$iTab, $cchCust, $cchDsc, $cchHep, $cchStatus | |
) = unpack( 'vc2v3c4', $sWk ); | |
#Builtin Name + Length == 1 | |
if ( ( $iGrBit & 0x20 ) && ( $cCh == 1 ) ) { | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
my $iName = unpack( 'n', substr( $sWk, 14 ) ); | |
my $iSheet = unpack( 'v', substr( $sWk, 8 ) ) - 1; | |
# Workaround for mal-formed Excel workbooks where Print_Title is | |
# set as Global (i.e. itab = 0). Note, this will have to be | |
# treated differently when we get around to handling global names. | |
return undef if $iSheet == -1; | |
if ( $iName == 6 ) { #PrintArea | |
my ( $iSheetW, $raArea ) = _ParseNameArea( substr( $sWk, 16 ) ); | |
$oBook->{PrintArea}[$iSheet] = $raArea; | |
} | |
elsif ( $iName == 7 ) { #Title | |
my ( $iSheetW, $raArea ) = _ParseNameArea( substr( $sWk, 16 ) ); | |
my @aTtlR = (); | |
my @aTtlC = (); | |
foreach my $raI ( @$raArea ) { | |
if ( $raI->[3] == 0xFF ) { #Row Title | |
push @aTtlR, [ $raI->[0], $raI->[2] ]; | |
} | |
else { #Col Title | |
push @aTtlC, [ $raI->[1], $raI->[3] ]; | |
} | |
} | |
$oBook->{PrintTitle}[$iSheet] = | |
{ Row => \@aTtlR, Column => \@aTtlC }; | |
} | |
} | |
else { | |
my $iName = unpack( 'c', substr( $sWk, 14 ) ); | |
if ( $iName == 6 ) { #PrintArea | |
my ( $iSheet, $raArea ) = | |
_ParseNameArea95( substr( $sWk, 15 ) ); | |
$oBook->{PrintArea}[$iSheet] = $raArea; | |
} | |
elsif ( $iName == 7 ) { #Title | |
my ( $iSheet, $raArea ) = | |
_ParseNameArea95( substr( $sWk, 15 ) ); | |
my @aTtlR = (); | |
my @aTtlC = (); | |
foreach my $raI ( @$raArea ) { | |
if ( $raI->[3] == 0xFF ) { #Row Title | |
push @aTtlR, [ $raI->[0], $raI->[2] ]; | |
} | |
else { #Col Title | |
push @aTtlC, [ $raI->[1], $raI->[3] ]; | |
} | |
} | |
$oBook->{PrintTitle}[$iSheet] = | |
{ Row => \@aTtlR, Column => \@aTtlC }; | |
} | |
} | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# ParseNameArea (for Spreadsheet::ParseExcel) DK: 494 (ptgAread3d) | |
#------------------------------------------------------------------------------ | |
sub _ParseNameArea { | |
my ( $sObj ) = @_; | |
my ( $iOp ); | |
my @aRes = (); | |
$iOp = unpack( 'C', $sObj ); | |
my $iSheet; | |
if ( $iOp == 0x3b ) { | |
my ( $iWkS, $iRs, $iRe, $iCs, $iCe ) = | |
unpack( 'v5', substr( $sObj, 1 ) ); | |
$iSheet = $iWkS; | |
push @aRes, [ $iRs, $iCs, $iRe, $iCe ]; | |
} | |
elsif ( $iOp == 0x29 ) { | |
my $iLen = unpack( 'v', substr( $sObj, 1, 2 ) ); | |
my $iSt = 0; | |
while ( $iSt < $iLen ) { | |
my ( $iOpW, $iWkS, $iRs, $iRe, $iCs, $iCe ) = | |
unpack( 'cv5', substr( $sObj, $iSt + 3, 11 ) ); | |
if ( $iOpW == 0x3b ) { | |
$iSheet = $iWkS; | |
push @aRes, [ $iRs, $iCs, $iRe, $iCe ]; | |
} | |
if ( $iSt == 0 ) { | |
$iSt += 11; | |
} | |
else { | |
$iSt += 12; #Skip 1 byte; | |
} | |
} | |
} | |
return ( $iSheet, \@aRes ); | |
} | |
#------------------------------------------------------------------------------ | |
# ParseNameArea95 (for Spreadsheet::ParseExcel) DK: 494 (ptgAread3d) | |
#------------------------------------------------------------------------------ | |
sub _ParseNameArea95 { | |
my ( $sObj ) = @_; | |
my ( $iOp ); | |
my @aRes = (); | |
$iOp = unpack( 'C', $sObj ); | |
my $iSheet; | |
if ( $iOp == 0x3b ) { | |
$iSheet = unpack( 'v', substr( $sObj, 11, 2 ) ); | |
my ( $iRs, $iRe, $iCs, $iCe ) = | |
unpack( 'v2C2', substr( $sObj, 15, 6 ) ); | |
push @aRes, [ $iRs, $iCs, $iRe, $iCe ]; | |
} | |
elsif ( $iOp == 0x29 ) { | |
my $iLen = unpack( 'v', substr( $sObj, 1, 2 ) ); | |
my $iSt = 0; | |
while ( $iSt < $iLen ) { | |
my $iOpW = unpack( 'c', substr( $sObj, $iSt + 3, 6 ) ); | |
$iSheet = unpack( 'v', substr( $sObj, $iSt + 14, 2 ) ); | |
my ( $iRs, $iRe, $iCs, $iCe ) = | |
unpack( 'v2C2', substr( $sObj, $iSt + 18, 6 ) ); | |
push @aRes, [ $iRs, $iCs, $iRe, $iCe ] if ( $iOpW == 0x3b ); | |
if ( $iSt == 0 ) { | |
$iSt += 21; | |
} | |
else { | |
$iSt += 22; #Skip 1 byte; | |
} | |
} | |
} | |
return ( $iSheet, \@aRes ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subBOOL (for Spreadsheet::ParseExcel) DK: P452 | |
#------------------------------------------------------------------------------ | |
sub _subWSBOOL { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PageFit} = | |
( ( unpack( 'v', $sWk ) & 0x100 ) ? 1 : 0 ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subMergeArea (for Spreadsheet::ParseExcel) DK: (Not) | |
#------------------------------------------------------------------------------ | |
sub _subMergeArea { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $iCnt = unpack( "v", $sWk ); | |
my $oWkS = $oBook->{Worksheet}[ $oBook->{_CurSheet} ]; | |
$oWkS->{MergedArea} = [] unless ( defined $oWkS->{MergedArea} ); | |
for ( my $i = 0 ; $i < $iCnt ; $i++ ) { | |
my ( $iRs, $iRe, $iCs, $iCe ) = | |
unpack( 'v4', substr( $sWk, $i * 8 + 2, 8 ) ); | |
for ( my $iR = $iRs ; $iR <= $iRe ; $iR++ ) { | |
for ( my $iC = $iCs ; $iC <= $iCe ; $iC++ ) { | |
$oWkS->{Cells}[$iR][$iC]->{Merged} = 1 | |
if ( defined $oWkS->{Cells}[$iR][$iC] ); | |
} | |
} | |
push @{ $oWkS->{MergedArea} }, [ $iRs, $iCs, $iRe, $iCe ]; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# DecodeBoolErr (for Spreadsheet::ParseExcel) DK: P306 | |
#------------------------------------------------------------------------------ | |
sub DecodeBoolErr { | |
my ( $iVal, $iFlg ) = @_; | |
if ( $iFlg ) { # ERROR | |
if ( $iVal == 0x00 ) { | |
return "#NULL!"; | |
} | |
elsif ( $iVal == 0x07 ) { | |
return "#DIV/0!"; | |
} | |
elsif ( $iVal == 0x0F ) { | |
return "#VALUE!"; | |
} | |
elsif ( $iVal == 0x17 ) { | |
return "#REF!"; | |
} | |
elsif ( $iVal == 0x1D ) { | |
return "#NAME?"; | |
} | |
elsif ( $iVal == 0x24 ) { | |
return "#NUM!"; | |
} | |
elsif ( $iVal == 0x2A ) { | |
return "#N/A!"; | |
} | |
else { | |
return "#ERR"; | |
} | |
} | |
else { | |
return ( $iVal ) ? "TRUE" : "FALSE"; | |
} | |
} | |
############################################################################### | |
# | |
# _decode_rk_number() | |
# | |
# Convert an encoded RK number into a real number. The RK encoding is | |
# explained in some detail in the MS docs. It is a way of storing applicable | |
# ints and doubles in 32bits (30 data + 2 info bits) in order to save space. | |
# | |
sub _decode_rk_number { | |
my $rk_number = shift; | |
my $number; | |
# Check the main RK type. | |
if ( $rk_number & 0x02 ) { | |
# RK Type 2 and 4, a packed integer. | |
# Shift off the info bits. | |
$number = $rk_number >> 2; | |
# Convert from unsigned to signed if required. | |
$number -= 0x40000000 if $number & 0x20000000; | |
} | |
else { | |
# RK Type 1 and 3, a truncated IEEE Double. | |
# Pack the RK number into the high 30 bits of an IEEE double. | |
$number = pack "VV", 0x0000, $rk_number & 0xFFFFFFFC; | |
# Reverse the packed IEEE double on big-endian machines. | |
$number = reverse $number if $BIGENDIAN; | |
# Unpack the number. | |
$number = unpack "d", $number; | |
} | |
# RK Types 3 and 4 were multiplied by 100 prior to encoding. | |
$number /= 100 if $rk_number & 0x01; | |
return $number; | |
} | |
############################################################################### | |
# | |
# _subStrWk() | |
# | |
# Extract the workbook strings from the SST (Shared String Table) record and | |
# any following CONTINUE records. | |
# | |
# The workbook strings are initially contained in the SST block but may also | |
# occupy one or more CONTINUE blocks. Reading the CONTINUE blocks is made a | |
# little tricky by the fact that they can contain an additional initial byte | |
# if a string is continued from a previous block. | |
# | |
# Parsing is further complicated by the fact that the continued section of the | |
# string may have a different encoding (ASCII or UTF-8) from the previous | |
# section. Excel does this to save space. | |
# | |
sub _subStrWk { | |
my ( $self, $biff_data, $is_continue ) = @_; | |
if ( $is_continue ) { | |
# We are reading a CONTINUE record. | |
if ( $self->{_buffer} eq '' ) { | |
# A CONTINUE block with no previous SST. | |
$self->{_buffer} .= $biff_data; | |
} | |
elsif ( !defined $self->{_string_continued} ) { | |
# The CONTINUE block starts with a new (non-continued) string. | |
# Strip the Grbit byte and store the string data. | |
$self->{_buffer} .= substr $biff_data, 1; | |
} | |
else { | |
# A CONTINUE block that starts with a continued string. | |
# The first byte (Grbit) of the CONTINUE record indicates if (0) | |
# the continued string section is single bytes or (1) double bytes. | |
my $grbit = ord $biff_data; | |
my ( $str_position, $str_length ) = @{ $self->{_previous_info} }; | |
my $buff_length = length $self->{_buffer}; | |
if ( $buff_length >= ( $str_position + $str_length ) ) { | |
# Not in a string. | |
$self->{_buffer} .= $biff_data; | |
} | |
elsif ( ( $self->{_string_continued} & 0x01 ) == ( $grbit & 0x01 ) ) | |
{ | |
# Same encoding as the previous block of the string. | |
$self->{_buffer} .= substr( $biff_data, 1 ); | |
} | |
else { | |
# Different encoding to the previous block of the string. | |
if ( $grbit & 0x01 ) { | |
# Current block is UTF-16, previous was ASCII. | |
my ( undef, $cch ) = unpack 'vc', $self->{_buffer}; | |
substr( $self->{_buffer}, 2, 1 ) = pack( 'C', $cch | 0x01 ); | |
# Convert the previous ASCII, single character, portion of | |
# the string into a double character UTF-16 string by | |
# inserting zero bytes. | |
for ( | |
my $i = ( $buff_length - $str_position ) ; | |
$i >= 1 ; | |
$i-- | |
) | |
{ | |
substr( $self->{_buffer}, $str_position + $i, 0 ) = | |
"\x00"; | |
} | |
} | |
else { | |
# Current block is ASCII, previous was UTF-16. | |
# Convert the current ASCII, single character, portion of | |
# the string into a double character UTF-16 string by | |
# inserting null bytes. | |
my $change_length = | |
( $str_position + $str_length ) - $buff_length; | |
# Length of the current CONTINUE record data. | |
my $biff_length = length $biff_data; | |
# Restrict the portion to be changed to the current block | |
# if the string extends over more than one block. | |
if ( $change_length > ( $biff_length - 1 ) * 2 ) { | |
$change_length = ( $biff_length - 1 ) * 2; | |
} | |
# Insert the null bytes. | |
for ( my $i = ( $change_length / 2 ) ; $i >= 1 ; $i-- ) { | |
substr( $biff_data, $i + 1, 0 ) = "\x00"; | |
} | |
} | |
# Strip the Grbit byte and store the string data. | |
$self->{_buffer} .= substr $biff_data, 1; | |
} | |
} | |
} | |
else { | |
# Not a CONTINUE block therefore an SST block. | |
$self->{_buffer} .= $biff_data; | |
} | |
# Reset the state variables. | |
$self->{_string_continued} = undef; | |
$self->{_previous_info} = undef; | |
# Extract out any full strings from the current buffer leaving behind a | |
# partial string that is continued into the next block, or an empty | |
# buffer is no string is continued. | |
while ( length $self->{_buffer} >= 4 ) { | |
my ( $str_info, $length, $str_position, $str_length ) = | |
_convBIFF8String( $self, $self->{_buffer}, 1 ); | |
if ( defined $str_info->[0] ) { | |
push @{ $self->{PkgStr} }, | |
{ | |
Text => $str_info->[0], | |
Unicode => $str_info->[1], | |
Rich => $str_info->[2], | |
Ext => $str_info->[3], | |
}; | |
$self->{_buffer} = substr( $self->{_buffer}, $length ); | |
} | |
else { | |
$self->{_string_continued} = $str_info->[1]; | |
$self->{_previous_info} = [ $str_position, $str_length ]; | |
last; | |
} | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _SwapForUnicode (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _SwapForUnicode { | |
my ( $sObj ) = @_; | |
# for(my $i = 0; $i<length($$sObj); $i+=2){ | |
for ( my $i = 0 ; $i < ( int( length( $$sObj ) / 2 ) * 2 ) ; $i += 2 ) { | |
my $sIt = substr( $$sObj, $i, 1 ); | |
substr( $$sObj, $i, 1 ) = substr( $$sObj, $i + 1, 1 ); | |
substr( $$sObj, $i + 1, 1 ) = $sIt; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _NewCell (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _NewCell { | |
my ( $oBook, $iR, $iC, %rhKey ) = @_; | |
my ( $sWk, $iLen ); | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $FmtClass = $oBook->{FmtClass}; | |
$rhKey{Type} = | |
$FmtClass->ChkType( $rhKey{Numeric}, $rhKey{Format}{FmtIdx} ); | |
my $FmtStr = $oBook->{FormatStr}{ $rhKey{Format}{FmtIdx} }; | |
# Set "Date" type if required for numbers in a MulRK BIFF block. | |
if ( defined $FmtStr && $rhKey{Type} eq "Numeric" ) { | |
# Match a range of possible date formats. Note: this isn't important | |
# except for reporting. The number will still be converted to a date | |
# by ExcelFmt() even if 'Type' isn't set to 'Date'. | |
if ( $FmtStr =~ m{^[dmy][-\\/dmy]*$}i ) { | |
$rhKey{Type} = "Date"; | |
} | |
} | |
my $oCell = Spreadsheet::ParseExcel::Cell->new( | |
Val => $rhKey{Val}, | |
FormatNo => $rhKey{FormatNo}, | |
Format => $rhKey{Format}, | |
Code => $rhKey{Code}, | |
Type => $rhKey{Type}, | |
); | |
$oCell->{_Kind} = $rhKey{Kind}; | |
$oCell->{_Value} = $FmtClass->ValFmt( $oCell, $oBook ); | |
if ( $rhKey{Rich} ) { | |
my @aRich = (); | |
my $sRich = $rhKey{Rich}; | |
for ( my $iWk = 0 ; $iWk < length( $sRich ) ; $iWk += 4 ) { | |
my ( $iPos, $iFnt ) = unpack( 'v2', substr( $sRich, $iWk ) ); | |
push @aRich, [ $iPos, $oBook->{Font}[$iFnt] ]; | |
} | |
$oCell->{Rich} = \@aRich; | |
} | |
if ( defined $oBook->{CellHandler} ) { | |
if ( defined $oBook->{Object} ) { | |
no strict; | |
ref( $oBook->{CellHandler} ) eq "CODE" | |
? $oBook->{CellHandler}->( | |
$_Object, $oBook, $oBook->{_CurSheet}, $iR, $iC, $oCell | |
) | |
: $oBook->{CellHandler}->callback( $_Object, $oBook, $oBook->{_CurSheet}, | |
$iR, $iC, $oCell ); | |
} | |
else { | |
$oBook->{CellHandler}->( $oBook, $oBook->{_CurSheet}, $iR, $iC, $oCell ); | |
} | |
} | |
unless ( $oBook->{NotSetCell} ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Cells}[$iR][$iC] = $oCell; | |
} | |
return $oCell; | |
} | |
#------------------------------------------------------------------------------ | |
# ColorIdxToRGB (for Spreadsheet::ParseExcel) | |
# | |
# Returns for most recently opened book for compatibility, use | |
# Workbook::color_idx_to_rgb instead | |
# | |
#------------------------------------------------------------------------------ | |
sub ColorIdxToRGB { | |
my ( $sPkg, $iIdx ) = @_; | |
unless( defined $currentbook ) { | |
return ( ( defined $aColor[$iIdx] ) ? $aColor[$iIdx] : $aColor[0] ); | |
} | |
return $currentbook->color_idx_to_rgb( $iIdx ); | |
} | |
############################################################################### | |
# | |
# error(). | |
# | |
# Return an error string for a failed parse(). | |
# | |
sub error { | |
my $self = shift; | |
my $parse_error = $self->{_error_status}; | |
if ( exists $error_strings{$parse_error} ) { | |
return $error_strings{$parse_error}; | |
} | |
else { | |
return 'Unknown parse error'; | |
} | |
} | |
############################################################################### | |
# | |
# error_code(). | |
# | |
# Return an error code for a failed parse(). | |
# | |
sub error_code { | |
my $self = shift; | |
return $self->{_error_status}; | |
} | |
############################################################################### | |
# | |
# Mapping between legacy method names and new names. | |
# | |
{ | |
no warnings; # Ignore warnings about variables used only once. | |
*Parse = *parse; | |
} | |
1; | |
__END__ | |
=head1 NAME | |
Spreadsheet::ParseExcel - Read information from an Excel file. | |
=head1 SYNOPSIS | |
#!/usr/bin/perl -w | |
use strict; | |
use Spreadsheet::ParseExcel; | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
if ( !defined $workbook ) { | |
die $parser->error(), ".\n"; | |
} | |
for my $worksheet ( $workbook->worksheets() ) { | |
my ( $row_min, $row_max ) = $worksheet->row_range(); | |
my ( $col_min, $col_max ) = $worksheet->col_range(); | |
for my $row ( $row_min .. $row_max ) { | |
for my $col ( $col_min .. $col_max ) { | |
my $cell = $worksheet->get_cell( $row, $col ); | |
next unless $cell; | |
print "Row, Col = ($row, $col)\n"; | |
print "Value = ", $cell->value(), "\n"; | |
print "Unformatted = ", $cell->unformatted(), "\n"; | |
print "\n"; | |
} | |
} | |
} | |
=head1 DESCRIPTION | |
The Spreadsheet::ParseExcel module can be used to read information from Excel 95-2003 binary files. | |
The module cannot read files in the Excel 2007 Open XML XLSX format. See the L<Spreadsheet::XLSX> module instead. | |
=head1 Parser | |
=head2 new() | |
The C<new()> method is used to create a new C<Spreadsheet::ParseExcel> parser object. | |
my $parser = Spreadsheet::ParseExcel->new(); | |
It is possible to pass a password to decrypt an encrypted file: | |
$parser = Spreadsheet::ParseExcel->new( Password => 'secret' ); | |
Only the default Excel encryption scheme is currently supported. See L</Decryption>. | |
As an advanced feature it is also possible to pass a call-back handler to the parser to control the parsing of the spreadsheet. | |
$parser = Spreadsheet::ParseExcel->new( | |
CellHandler => \&cell_handler, | |
NotSetCell => 1, | |
); | |
The call-back can be used to ignore certain cells or to reduce memory usage. See the section L<Reducing the memory usage of Spreadsheet::ParseExcel> for more information. | |
=head2 parse($filename, $formatter) | |
The Parser C<parse()> method returns a L</Workbook> object. | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
If an error occurs C<parse()> returns C<undef>. In general, programs should contain a test for failed parsing as follows: | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
if ( !defined $workbook ) { | |
die $parser->error(), ".\n"; | |
} | |
The C<$filename> parameter is generally the file to be parsed. However, it can also be a filehandle or a scalar reference. | |
The optional C<$formatter> parameter can be an reference to a L</Formatter Class> to format the value of cells. This is useful for parsing workbooks with Unicode or Asian characters: | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $formatter = Spreadsheet::ParseExcel::FmtJapan->new(); | |
my $workbook = $parser->parse( 'Book1.xls', $formatter ); | |
The L<Spreadsheet::ParseExcel::FmtJapan> formatter also supports Unicode. If you encounter any encoding problems with the default formatter try that instead. | |
=head2 error() | |
The Parser C<error()> method returns an error string if a C<parse()> fails: | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
if ( !defined $workbook ) { | |
die $parser->error(), ".\n"; | |
} | |
If you wish to generate you own error string you can use the C<error_code()> method instead (see below). The C<error()> and C<error_code()> values are as follows: | |
error() error_code() | |
======= ============ | |
'' 0 | |
'File not found' 1 | |
'No Excel data found in file' 2 | |
'File is encrypted' 3 | |
The C<error_code()> method is explained below. | |
Spreadsheet::ParseExcel will try to decrypt an encrypted Excel file using the default password or a user supplied password passed to C<new()>, see above. If these fail the module will return the C<'File is encrypted'> error. Only the default Excel encryption scheme is currently supported, see L</Decryption>. | |
=head2 error_code() | |
The Parser C<error_code()> method returns an error code if a C<parse()> fails: | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
if ( !defined $workbook ) { | |
die "Got error code ", $parser->error_code, ".\n"; | |
} | |
This can be useful if you wish to employ you own error strings or error handling methods. | |
=head1 Workbook | |
A C<Spreadsheet::ParseExcel::Workbook> is created via the C<Spreadsheet::ParseExcel> C<parse()> method: | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
The main methods of the Workbook class are: | |
$workbook->worksheets() | |
$workbook->worksheet() | |
$workbook->worksheet_count() | |
$workbook->get_filename() | |
These more commonly used methods of the Workbook class are outlined below. The other, less commonly used, methods are documented in L<Spreadsheet::ParseExcel::Worksheet>. | |
=head2 worksheets() | |
Returns an array of L</Worksheet> objects. This was most commonly used to iterate over the worksheets in a workbook: | |
for my $worksheet ( $workbook->worksheets() ) { | |
... | |
} | |
=head2 worksheet() | |
The C<worksheet()> method returns a single C<Worksheet> object using either its name or index: | |
$worksheet = $workbook->worksheet('Sheet1'); | |
$worksheet = $workbook->worksheet(0); | |
Returns C<undef> if the sheet name or index doesn't exist. | |
=head2 worksheet_count() | |
The C<worksheet_count()> method returns the number of Worksheet objects in the Workbook. | |
my $worksheet_count = $workbook->worksheet_count(); | |
=head2 get_filename() | |
The C<get_filename()> method returns the name of the Excel file of C<undef> if the data was read from a filehandle rather than a file. | |
my $filename = $workbook->get_filename(); | |
=head2 Other Workbook Methods | |
For full documentation of the methods available via a Workbook object see L<Spreadsheet::ParseExcel::Workbook>. | |
=head1 Worksheet | |
The C<Spreadsheet::ParseExcel::Worksheet> class encapsulates the properties of an Excel worksheet. | |
A Worksheet object is obtained via the L</worksheets()> or L</worksheet()> methods. | |
for my $worksheet ( $workbook->worksheets() ) { | |
... | |
} | |
# Or: | |
$worksheet = $workbook->worksheet('Sheet1'); | |
$worksheet = $workbook->worksheet(0); | |
The most commonly used methods of the Worksheet class are: | |
$worksheet->get_cell() | |
$worksheet->row_range() | |
$worksheet->col_range() | |
$worksheet->get_name() | |
The Spreadsheet::ParseExcel::Worksheet class exposes a lot of methods but in general very few are required unless you are writing an advanced filter. | |
The most commonly used methods are detailed below. The others are documented in L<Spreadsheet::ParseExcel::Worksheet>. | |
=head2 get_cell($row, $col) | |
Return the L</Cell> object at row C<$row> and column C<$col> if it is defined. Otherwise returns undef. | |
my $cell = $worksheet->get_cell($row, $col); | |
=head2 row_range() | |
Returns a two-element list C<($min, $max)> containing the minimum and maximum defined rows in the worksheet. If there is no row defined C<$max> is smaller than C<$min>. | |
my ( $row_min, $row_max ) = $worksheet->row_range(); | |
=head2 col_range() | |
Returns a two-element list C<($min, $max)> containing the minimum and maximum of defined columns in the worksheet. If there is no column defined C<$max> is smaller than C<$min>. | |
my ( $col_min, $col_max ) = $worksheet->col_range(); | |
=head2 get_name() | |
The C<get_name()> method returns the name of the worksheet, such as 'Sheet1'. | |
my $name = $worksheet->get_name(); | |
=head2 Other Worksheet Methods | |
For other, less commonly used, Worksheet methods see L<Spreadsheet::ParseExcel::Worksheet>. | |
=head1 Cell | |
The C<Spreadsheet::ParseExcel::Cell> class has the following main methods. | |
$cell->value() | |
$cell->unformatted() | |
=head2 value() | |
The C<value()> method returns the formatted value of the cell. | |
my $value = $cell->value(); | |
Formatted in this sense refers to the numeric format of the cell value. For example a number such as 40177 might be formatted as 40,117, 40117.000 or even as the date 2009/12/30. | |
If the cell doesn't contain a numeric format then the formatted and unformatted cell values are the same, see the C<unformatted()> method below. | |
For a defined C<$cell> the C<value()> method will always return a value. | |
In the case of a cell with formatting but no numeric or string contents the method will return the empty string C<''>. | |
=head2 unformatted() | |
The C<unformatted()> method returns the unformatted value of the cell. | |
my $unformatted = $cell->unformatted(); | |
Returns the cell value without a numeric format. See the C<value()> method above. | |
=head2 Other Cell Methods | |
For other, less commonly used, Worksheet methods see L<Spreadsheet::ParseExcel::Cell>. | |
=head1 Format | |
The C<Spreadsheet::ParseExcel::Format> class has the following properties: | |
=head2 Format properties | |
$format->{Font} | |
$format->{AlignH} | |
$format->{AlignV} | |
$format->{Indent} | |
$format->{Wrap} | |
$format->{Shrink} | |
$format->{Rotate} | |
$format->{JustLast} | |
$format->{ReadDir} | |
$format->{BdrStyle} | |
$format->{BdrColor} | |
$format->{BdrDiag} | |
$format->{Fill} | |
$format->{Lock} | |
$format->{Hidden} | |
$format->{Style} | |
These properties are generally only of interest to advanced users. Casual users can skip this section. | |
=head2 $format->{Font} | |
Returns the L</Font> object for the Format. | |
=head2 $format->{AlignH} | |
Returns the horizontal alignment of the format where the value has the following meaning: | |
0 => No alignment | |
1 => Left | |
2 => Center | |
3 => Right | |
4 => Fill | |
5 => Justify | |
6 => Center across | |
7 => Distributed/Equal spaced | |
=head2 $format->{AlignV} | |
Returns the vertical alignment of the format where the value has the following meaning: | |
0 => Top | |
1 => Center | |
2 => Bottom | |
3 => Justify | |
4 => Distributed/Equal spaced | |
=head2 $format->{Indent} | |
Returns the indent level of the C<Left> horizontal alignment. | |
=head2 $format->{Wrap} | |
Returns true if textwrap is on. | |
=head2 $format->{Shrink} | |
Returns true if "Shrink to fit" is set for the format. | |
=head2 $format->{Rotate} | |
Returns the text rotation. In Excel97+, it returns the angle in degrees of the text rotation. | |
In Excel95 or earlier it returns a value as follows: | |
0 => No rotation | |
1 => Top down | |
2 => 90 degrees anti-clockwise, | |
3 => 90 clockwise | |
=head2 $format->{JustLast} | |
Return true if the "justify last" property is set for the format. | |
=head2 $format->{ReadDir} | |
Returns the direction that the text is read from. | |
=head2 $format->{BdrStyle} | |
Returns an array ref of border styles as follows: | |
[ $left, $right, $top, $bottom ] | |
=head2 $format->{BdrColor} | |
Returns an array ref of border color indexes as follows: | |
[ $left, $right, $top, $bottom ] | |
=head2 $format->{BdrDiag} | |
Returns an array ref of diagonal border kind, style and color index as follows: | |
[$kind, $style, $color ] | |
Where kind is: | |
0 => None | |
1 => Right-Down | |
2 => Right-Up | |
3 => Both | |
=head2 $format->{Fill} | |
Returns an array ref of fill pattern and color indexes as follows: | |
[ $pattern, $front_color, $back_color ] | |
=head2 $format->{Lock} | |
Returns true if the cell is locked. | |
=head2 $format->{Hidden} | |
Returns true if the cell is Hidden. | |
=head2 $format->{Style} | |
Returns true if the format is a Style format. | |
=head1 Font | |
I<Spreadsheet::ParseExcel::Font> | |
Format class has these properties: | |
=head1 Font Properties | |
$font->{Name} | |
$font->{Bold} | |
$font->{Italic} | |
$font->{Height} | |
$font->{Underline} | |
$font->{UnderlineStyle} | |
$font->{Color} | |
$font->{Strikeout} | |
$font->{Super} | |
=head2 $font->{Name} | |
Returns the name of the font, for example 'Arial'. | |
=head2 $font->{Bold} | |
Returns true if the font is bold. | |
=head2 $font->{Italic} | |
Returns true if the font is italic. | |
=head2 $font->{Height} | |
Returns the size (height) of the font. | |
=head2 $font->{Underline} | |
Returns true if the font in underlined. | |
=head2 $font->{UnderlineStyle} | |
Returns the style of an underlined font where the value has the following meaning: | |
0 => None | |
1 => Single | |
2 => Double | |
33 => Single accounting | |
34 => Double accounting | |
=head2 $font->{Color} | |
Returns the color index for the font. The mapping to an RGB color is defined by each workbook. | |
The index can be converted to a RGB string using the C<$workbook->ColorIdxToRGB()> Parser method. | |
(Older versions of C<Spreadsheet::ParseExcel> provided the C<ColorIdxToRGB> class method, which is deprecated.) | |
=head2 $font->{Strikeout} | |
Returns true if the font has the strikeout property set. | |
=head2 $font->{Super} | |
Returns one of the following values if the superscript or subscript property of the font is set: | |
0 => None | |
1 => Superscript | |
2 => Subscript | |
=head1 Formatter Class | |
Formatters can be passed to the C<parse()> method to deal with Unicode or Asian formatting. | |
Spreadsheet::ParseExcel includes 2 formatter classes. C<FmtDefault> and C<FmtJapanese>. It is also possible to create a user defined formatting class. | |
The formatter class C<Spreadsheet::ParseExcel::Fmt*> should provide the following functions: | |
=head2 ChkType($self, $is_numeric, $format_index) | |
Method to check the type of data in the cell. Should return C<Date>, C<Numeric> or C<Text>. It is passed the following parameters: | |
=over | |
=item $self | |
A scalar reference to the Formatter object. | |
=item $is_numeric | |
If true, the value seems to be number. | |
=item $format_index | |
The index number for the cell Format object. | |
=back | |
=head2 TextFmt($self, $string_data, $string_encoding) | |
Converts the string data in the cell into the correct encoding. It is passed the following parameters: | |
=over | |
=item $self | |
A scalar reference to the Formatter object. | |
=item $string_data | |
The original string/text data. | |
=item $string_encoding | |
The character encoding of original string/text. | |
=back | |
=head2 ValFmt($self, $cell, $workbook) | |
Convert the original unformatted cell value into the appropriate formatted value. For instance turn a number into a formatted date. It is passed the following parameters: | |
=over | |
=item $self | |
A scalar reference to the Formatter object. | |
=item $cell | |
A scalar reference to the Cell object. | |
=item $workbook | |
A scalar reference to the Workbook object. | |
=back | |
=head2 FmtString($self, $cell, $workbook) | |
Get the format string for the Cell. It is passed the following parameters: | |
=over | |
=item $self | |
A scalar reference to the Formatter object. | |
=item $cell | |
A scalar reference to the Cell object. | |
=item $workbook | |
A scalar reference to the Workbook object. | |
=back | |
=head1 Reducing the memory usage of Spreadsheet::ParseExcel | |
In some cases a C<Spreadsheet::ParseExcel> application may consume a lot of memory when processing a large Excel file and, as a result, may fail to complete. The following explains why this can occur and how to resolve it. | |
C<Spreadsheet::ParseExcel> processes an Excel file in two stages. In the first stage it extracts the Excel binary stream from the OLE container file using C<OLE::Storage_Lite>. In the second stage it parses the binary stream to read workbook, worksheet and cell data which it then stores in memory. The majority of the memory usage is required for storing cell data. | |
The reason for this is that as the Excel file is parsed and each cell is encountered a cell handling function creates a relatively large nested cell object that contains the cell value and all of the data that relates to the cell formatting. For large files (a 10MB Excel file on a 256MB system) this overhead can cause the system to grind to a halt. | |
However, in a lot of cases when an Excel file is being processed the only information that is required are the cell values. In these cases it is possible to avoid most of the memory overhead by specifying your own cell handling function and by telling Spreadsheet::ParseExcel not to store the parsed cell data. This is achieved by passing a cell handler function to C<new()> when creating the parse object. Here is an example. | |
#!/usr/bin/perl -w | |
use strict; | |
use Spreadsheet::ParseExcel; | |
my $parser = Spreadsheet::ParseExcel->new( | |
CellHandler => \&cell_handler, | |
NotSetCell => 1 | |
); | |
my $workbook = $parser->parse('file.xls'); | |
sub cell_handler { | |
my $workbook = $_[0]; | |
my $sheet_index = $_[1]; | |
my $row = $_[2]; | |
my $col = $_[3]; | |
my $cell = $_[4]; | |
# Do something useful with the formatted cell value | |
print $cell->value(), "\n"; | |
} | |
The user specified cell handler is passed as a code reference to C<new()> along with the parameter C<NotSetCell> which tells Spreadsheet::ParseExcel not to store the parsed cell. Note, you don't have to iterate over the rows and columns, this happens automatically as part of the parsing. | |
The cell handler is passed 5 arguments. The first, C<$workbook>, is a reference to the C<Spreadsheet::ParseExcel::Workbook> object that represent the parsed workbook. This can be used to access any of the C<Spreadsheet::ParseExcel::Workbook> methods, see L</Workbook>. The second C<$sheet_index> is the zero-based index of the worksheet being parsed. The third and fourth, C<$row> and C<$col>, are the zero-based row and column number of the cell. The fifth, C<$cell>, is a reference to the C<Spreadsheet::ParseExcel::Cell> object. This is used to extract the data from the cell. See L</Cell> for more information. | |
This technique can be useful if you are writing an Excel to database filter since you can put your DB calls in the cell handler. | |
If you don't want all of the data in the spreadsheet you can add some control logic to the cell handler. For example we can extend the previous example so that it only prints the first 10 rows of the first two worksheets in the parsed workbook by adding some C<if()> statements to the cell handler: | |
#!/usr/bin/perl -w | |
use strict; | |
use Spreadsheet::ParseExcel; | |
my $parser = Spreadsheet::ParseExcel->new( | |
CellHandler => \&cell_handler, | |
NotSetCell => 1 | |
); | |
my $workbook = $parser->parse('file.xls'); | |
sub cell_handler { | |
my $workbook = $_[0]; | |
my $sheet_index = $_[1]; | |
my $row = $_[2]; | |
my $col = $_[3]; | |
my $cell = $_[4]; | |
# Skip some worksheets and rows (inefficiently). | |
return if $sheet_index >= 3; | |
return if $row >= 10; | |
# Do something with the formatted cell value | |
print $cell->value(), "\n"; | |
} | |
However, this still processes the entire workbook. If you wish to save some additional processing time you can abort the parsing after you have read the data that you want, using the workbook C<ParseAbort> method: | |
#!/usr/bin/perl -w | |
use strict; | |
use Spreadsheet::ParseExcel; | |
my $parser = Spreadsheet::ParseExcel->new( | |
CellHandler => \&cell_handler, | |
NotSetCell => 1 | |
); | |
my $workbook = $parser->parse('file.xls'); | |
sub cell_handler { | |
my $workbook = $_[0]; | |
my $sheet_index = $_[1]; | |
my $row = $_[2]; | |
my $col = $_[3]; | |
my $cell = $_[4]; | |
# Skip some worksheets and rows (more efficiently). | |
if ( $sheet_index >= 1 and $row >= 10 ) { | |
$workbook->ParseAbort(1); | |
return; | |
} | |
# Do something with the formatted cell value | |
print $cell->value(), "\n"; | |
} | |
=head1 Decryption | |
If a workbook is "protected" then Excel will encrypt the file whether a password is supplied or not. As of version 0.59 Spreadsheet::ParseExcel supports decrypting Excel workbooks using a default or user supplied password. However, only the following encryption scheme is supported: | |
Office 97/2000 Compatible encryption | |
The following encryption methods are not supported: | |
Weak Encryption (XOR) | |
RC4, Microsoft Base Cryptographic Provider v1.0 | |
RC4, Microsoft Base DSS and Diffie-Hellman Cryptographic Provider | |
RC4, Microsoft DH SChannel Cryptographic Provider | |
RC4, Microsoft Enhanced Cryptographic Provider v1.0 | |
RC4, Microsoft Enhanced DSS and Diffie-Hellman Cryptographic Provider | |
RC4, Microsoft Enhanced RSA and AES Cryptographic Provider | |
RC4, Microsoft RSA SChannel Cryptographic Provider | |
RC4, Microsoft Strong Cryptographic Provider | |
See the following for more information on Excel encryption: L<http://office.microsoft.com/en-us/office-2003-resource-kit/important-aspects-of-password-and-encryption-protection-HA001140311.aspx>. | |
=head1 KNOWN PROBLEMS | |
=over | |
=item * Issues reported by users: L<http://rt.cpan.org/Public/Dist/Display.html?Name=Spreadsheet-ParseExcel> | |
=item * This module cannot read the values of formulas from files created with Spreadsheet::WriteExcel unless the user specified the values when creating the file (which is generally not the case). The reason for this is that Spreadsheet::WriteExcel writes the formula but not the formula result since it isn't in a position to calculate arbitrary Excel formulas without access to Excel's formula engine. | |
=item * If Excel has date fields where the specified format is equal to the system-default for the short-date locale, Excel does not store the format, but defaults to an internal format which is system dependent. In these cases ParseExcel uses the date format 'yyyy-mm-dd'. | |
=back | |
=head1 REPORTING A BUG | |
Bugs can be reported via rt.cpan.org. See the following for instructions on bug reporting for Spreadsheet::ParseExcel | |
L<http://rt.cpan.org/Public/Dist/Display.html?Name=Spreadsheet-ParseExcel> | |
=head1 SEE ALSO | |
=over | |
=item * xls2csv by Ken Prows L<http://search.cpan.org/~ken/xls2csv-1.06/script/xls2csv>. | |
=item * xls2csv and xlscat by H.Merijn Brand (these utilities are part of Spreadsheet::Read, see below). | |
=item * excel2txt by Ken Youens-Clark, L<http://search.cpan.org/~kclark/excel2txt/excel2txt>. This is an excellent example of an Excel filter using Spreadsheet::ParseExcel. It can produce CSV, Tab delimited, Html, XML and Yaml. | |
=item * XLSperl by Jon Allen L<http://search.cpan.org/~jonallen/XLSperl/bin/XLSperl>. This application allows you to use Perl "one-liners" with Microsoft Excel files. | |
=item * Spreadsheet::XLSX L<http://search.cpan.org/~dmow/Spreadsheet-XLSX/lib/Spreadsheet/XLSX.pm> by Dmitry Ovsyanko. A module with a similar interface to Spreadsheet::ParseExcel for parsing Excel 2007 XLSX OpenXML files. | |
=item * Spreadsheet::Read L<http://search.cpan.org/~hmbrand/Spreadsheet-Read/Read.pm> by H.Merijn Brand. A single interface for reading several different spreadsheet formats. | |
=item * Spreadsheet::WriteExcel L<http://search.cpan.org/~jmcnamara/Spreadsheet-WriteExcel/lib/Spreadsheet/WriteExcel.pm>. A perl module for creating new Excel files. | |
=item * Spreadsheet::ParseExcel::SaveParser L<http://search.cpan.org/~jmcnamara/Spreadsheet-ParseExcel/lib/Spreadsheet/ParseExcel/SaveParser.pm>. This is a combination of Spreadsheet::ParseExcel and Spreadsheet::WriteExcel and it allows you to "rewrite" an Excel file. See the following example L<http://search.cpan.org/~jmcnamara/Spreadsheet-WriteExcel/lib/Spreadsheet/WriteExcel.pm#MODIFYING_AND_REWRITING_EXCEL_FILES>. It is part of the Spreadsheet::ParseExcel distro. | |
=item * Text::CSV_XS L<http://search.cpan.org/~hmbrand/Text-CSV_XS/CSV_XS.pm> by H.Merijn Brand. A fast and rigorous module for reading and writing CSV data. Don't consider rolling your own CSV handling, use this module instead. | |
=back | |
=head1 MAILING LIST | |
There is a Google group for discussing and asking questions about Spreadsheet::ParseExcel. This is a good place to search to see if your question has been asked before: L<http://groups-beta.google.com/group/spreadsheet-parseexcel/> | |
=head1 DONATIONS | |
If you'd care to donate to the Spreadsheet::ParseExcel project, you can do so via PayPal: L<http://tinyurl.com/7ayes> | |
=head1 TODO | |
=over | |
=item * The current maintenance work is directed towards making the documentation more useful, improving and simplifying the API, and improving the maintainability of the code base. After that new features will be added. | |
=item * Fix open bugs and documentation for SaveParser. | |
=item * Add Formula support, Hyperlink support, Named Range support. | |
=item * Improve Spreadsheet::ParseExcel::SaveParser compatibility with Spreadsheet::WriteExcel. | |
=item * Improve Unicode and other encoding support. This will probably require dropping support for perls prior to 5.8+. | |
=back | |
=head1 ACKNOWLEDGEMENTS | |
From Kawai Takanori: | |
First of all, I would like to acknowledge the following valuable programs and modules: | |
XHTML, OLE::Storage and Spreadsheet::WriteExcel. | |
In no particular order: Yamaji Haruna, Simamoto Takesi, Noguchi Harumi, Ikezawa Kazuhiro, Suwazono Shugo, Hirofumi Morisada, Michael Edwards, Kim Namusk, Slaven Rezic, Grant Stevens, H.Merijn Brand and many many people + Kawai Mikako. | |
Alexey Mazurin added the decryption facility. | |
=head1 DISCLAIMER OF WARRANTY | |
Because this software is licensed free of charge, there is no warranty for the software, to the extent permitted by applicable law. Except when otherwise stated in writing the copyright holders and/or other parties provide the software "as is" without warranty of any kind, either expressed or implied, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. The entire risk as to the quality and performance of the software is with you. Should the software prove defective, you assume the cost of all necessary servicing, repair, or correction. | |
In no event unless required by applicable law or agreed to in writing will any copyright holder, or any other party who may modify and/or redistribute the software as permitted by the above licence, be liable to you for damages, including any general, special, incidental, or consequential damages arising out of the use or inability to use the software (including but not limited to loss of data or data being rendered inaccurate or losses sustained by you or third parties or a failure of the software to operate with any other software), even if such holder or other party has been advised of the possibility of such damages. | |
=head1 LICENSE | |
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php> | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori (Hippo2000) kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. This is free software. You may distribute under the terms of either the GNU General Public License or the Artistic License. | |
=cut | |
SPREADSHEET_PARSEEXCEL | |
$fatpacked{"Spreadsheet/ParseExcel/Cell.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_CELL'; | |
package Spreadsheet::ParseExcel::Cell; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Cell - A class for Cell data and formatting. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
our $VERSION = '0.65'; | |
############################################################################### | |
# | |
# new() | |
# | |
# Constructor. | |
# | |
sub new { | |
my ( $package, %properties ) = @_; | |
my $self = \%properties; | |
bless $self, $package; | |
} | |
############################################################################### | |
# | |
# value() | |
# | |
# Returns the formatted value of the cell. | |
# | |
sub value { | |
my $self = shift; | |
return $self->{_Value}; | |
} | |
############################################################################### | |
# | |
# unformatted() | |
# | |
# Returns the unformatted value of the cell. | |
# | |
sub unformatted { | |
my $self = shift; | |
return $self->{Val}; | |
} | |
############################################################################### | |
# | |
# get_format() | |
# | |
# Returns the Format object for the cell. | |
# | |
sub get_format { | |
my $self = shift; | |
return $self->{Format}; | |
} | |
############################################################################### | |
# | |
# type() | |
# | |
# Returns the type of cell such as Text, Numeric or Date. | |
# | |
sub type { | |
my $self = shift; | |
return $self->{Type}; | |
} | |
############################################################################### | |
# | |
# encoding() | |
# | |
# Returns the character encoding of the cell. | |
# | |
sub encoding { | |
my $self = shift; | |
if ( !defined $self->{Code} ) { | |
return 1; | |
} | |
elsif ( $self->{Code} eq 'ucs2' ) { | |
return 2; | |
} | |
elsif ( $self->{Code} eq '_native_' ) { | |
return 3; | |
} | |
else { | |
return 0; | |
} | |
return $self->{Code}; | |
} | |
############################################################################### | |
# | |
# is_merged() | |
# | |
# Returns true if the cell is merged. | |
# | |
sub is_merged { | |
my $self = shift; | |
return $self->{Merged}; | |
} | |
############################################################################### | |
# | |
# get_rich_text() | |
# | |
# Returns an array ref of font information about each string block in a "rich", | |
# i.e. multi-format, string. | |
# | |
sub get_rich_text { | |
my $self = shift; | |
return $self->{Rich}; | |
} | |
############################################################################### | |
# | |
# get_hyperlink { | |
# | |
# Returns an array ref of hyperlink information if the cell contains a hyperlink. | |
# Returns undef otherwise | |
# | |
# [0] : Description of link (You may want $cell->value, as it will have rich text) | |
# [1] : URL - the link expressed as a URL. N.B. relative URLs will be defaulted to | |
# the directory of the input file, if the input file name is known. Otherwise | |
# %REL% will be inserted as a place-holder. Depending on your application, | |
# you should either remove %REL% or replace it with the appropriate path. | |
# [2] : Target frame (or undef if none) | |
sub get_hyperlink { | |
my $self = shift; | |
return $self->{Hyperlink} if exists $self->{Hyperlink}; | |
return undef; | |
} | |
# | |
############################################################################### | |
# | |
# Mapping between legacy method names and new names. | |
# | |
{ | |
no warnings; # Ignore warnings about variables used only once. | |
*Value = \&value; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::Cell - A class for Cell data and formatting. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 Methods | |
The following Cell methods are available: | |
$cell->value() | |
$cell->unformatted() | |
$cell->get_format() | |
$cell->type() | |
$cell->encoding() | |
$cell->is_merged() | |
$cell->get_rich_text() | |
$cell->get_hyperlink() | |
=head2 value() | |
The C<value()> method returns the formatted value of the cell. | |
my $value = $cell->value(); | |
Formatted in this sense refers to the numeric format of the cell value. For example a number such as 40177 might be formatted as 40,117, 40117.000 or even as the date 2009/12/30. | |
If the cell doesn't contain a numeric format then the formatted and unformatted cell values are the same, see the C<unformatted()> method below. | |
For a defined C<$cell> the C<value()> method will always return a value. | |
In the case of a cell with formatting but no numeric or string contents the method will return the empty string C<''>. | |
=head2 unformatted() | |
The C<unformatted()> method returns the unformatted value of the cell. | |
my $unformatted = $cell->unformatted(); | |
Returns the cell value without a numeric format. See the C<value()> method above. | |
=head2 get_format() | |
The C<get_format()> method returns the L<Spreadsheet::ParseExcel::Format> object for the cell. | |
my $format = $cell->get_format(); | |
If a user defined format hasn't been applied to the cell then the default cell format is returned. | |
=head2 type() | |
The C<type()> method returns the type of cell such as Text, Numeric or Date. If the type was detected as Numeric, and the Cell Format matches C<m{^[dmy][-\\/dmy]*$}i>, it will be treated as a Date type. | |
my $type = $cell->type(); | |
See also L<Dates and Time in Excel>. | |
=head2 encoding() | |
The C<encoding()> method returns the character encoding of the cell. | |
my $encoding = $cell->encoding(); | |
This method is only of interest to developers. In general Spreadsheet::ParseExcel will return all character strings in UTF-8 regardless of the encoding used by Excel. | |
The C<encoding()> method returns one of the following values: | |
=over | |
=item * 0: Unknown format. This shouldn't happen. In the default case the format should be 1. | |
=item * 1: 8bit ASCII or single byte UTF-16. This indicates that the characters are encoded in a single byte. In Excel 95 and earlier This usually meant ASCII or an international variant. In Excel 97 it refers to a compressed UTF-16 character string where all of the high order bytes are 0 and are omitted to save space. | |
=item * 2: UTF-16BE. | |
=item * 3: Native encoding. In Excel 95 and earlier this encoding was used to represent multi-byte character encodings such as SJIS. | |
=back | |
=head2 is_merged() | |
The C<is_merged()> method returns true if the cell is merged. | |
my $is_merged = $cell->is_merged(); | |
Returns C<undef> if the property isn't set. | |
=head2 get_rich_text() | |
The C<get_rich_text()> method returns an array ref of font information about each string block in a "rich", i.e. multi-format, string. | |
my $rich_text = $cell->get_rich_text(); | |
The return value is an arrayref of arrayrefs in the form: | |
[ | |
[ $start_position, $font_object ], | |
..., | |
] | |
Returns undef if the property isn't set. | |
=head2 get_hyperlink() | |
If a cell contains a hyperlink, the C<get_hyperlink()> method returns an array ref of information about it. | |
A cell can contain at most one hyperlink. If it does, it contains no other value. | |
Otherwise, it returns undef; | |
The array contains: | |
=over | |
=item * 0: Description (what's displayed); undef if not present | |
=item * 1: Link, converted to an appropriate URL - Note: Relative links are based on the input file. %REL% is used if the input file is unknown (e.g. a file handle or scalar) | |
=item * 2: Target - target frame (or undef if none) | |
=back | |
=head1 Dates and Time in Excel | |
Dates and times in Excel are represented by real numbers, for example "Jan 1 2001 12:30 PM" is represented by the number 36892.521. | |
The integer part of the number stores the number of days since the epoch and the fractional part stores the percentage of the day. | |
A date or time in Excel is just like any other number. The way in which it is displayed is controlled by the number format: | |
Number format $cell->value() $cell->unformatted() | |
============= ============== ============== | |
'dd/mm/yy' '28/02/08' 39506.5 | |
'mm/dd/yy' '02/28/08' 39506.5 | |
'd-m-yyyy' '28-2-2008' 39506.5 | |
'dd/mm/yy hh:mm' '28/02/08 12:00' 39506.5 | |
'd mmm yyyy' '28 Feb 2008' 39506.5 | |
'mmm d yyyy hh:mm AM/PM' 'Feb 28 2008 12:00 PM' 39506.5 | |
The L<Spreadsheet::ParseExcel::Utility> module contains a function called C<ExcelLocaltime> which will convert between an unformatted Excel date/time number and a C<localtime()> like array. | |
For date conversions using the CPAN C<DateTime> framework see L<DateTime::Format::Excel> http://search.cpan.org/search?dist=DateTime-Format-Excel | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_CELL | |
$fatpacked{"Spreadsheet/ParseExcel/Dump.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_DUMP'; | |
package Spreadsheet::ParseExcel::Dump; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Dump - A class for dumping Excel records. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
our $VERSION = '0.65'; | |
my %NameTbl = ( | |
#P291 | |
0x0A => 'EOF', | |
0x0C => 'CALCCOUNT', | |
0x0D => 'CALCMODE', | |
0x0E => 'PRECISION', | |
0x0F => 'REFMODE', | |
0x10 => 'DELTA', | |
0x11 => 'ITERATION', | |
0x12 => 'PROTECT', | |
0x13 => 'PASSWORD', | |
0x14 => 'HEADER', | |
0x15 => 'FOOTER', | |
0x16 => 'EXTERNCOUNT', | |
0x17 => 'EXTERNSHEET', | |
0x19 => 'WINDOWPROTECT', | |
0x1A => 'VERTICALPAGEBREAKS', | |
0x1B => 'HORIZONTALPAGEBREAKS', | |
0x1C => 'NOTE', | |
0x1D => 'SELECTION', | |
0x22 => '1904', | |
0x26 => 'LEFTMARGIN', | |
0x27 => 'RIGHTMARGIN', | |
0x28 => 'TOPMARGIN', | |
0x29 => 'BOTTOMMARGIN', | |
0x2A => 'PRINTHEADERS', | |
0x2B => 'PRINTGRIDLINES', | |
0x2F => 'FILEPASS', | |
0x3C => 'COUNTINUE', | |
0x3D => 'WINDOW1', | |
0x40 => 'BACKUP', | |
0x41 => 'PANE', | |
0x42 => 'CODEPAGE', | |
0x4D => 'PLS', | |
0x50 => 'DCON', | |
0x51 => 'DCONREF', | |
#P292 | |
0x52 => 'DCONNAME', | |
0x55 => 'DEFCOLWIDTH', | |
0x59 => 'XCT', | |
0x5A => 'CRN', | |
0x5B => 'FILESHARING', | |
0x5C => 'WRITEACCES', | |
0x5D => 'OBJ', | |
0x5E => 'UNCALCED', | |
0x5F => 'SAVERECALC', | |
0x60 => 'TEMPLATE', | |
0x63 => 'OBJPROTECT', | |
0x7D => 'COLINFO', | |
0x7E => 'RK', | |
0x7F => 'IMDATA', | |
0x80 => 'GUTS', | |
0x81 => 'WSBOOL', | |
0x82 => 'GRIDSET', | |
0x83 => 'HCENTER', | |
0x84 => 'VCENTER', | |
0x85 => 'BOUNDSHEET', | |
0x86 => 'WRITEPROT', | |
0x87 => 'ADDIN', | |
0x88 => 'EDG', | |
0x89 => 'PUB', | |
0x8C => 'COUNTRY', | |
0x8D => 'HIDEOBJ', | |
0x90 => 'SORT', | |
0x91 => 'SUB', | |
0x92 => 'PALETTE', | |
0x94 => 'LHRECORD', | |
0x95 => 'LHNGRAPH', | |
0x96 => 'SOUND', | |
0x98 => 'LPR', | |
0x99 => 'STANDARDWIDTH', | |
0x9A => 'FNGROUPNAME', | |
0x9B => 'FILTERMODE', | |
0x9C => 'FNGROUPCOUNT', | |
#P293 | |
0x9D => 'AUTOFILTERINFO', | |
0x9E => 'AUTOFILTER', | |
0xA0 => 'SCL', | |
0xA1 => 'SETUP', | |
0xA9 => 'COORDLIST', | |
0xAB => 'GCW', | |
0xAE => 'SCENMAN', | |
0xAF => 'SCENARIO', | |
0xB0 => 'SXVIEW', | |
0xB1 => 'SXVD', | |
0xB2 => 'SXV', | |
0xB4 => 'SXIVD', | |
0xB5 => 'SXLI', | |
0xB6 => 'SXPI', | |
0xB8 => 'DOCROUTE', | |
0xB9 => 'RECIPNAME', | |
0xBC => 'SHRFMLA', | |
0xBD => 'MULRK', | |
0xBE => 'MULBLANK', | |
0xBF => 'TOOLBARHDR', | |
0xC0 => 'TOOLBAREND', | |
0xC1 => 'MMS', | |
0xC2 => 'ADDMENU', | |
0xC3 => 'DELMENU', | |
0xC5 => 'SXDI', | |
0xC6 => 'SXDB', | |
0xCD => 'SXSTRING', | |
0xD0 => 'SXTBL', | |
0xD1 => 'SXTBRGIITM', | |
0xD2 => 'SXTBPG', | |
0xD3 => 'OBPROJ', | |
0xD5 => 'SXISDTM', | |
0xD6 => 'RSTRING', | |
0xD7 => 'DBCELL', | |
0xDA => 'BOOKBOOL', | |
0xDC => 'PARAMQRY', | |
0xDC => 'SXEXT', | |
0xDD => 'SCENPROTECT', | |
0xDE => 'OLESIZE', | |
#P294 | |
0xDF => 'UDDESC', | |
0xE0 => 'XF', | |
0xE1 => 'INTERFACEHDR', | |
0xE2 => 'INTERFACEEND', | |
0xE3 => 'SXVS', | |
0xEA => 'TABIDCONF', | |
0xEB => 'MSODRAWINGGROUP', | |
0xEC => 'MSODRAWING', | |
0xED => 'MSODRAWINGSELECTION', | |
0xEF => 'PHONETICINFO', | |
0xF0 => 'SXRULE', | |
0xF1 => 'SXEXT', | |
0xF2 => 'SXFILT', | |
0xF6 => 'SXNAME', | |
0xF7 => 'SXSELECT', | |
0xF8 => 'SXPAIR', | |
0xF9 => 'SXFMLA', | |
0xFB => 'SXFORMAT', | |
0xFC => 'SST', | |
0xFD => 'LABELSST', | |
0xFF => 'EXTSST', | |
0x100 => 'SXVDEX', | |
0x103 => 'SXFORMULA', | |
0x122 => 'SXDBEX', | |
0x13D => 'TABID', | |
0x160 => 'USESELFS', | |
0x161 => 'DSF', | |
0x162 => 'XL5MODIFY', | |
0x1A5 => 'FILESHARING2', | |
0x1A9 => 'USERBVIEW', | |
0x1AA => 'USERVIEWBEGIN', | |
0x1AB => 'USERSVIEWEND', | |
0x1AD => 'QSI', | |
0x1AE => 'SUPBOOK', | |
0x1AF => 'PROT4REV', | |
0x1B0 => 'CONDFMT', | |
0x1B1 => 'CF', | |
0x1B2 => 'DVAL', | |
#P295 | |
0x1B5 => 'DCONBIN', | |
0x1B6 => 'TXO', | |
0x1B7 => 'REFRESHALL', | |
0x1B8 => 'HLINK', | |
0x1BA => 'CODENAME', | |
0x1BB => 'SXFDBTYPE', | |
0x1BC => 'PROT4REVPASS', | |
0x1BE => 'DV', | |
0x200 => 'DIMENSIONS', | |
0x201 => 'BLANK', | |
0x202 => 'Integer', #Not Documented | |
0x203 => 'NUMBER', | |
0x204 => 'LABEL', | |
0x205 => 'BOOLERR', | |
0x207 => 'STRING', | |
0x208 => 'ROW', | |
0x20B => 'INDEX', | |
0x218 => 'NAME', | |
0x221 => 'ARRAY', | |
0x223 => 'EXTERNNAME', | |
0x225 => 'DEFAULTROWHEIGHT', | |
0x231 => 'FONT', | |
0x236 => 'TABLE', | |
0x23E => 'WINDOW2', | |
0x293 => 'STYLE', | |
0x406 => 'FORMULA', | |
0x41E => 'FORMAT', | |
0x18 => 'NAME', | |
0x06 => 'FORMULA', | |
0x09 => 'BOF(BIFF2)', | |
0x209 => 'BOF(BIFF3)', | |
0x409 => 'BOF(BIFF4)', | |
0x809 => 'BOF(BIFF5-7)', | |
0x31 => 'FONT', 0x27E => 'RK', | |
#Chart/Graph | |
0x1001 => 'UNITS', | |
0x1002 => 'CHART', | |
0x1003 => 'SERISES', | |
0x1006 => 'DATAFORMAT', | |
0x1007 => 'LINEFORMAT', | |
0x1009 => 'MAKERFORMAT', | |
0x100A => 'AREAFORMAT', | |
0x100B => 'PIEFORMAT', | |
0x100C => 'ATTACHEDLABEL', | |
0x100D => 'SERIESTEXT', | |
0x1014 => 'CHARTFORMAT', | |
0x1015 => 'LEGEND', | |
0x1016 => 'SERIESLIST', | |
0x1017 => 'BAR', | |
0x1018 => 'LINE', | |
0x1019 => 'PIE', | |
0x101A => 'AREA', | |
0x101B => 'SCATTER', | |
0x101C => 'CHARTLINE', | |
0x101D => 'AXIS', | |
0x101E => 'TICK', | |
0x101F => 'VALUERANGE', | |
0x1020 => 'CATSERRANGE', | |
0x1021 => 'AXISLINEFORMAT', | |
0x1022 => 'CHARTFORMATLINK', | |
0x1024 => 'DEFAULTTEXT', | |
0x1025 => 'TEXT', | |
0x1026 => 'FONTX', | |
0x1027 => 'OBJECTLINK', | |
0x1032 => 'FRAME', | |
0x1033 => 'BEGIN', | |
0x1034 => 'END', | |
0x1035 => 'PLOTAREA', | |
0x103A => '3D', | |
0x103C => 'PICF', | |
0x103D => 'DROPBAR', | |
0x103E => 'RADAR', | |
0x103F => 'SURFACE', | |
0x1040 => 'RADARAREA', | |
0x1041 => 'AXISPARENT', | |
0x1043 => 'LEGENDXN', | |
0x1044 => 'SHTPROPS', | |
0x1045 => 'SERTOCRT', | |
0x1046 => 'AXESUSED', | |
0x1048 => 'SBASEREF', | |
0x104A => 'SERPARENT', | |
0x104B => 'SERAUXTREND', | |
0x104E => 'IFMT', | |
0x104F => 'POS', | |
0x1050 => 'ALRUNS', | |
0x1051 => 'AI', | |
0x105B => 'SERAUXERRBAR', | |
0x105D => 'SERFMT', | |
0x1060 => 'FBI', | |
0x1061 => 'BOPPOP', | |
0x1062 => 'AXCEXT', | |
0x1063 => 'DAT', | |
0x1064 => 'PLOTGROWTH', | |
0x1065 => 'SINDEX', | |
0x1066 => 'GELFRAME', | |
0x1067 => 'BPOPPOPCUSTOM', | |
); | |
#------------------------------------------------------------------------------ | |
# subDUMP (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub subDUMP { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
printf "%04X:%-23s (Len:%3d) : %s\n", | |
$bOp, OpName($bOp), $bLen, unpack( "H40", $sWk ); | |
} | |
#------------------------------------------------------------------------------ | |
# Spreadsheet::ParseExcel->OpName | |
#------------------------------------------------------------------------------ | |
sub OpName { | |
my ($bOp) = @_; | |
return ( defined $NameTbl{$bOp} ) ? $NameTbl{$bOp} : 'undef'; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::Dump - A class for dumping Excel records. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_DUMP | |
$fatpacked{"Spreadsheet/ParseExcel/FmtDefault.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FMTDEFAULT'; | |
package Spreadsheet::ParseExcel::FmtDefault; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::FmtDefault - A class for Cell formats. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use Spreadsheet::ParseExcel::Utility qw(ExcelFmt); | |
our $VERSION = '0.65'; | |
my %hFmtDefault = ( | |
0x00 => 'General', | |
0x01 => '0', | |
0x02 => '0.00', | |
0x03 => '#,##0', | |
0x04 => '#,##0.00', | |
0x05 => '($#,##0_);($#,##0)', | |
0x06 => '($#,##0_);[Red]($#,##0)', | |
0x07 => '($#,##0.00_);($#,##0.00_)', | |
0x08 => '($#,##0.00_);[Red]($#,##0.00_)', | |
0x09 => '0%', | |
0x0A => '0.00%', | |
0x0B => '0.00E+00', | |
0x0C => '# ?/?', | |
0x0D => '# ??/??', | |
0x0E => 'yyyy-mm-dd', # Was 'm-d-yy', which is bad as system default | |
0x0F => 'd-mmm-yy', | |
0x10 => 'd-mmm', | |
0x11 => 'mmm-yy', | |
0x12 => 'h:mm AM/PM', | |
0x13 => 'h:mm:ss AM/PM', | |
0x14 => 'h:mm', | |
0x15 => 'h:mm:ss', | |
0x16 => 'm-d-yy h:mm', | |
#0x17-0x24 -- Differs in Natinal | |
0x25 => '(#,##0_);(#,##0)', | |
0x26 => '(#,##0_);[Red](#,##0)', | |
0x27 => '(#,##0.00);(#,##0.00)', | |
0x28 => '(#,##0.00);[Red](#,##0.00)', | |
0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)', | |
0x2A => '_($*#,##0_);_($*(#,##0);_(*"-"_);_(@_)', | |
0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)', | |
0x2C => '_($*#,##0.00_);_($*(#,##0.00);_(*"-"??_);_(@_)', | |
0x2D => 'mm:ss', | |
0x2E => '[h]:mm:ss', | |
0x2F => 'mm:ss.0', | |
0x30 => '##0.0E+0', | |
0x31 => '@', | |
); | |
#------------------------------------------------------------------------------ | |
# new (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub new { | |
my ( $sPkg, %hKey ) = @_; | |
my $oThis = {}; | |
bless $oThis; | |
return $oThis; | |
} | |
#------------------------------------------------------------------------------ | |
# TextFmt (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub TextFmt { | |
my ( $oThis, $sTxt, $sCode ) = @_; | |
return $sTxt if ( ( !defined($sCode) ) || ( $sCode eq '_native_' ) ); | |
return pack( 'U*', unpack( 'n*', $sTxt ) ); | |
} | |
#------------------------------------------------------------------------------ | |
# FmtStringDef (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub FmtStringDef { | |
my ( $oThis, $iFmtIdx, $oBook, $rhFmt ) = @_; | |
my $sFmtStr = $oBook->{FormatStr}->{$iFmtIdx}; | |
if ( !( defined($sFmtStr) ) && defined($rhFmt) ) { | |
$sFmtStr = $rhFmt->{$iFmtIdx}; | |
} | |
$sFmtStr = $hFmtDefault{$iFmtIdx} unless ($sFmtStr); | |
return $sFmtStr; | |
} | |
#------------------------------------------------------------------------------ | |
# FmtString (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub FmtString { | |
my ( $oThis, $oCell, $oBook ) = @_; | |
my $sFmtStr = | |
$oThis->FmtStringDef( $oBook->{Format}[ $oCell->{FormatNo} ]->{FmtIdx}, | |
$oBook ); | |
# Special case for cells that use Lotus123 style leading | |
# apostrophe to designate text formatting. | |
if ( $oBook->{Format}[ $oCell->{FormatNo} ]->{Key123} ) { | |
$sFmtStr = '@'; | |
} | |
unless ( defined($sFmtStr) ) { | |
if ( $oCell->{Type} eq 'Numeric' ) { | |
if ( int( $oCell->{Val} ) != $oCell->{Val} ) { | |
$sFmtStr = '0.00'; | |
} | |
else { | |
$sFmtStr = '0'; | |
} | |
} | |
elsif ( $oCell->{Type} eq 'Date' ) { | |
if ( int( $oCell->{Val} ) <= 0 ) { | |
$sFmtStr = 'h:mm:ss'; | |
} | |
else { | |
$sFmtStr = 'yyyy-mm-dd'; | |
} | |
} | |
else { | |
$sFmtStr = '@'; | |
} | |
} | |
return $sFmtStr; | |
} | |
#------------------------------------------------------------------------------ | |
# ValFmt (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub ValFmt { | |
my ( $oThis, $oCell, $oBook ) = @_; | |
my ( $Dt, $iFmtIdx, $iNumeric, $Flg1904 ); | |
if ( $oCell->{Type} eq 'Text' ) { | |
$Dt = | |
( ( defined $oCell->{Val} ) && ( $oCell->{Val} ne '' ) ) | |
? $oThis->TextFmt( $oCell->{Val}, $oCell->{Code} ) | |
: ''; | |
return $Dt; | |
} | |
else { | |
$Dt = $oCell->{Val}; | |
$Flg1904 = $oBook->{Flg1904}; | |
my $sFmtStr = $oThis->FmtString( $oCell, $oBook ); | |
return ExcelFmt( $sFmtStr, $Dt, $Flg1904, $oCell->{Type} ); | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# ChkType (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub ChkType { | |
my ( $oPkg, $iNumeric, $iFmtIdx ) = @_; | |
if ($iNumeric) { | |
if ( ( ( $iFmtIdx >= 0x0E ) && ( $iFmtIdx <= 0x16 ) ) | |
|| ( ( $iFmtIdx >= 0x2D ) && ( $iFmtIdx <= 0x2F ) ) ) | |
{ | |
return "Date"; | |
} | |
else { | |
return "Numeric"; | |
} | |
} | |
else { | |
return "Text"; | |
} | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::FmtDefault - A class for Cell formats. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_FMTDEFAULT | |
$fatpacked{"Spreadsheet/ParseExcel/FmtJapan.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FMTJAPAN'; | |
package Spreadsheet::ParseExcel::FmtJapan; | |
use utf8; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::FmtJapan - A class for Cell formats. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use Encode qw(find_encoding decode); | |
use base 'Spreadsheet::ParseExcel::FmtDefault'; | |
our $VERSION = '0.65'; | |
my %FormatTable = ( | |
0x00 => 'General', | |
0x01 => '0', | |
0x02 => '0.00', | |
0x03 => '#,##0', | |
0x04 => '#,##0.00', | |
0x05 => '(\\#,##0_);(\\#,##0)', | |
0x06 => '(\\#,##0_);[Red](\\#,##0)', | |
0x07 => '(\\#,##0.00_);(\\#,##0.00_)', | |
0x08 => '(\\#,##0.00_);[Red](\\#,##0.00_)', | |
0x09 => '0%', | |
0x0A => '0.00%', | |
0x0B => '0.00E+00', | |
0x0C => '# ?/?', | |
0x0D => '# ??/??', | |
# 0x0E => 'm/d/yy', | |
0x0E => 'yyyy/m/d', | |
0x0F => 'd-mmm-yy', | |
0x10 => 'd-mmm', | |
0x11 => 'mmm-yy', | |
0x12 => 'h:mm AM/PM', | |
0x13 => 'h:mm:ss AM/PM', | |
0x14 => 'h:mm', | |
0x15 => 'h:mm:ss', | |
# 0x16 => 'm/d/yy h:mm', | |
0x16 => 'yyyy/m/d h:mm', | |
#0x17-0x24 -- Differs in Natinal | |
0x1E => 'm/d/yy', | |
0x1F => 'yyyy"年"m"月"d"日"', | |
0x20 => 'h"時"mm"分"', | |
0x21 => 'h"時"mm"分"ss"秒"', | |
#0x17-0x24 -- Differs in Natinal | |
0x25 => '(#,##0_);(#,##0)', | |
0x26 => '(#,##0_);[Red](#,##0)', | |
0x27 => '(#,##0.00);(#,##0.00)', | |
0x28 => '(#,##0.00);[Red](#,##0.00)', | |
0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)', | |
0x2A => '_(\\*#,##0_);_(\\*(#,##0);_(*"-"_);_(@_)', | |
0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)', | |
0x2C => '_(\\*#,##0.00_);_(\\*(#,##0.00);_(*"-"??_);_(@_)', | |
0x2D => 'mm:ss', | |
0x2E => '[h]:mm:ss', | |
0x2F => 'mm:ss.0', | |
0x30 => '##0.0E+0', | |
0x31 => '@', | |
0x37 => 'yyyy"年"m"月"', | |
0x38 => 'm"月"d"日"', | |
0x39 => 'ge.m.d', | |
0x3A => 'ggge"年"m"月"d"日"', | |
); | |
#------------------------------------------------------------------------------ | |
# new (for Spreadsheet::ParseExcel::FmtJapan) | |
#------------------------------------------------------------------------------ | |
sub new { | |
my ( $class, %args ) = @_; | |
my $encoding = $args{Code} || $args{encoding}; | |
my $self = { Code => $encoding }; | |
if($encoding){ | |
$self->{encoding} = find_encoding($encoding eq 'sjis' ? 'cp932' : $encoding) | |
or do{ | |
require Carp; | |
Carp::croak(qq{Unknown encoding '$encoding'}); | |
}; | |
} | |
return bless $self, $class; | |
} | |
#------------------------------------------------------------------------------ | |
# TextFmt (for Spreadsheet::ParseExcel::FmtJapan) | |
#------------------------------------------------------------------------------ | |
sub TextFmt { | |
my ( $self, $text, $input_encoding ) = @_; | |
if(!defined $input_encoding){ | |
$input_encoding = 'utf8'; | |
} | |
elsif($input_encoding eq '_native_'){ | |
$input_encoding = 'cp932'; # Shift_JIS in Microsoft products | |
} | |
$text = decode($input_encoding, $text); | |
return $self->{Code} ? $self->{encoding}->encode($text) : $text; | |
} | |
#------------------------------------------------------------------------------ | |
# FmtStringDef (for Spreadsheet::ParseExcel::FmtJapan) | |
#------------------------------------------------------------------------------ | |
sub FmtStringDef { | |
my ( $self, $format_index, $book ) = @_; | |
return $self->SUPER::FmtStringDef( $format_index, $book, \%FormatTable ); | |
} | |
#------------------------------------------------------------------------------ | |
# CnvNengo (for Spreadsheet::ParseExcel::FmtJapan) | |
#------------------------------------------------------------------------------ | |
# Convert A.D. into Japanese Nengo (aka Gengo) | |
my @Nengo = ( | |
{ | |
name => '平成', # Heisei | |
abbr_name => 'H', | |
base => 1988, | |
start => 19890108, | |
}, | |
{ | |
name => '昭和', # Showa | |
abbr_name => 'S', | |
base => 1925, | |
start => 19261225, | |
}, | |
{ | |
name => '大正', # Taisho | |
abbr_name => 'T', | |
base => 1911, | |
start => 19120730, | |
}, | |
{ | |
name => '明治', # Meiji | |
abbr_name => 'M', | |
base => 1867, | |
start => 18680908, | |
}, | |
); | |
# Usage: CnvNengo(name => @tm) or CnvNeng(abbr_name => @tm) | |
sub CnvNengo { | |
my ( $kind, @tm ) = @_; | |
my $year = $tm[5] + 1900; | |
my $wk = ($year * 10000) + ($tm[4] * 100) + ($tm[3] * 1); | |
#my $wk = sprintf( '%04d%02d%02d', $year, $tm[4], $tm[3] ); | |
foreach my $nengo(@Nengo){ | |
if( $wk >= $nengo->{start} ){ | |
return $nengo->{$kind} . ($year - $nengo->{base}); | |
} | |
} | |
return $year; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::FmtJapan - A class for Cell formats. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_FMTJAPAN | |
$fatpacked{"Spreadsheet/ParseExcel/FmtJapan2.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FMTJAPAN2'; | |
package Spreadsheet::ParseExcel::FmtJapan2; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::FmtJapan2 - A class for Cell formats. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use Jcode; | |
use Unicode::Map; | |
use base 'Spreadsheet::ParseExcel::FmtJapan'; | |
our $VERSION = '0.65'; | |
#------------------------------------------------------------------------------ | |
# new (for Spreadsheet::ParseExcel::FmtJapan2) | |
#------------------------------------------------------------------------------ | |
sub new { | |
my ( $sPkg, %hKey ) = @_; | |
my $oMap = Unicode::Map->new('CP932Excel'); | |
die "NO MAP FILE CP932Excel!!" | |
unless ( -r Unicode::Map->mapping("CP932Excel") ); | |
my $oThis = { | |
Code => $hKey{Code}, | |
_UniMap => $oMap, | |
}; | |
bless $oThis; | |
$oThis->SUPER::new(%hKey); | |
return $oThis; | |
} | |
#------------------------------------------------------------------------------ | |
# TextFmt (for Spreadsheet::ParseExcel::FmtJapan2) | |
#------------------------------------------------------------------------------ | |
sub TextFmt { | |
my ( $oThis, $sTxt, $sCode ) = @_; | |
# $sCode = 'sjis' if((! defined($sCode)) || ($sCode eq '_native_')); | |
if ( $oThis->{Code} ) { | |
if ( !defined($sCode) ) { | |
$sTxt =~ s/(.)/\x00$1/sg; | |
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt); | |
} | |
elsif ( $sCode eq 'ucs2' ) { | |
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt); | |
} | |
return Jcode::convert( $sTxt, $oThis->{Code}, 'sjis' ); | |
} | |
else { | |
return $sTxt; | |
} | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::FmtJapan2 - A class for Cell formats. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_FMTJAPAN2 | |
$fatpacked{"Spreadsheet/ParseExcel/FmtUnicode.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FMTUNICODE'; | |
package Spreadsheet::ParseExcel::FmtUnicode; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::FmtUnicode - A class for Cell formats. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use Unicode::Map; | |
use base 'Spreadsheet::ParseExcel::FmtDefault'; | |
our $VERSION = '0.65'; | |
#------------------------------------------------------------------------------ | |
# new (for Spreadsheet::ParseExcel::FmtUnicode) | |
#------------------------------------------------------------------------------ | |
sub new { | |
my ( $sPkg, %hKey ) = @_; | |
my $sMap = $hKey{Unicode_Map}; | |
my $oMap; | |
$oMap = Unicode::Map->new($sMap) if $sMap; | |
my $oThis = { | |
Unicode_Map => $sMap, | |
_UniMap => $oMap, | |
}; | |
bless $oThis; | |
return $oThis; | |
} | |
#------------------------------------------------------------------------------ | |
# TextFmt (for Spreadsheet::ParseExcel::FmtUnicode) | |
#------------------------------------------------------------------------------ | |
sub TextFmt { | |
my ( $oThis, $sTxt, $sCode ) = @_; | |
if ( $oThis->{_UniMap} ) { | |
if ( !defined($sCode) ) { | |
my $sSv = $sTxt; | |
$sTxt =~ s/(.)/\x00$1/sg; | |
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt); | |
$sTxt = $sSv unless ($sTxt); | |
} | |
elsif ( $sCode eq 'ucs2' ) { | |
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt); | |
} | |
# $sTxt = $oThis->{_UniMap}->from_unicode($sTxt) | |
# if(defined($sCode) && $sCode eq 'ucs2'); | |
return $sTxt; | |
} | |
else { | |
return $sTxt; | |
} | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::FmtUnicode - A class for Cell formats. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_FMTUNICODE | |
$fatpacked{"Spreadsheet/ParseExcel/Font.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FONT'; | |
package Spreadsheet::ParseExcel::Font; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Font - A class for Cell fonts. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
our $VERSION = '0.65'; | |
sub new { | |
my ( $class, %rhIni ) = @_; | |
my $self = \%rhIni; | |
bless $self, $class; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::Font - A class for Cell fonts. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_FONT | |
$fatpacked{"Spreadsheet/ParseExcel/Format.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FORMAT'; | |
package Spreadsheet::ParseExcel::Format; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Format - A class for Cell formats. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
our $VERSION = '0.65'; | |
sub new { | |
my ( $class, %rhIni ) = @_; | |
my $self = \%rhIni; | |
bless $self, $class; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::Format - A class for Cell formats. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_FORMAT | |
$fatpacked{"Spreadsheet/ParseExcel/SaveParser.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_SAVEPARSER'; | |
package Spreadsheet::ParseExcel::SaveParser; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::SaveParser - Rewrite an existing Excel file. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use Spreadsheet::ParseExcel; | |
use Spreadsheet::ParseExcel::SaveParser::Workbook; | |
use Spreadsheet::ParseExcel::SaveParser::Worksheet; | |
use Spreadsheet::WriteExcel; | |
use base 'Spreadsheet::ParseExcel'; | |
our $VERSION = '0.65'; | |
############################################################################### | |
# | |
# new() | |
# | |
sub new { | |
my ( $package, %params ) = @_; | |
$package->SUPER::new(%params); | |
} | |
############################################################################### | |
# | |
# Create() | |
# | |
sub Create { | |
my ( $self, $formatter ) = @_; | |
#0. New $workbook | |
my $workbook = Spreadsheet::ParseExcel::Workbook->new(); | |
$workbook->{SheetCount} = 0; | |
# User specified formatter class. | |
if ($formatter) { | |
$workbook->{FmtClass} = $formatter; | |
} | |
else { | |
$workbook->{FmtClass} = Spreadsheet::ParseExcel::FmtDefault->new(); | |
} | |
return Spreadsheet::ParseExcel::SaveParser::Workbook->new($workbook); | |
} | |
############################################################################### | |
# | |
# Parse() | |
# | |
sub Parse { | |
my ( $self, $sFile, $formatter ) = @_; | |
my $workbook = $self->SUPER::Parse( $sFile, $formatter ); | |
return undef unless defined $workbook; | |
return Spreadsheet::ParseExcel::SaveParser::Workbook->new($workbook); | |
} | |
############################################################################### | |
# | |
# SaveAs() | |
# | |
sub SaveAs { | |
my ( $self, $workbook, $filename ) = @_; | |
$workbook->SaveAs($filename); | |
} | |
1; | |
__END__ | |
=head1 NAME | |
Spreadsheet::ParseExcel::SaveParser - Rewrite an existing Excel file. | |
=head1 SYNOPSIS | |
Say we start with an Excel file that looks like this: | |
----------------------------------------------------- | |
| | A | B | C | | |
----------------------------------------------------- | |
| 1 | Hello | ... | ... | ... | |
| 2 | World | ... | ... | ... | |
| 3 | *Bold text* | ... | ... | ... | |
| 4 | ... | ... | ... | ... | |
| 5 | ... | ... | ... | ... | |
Then we process it with the following program: | |
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use Spreadsheet::ParseExcel; | |
use Spreadsheet::ParseExcel::SaveParser; | |
# Open an existing file with SaveParser | |
my $parser = Spreadsheet::ParseExcel::SaveParser->new(); | |
my $template = $parser->Parse('template.xls'); | |
# Get the first worksheet. | |
my $worksheet = $template->worksheet(0); | |
my $row = 0; | |
my $col = 0; | |
# Overwrite the string in cell A1 | |
$worksheet->AddCell( $row, $col, 'New string' ); | |
# Add a new string in cell B1 | |
$worksheet->AddCell( $row, $col + 1, 'Newer' ); | |
# Add a new string in cell C1 with the format from cell A3. | |
my $cell = $worksheet->get_cell( $row + 2, $col ); | |
my $format_number = $cell->{FormatNo}; | |
$worksheet->AddCell( $row, $col + 2, 'Newest', $format_number ); | |
# Write over the existing file or write a new file. | |
$template->SaveAs('newfile.xls'); | |
We should now have an Excel file that looks like this: | |
----------------------------------------------------- | |
| | A | B | C | | |
----------------------------------------------------- | |
| 1 | New string | Newer | *Newest* | ... | |
| 2 | World | ... | ... | ... | |
| 3 | *Bold text* | ... | ... | ... | |
| 4 | ... | ... | ... | ... | |
| 5 | ... | ... | ... | ... | |
=head1 DESCRIPTION | |
The C<Spreadsheet::ParseExcel::SaveParser> module rewrite an existing Excel file by reading it with C<Spreadsheet::ParseExcel> and rewriting it with C<Spreadsheet::WriteExcel>. | |
=head1 METHODS | |
=head1 Parser | |
=head2 new() | |
$parse = new Spreadsheet::ParseExcel::SaveParser(); | |
Constructor. | |
=head2 Parse() | |
$workbook = $parse->Parse($sFileName); | |
$workbook = $parse->Parse($sFileName , $formatter); | |
Returns a L</Workbook> object. If an error occurs, returns undef. | |
The optional C<$formatter> is a Formatter Class to format the value of cells. | |
=head1 Workbook | |
The C<Parse()> method returns a C<Spreadsheet::ParseExcel::SaveParser::Workbook> object. | |
This is a subclass of the L<Spreadsheet::ParseExcel::Workbook> and has the following methods: | |
=head2 worksheets() | |
Returns an array of L</Worksheet> objects. This was most commonly used to iterate over the worksheets in a workbook: | |
for my $worksheet ( $workbook->worksheets() ) { | |
... | |
} | |
=head2 worksheet() | |
The C<worksheet()> method returns a single C<Worksheet> object using either its name or index: | |
$worksheet = $workbook->worksheet('Sheet1'); | |
$worksheet = $workbook->worksheet(0); | |
Returns C<undef> if the sheet name or index doesn't exist. | |
=head2 AddWorksheet() | |
$workbook = $workbook->AddWorksheet($name, %properties); | |
Create a new Worksheet object of type C<Spreadsheet::ParseExcel::Worksheet>. | |
The C<%properties> hash contains the properties of new Worksheet. | |
=head2 AddFont | |
$workbook = $workbook->AddFont(%properties); | |
Create new Font object of type C<Spreadsheet::ParseExcel::Font>. | |
The C<%properties> hash contains the properties of new Font. | |
=head2 AddFormat | |
$workbook = $workbook->AddFormat(%properties); | |
The C<%properties> hash contains the properties of new Font. | |
=head1 Worksheet | |
Spreadsheet::ParseExcel::SaveParser::Worksheet | |
Worksheet is a subclass of Spreadsheet::ParseExcel::Worksheet. | |
And has these methods : | |
The C<Worksbook::worksheet()> method returns a C<Spreadsheet::ParseExcel::SaveParser::Worksheet> object. | |
This is a subclass of the L<Spreadsheet::ParseExcel::Worksheet> and has the following methods: | |
=head1 AddCell | |
$workbook = $worksheet->AddCell($row, $col, $value, $format [$encoding]); | |
Create new Cell object of type C<Spreadsheet::ParseExcel::Cell>. | |
The C<$format> parameter is the format number rather than a full format object. | |
To specify just same as another cell, | |
you can set it like below: | |
$row = 0; | |
$col = 0; | |
$worksheet = $template->worksheet(0); | |
$cell = $worksheet->get_cell( $row, $col ); | |
$format_number = $cell->{FormatNo}; | |
$worksheet->AddCell($row +1, $coll, 'New data', $format_number); | |
=head1 TODO | |
Please note that this module is currently (versions 0.50-0.60) undergoing a major | |
restructuring and rewriting. | |
=head1 Known Problems | |
You can only rewrite the features that Spreadsheet::WriteExcel supports so | |
macros, graphs and some other features in the original Excel file will be lost. | |
Also, formulas aren't rewritten, only the result of a formula is written. | |
Only last print area will remain. (Others will be removed) | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2002 Kawai Takanori and Nippon-RAD Co. OP Division | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_SAVEPARSER | |
$fatpacked{"Spreadsheet/ParseExcel/SaveParser/Workbook.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_SAVEPARSER_WORKBOOK'; | |
package Spreadsheet::ParseExcel::SaveParser::Workbook; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::SaveParser::Workbook - A class for SaveParser Workbooks. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use base 'Spreadsheet::ParseExcel::Workbook'; | |
our $VERSION = '0.65'; | |
#============================================================================== | |
# Spreadsheet::ParseExcel::SaveParser::Workbook | |
#============================================================================== | |
sub new { | |
my ( $sPkg, $oBook ) = @_; | |
return undef unless ( defined $oBook ); | |
my %oThis = %$oBook; | |
bless \%oThis, $sPkg; | |
# re-bless worksheets (and set their _Book properties !!!) | |
my $sWkP = ref($sPkg) || "$sPkg"; | |
$sWkP =~ s/Workbook$/Worksheet/; | |
map { bless( $_, $sWkP ); } @{ $oThis{Worksheet} }; | |
map { $_->{_Book} = \%oThis; } @{ $oThis{Worksheet} }; | |
return \%oThis; | |
} | |
#------------------------------------------------------------------------------ | |
# Parse (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub Parse { | |
my ( $sClass, $sFile, $oWkFmt ) = @_; | |
my $oBook = Spreadsheet::ParseExcel::Workbook->Parse( $sFile, $oWkFmt ); | |
bless $oBook, $sClass; | |
# re-bless worksheets (and set their _Book properties !!!) | |
my $sWkP = ref($sClass) || "$sClass"; | |
$sWkP =~ s/Workbook$/Worksheet/; | |
map { bless( $_, $sWkP ); } @{ $oBook->{Worksheet} }; | |
map { $_->{_Book} = $oBook; } @{ $oBook->{Worksheet} }; | |
return $oBook; | |
} | |
#------------------------------------------------------------------------------ | |
# SaveAs (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub SaveAs { | |
my ( $oBook, $sName ) = @_; | |
# Create a new Excel workbook | |
my $oWrEx = Spreadsheet::WriteExcel->new($sName); | |
$oWrEx->compatibility_mode(); | |
my %hFmt; | |
my $iNo = 0; | |
my @aAlH = ( | |
'left', 'left', 'center', 'right', | |
'fill', 'justify', 'merge', 'equal_space' | |
); | |
my @aAlV = ( 'top', 'vcenter', 'bottom', 'vjustify', 'vequal_space' ); | |
foreach my $pFmt ( @{ $oBook->{Format} } ) { | |
my $oFmt = $oWrEx->addformat(); # Add Formats | |
unless ( $pFmt->{Style} ) { | |
$hFmt{$iNo} = $oFmt; | |
my $rFont = $pFmt->{Font}; | |
$oFmt->set_font( $rFont->{Name} ); | |
$oFmt->set_size( $rFont->{Height} ); | |
$oFmt->set_color( $rFont->{Color} ); | |
$oFmt->set_bold( $rFont->{Bold} ); | |
$oFmt->set_italic( $rFont->{Italic} ); | |
$oFmt->set_underline( $rFont->{Underline} ); | |
$oFmt->set_font_strikeout( $rFont->{Strikeout} ); | |
$oFmt->set_font_script( $rFont->{Super} ); | |
$oFmt->set_hidden( $rFont->{Hidden} ); #Add | |
$oFmt->set_locked( $pFmt->{Lock} ); | |
$oFmt->set_align( $aAlH[ $pFmt->{AlignH} ] ); | |
$oFmt->set_align( $aAlV[ $pFmt->{AlignV} ] ); | |
$oFmt->set_rotation( $pFmt->{Rotate} ); | |
$oFmt->set_num_format( | |
$oBook->{FmtClass}->FmtStringDef( $pFmt->{FmtIdx}, $oBook ) ); | |
$oFmt->set_text_wrap( $pFmt->{Wrap} ); | |
$oFmt->set_pattern( $pFmt->{Fill}->[0] ); | |
$oFmt->set_fg_color( $pFmt->{Fill}->[1] ) | |
if ( ( $pFmt->{Fill}->[1] >= 8 ) | |
&& ( $pFmt->{Fill}->[1] <= 63 ) ); | |
$oFmt->set_bg_color( $pFmt->{Fill}->[2] ) | |
if ( ( $pFmt->{Fill}->[2] >= 8 ) | |
&& ( $pFmt->{Fill}->[2] <= 63 ) ); | |
$oFmt->set_left( | |
( $pFmt->{BdrStyle}->[0] > 7 ) ? 3 : $pFmt->{BdrStyle}->[0] ); | |
$oFmt->set_right( | |
( $pFmt->{BdrStyle}->[1] > 7 ) ? 3 : $pFmt->{BdrStyle}->[1] ); | |
$oFmt->set_top( | |
( $pFmt->{BdrStyle}->[2] > 7 ) ? 3 : $pFmt->{BdrStyle}->[2] ); | |
$oFmt->set_bottom( | |
( $pFmt->{BdrStyle}->[3] > 7 ) ? 3 : $pFmt->{BdrStyle}->[3] ); | |
$oFmt->set_left_color( $pFmt->{BdrColor}->[0] ) | |
if ( ( $pFmt->{BdrColor}->[0] >= 8 ) | |
&& ( $pFmt->{BdrColor}->[0] <= 63 ) ); | |
$oFmt->set_right_color( $pFmt->{BdrColor}->[1] ) | |
if ( ( $pFmt->{BdrColor}->[1] >= 8 ) | |
&& ( $pFmt->{BdrColor}->[1] <= 63 ) ); | |
$oFmt->set_top_color( $pFmt->{BdrColor}->[2] ) | |
if ( ( $pFmt->{BdrColor}->[2] >= 8 ) | |
&& ( $pFmt->{BdrColor}->[2] <= 63 ) ); | |
$oFmt->set_bottom_color( $pFmt->{BdrColor}->[3] ) | |
if ( ( $pFmt->{BdrColor}->[3] >= 8 ) | |
&& ( $pFmt->{BdrColor}->[3] <= 63 ) ); | |
} | |
$iNo++; | |
} | |
for ( my $iSheet = 0 ; $iSheet < $oBook->{SheetCount} ; $iSheet++ ) { | |
my $oWkS = $oBook->{Worksheet}[$iSheet]; | |
my $oWrS = $oWrEx->addworksheet( $oWkS->{Name} ); | |
#Landscape | |
if ( !$oWkS->{Landscape} ) { # Landscape (0:Horizontal, 1:Vertical) | |
$oWrS->set_landscape(); | |
} | |
else { | |
$oWrS->set_portrait(); | |
} | |
#Protect | |
if ( defined $oWkS->{Protect} ) | |
{ # Protect ('':NoPassword, Password:Password) | |
if ( $oWkS->{Protect} ne '' ) { | |
$oWrS->protect( $oWkS->{Protect} ); | |
} | |
else { | |
$oWrS->protect(); | |
} | |
} | |
if ( $oWkS->{Scale} != 100 ) { | |
# Pages on fit with width and Heigt | |
$oWrS->fit_to_pages( $oWkS->{FitWidth}, $oWkS->{FitHeight} ); | |
#Print Scale and reset FitWidth/FitHeight | |
$oWrS->set_print_scale( $oWkS->{Scale} ); | |
} | |
else { | |
#Print Scale | |
$oWrS->set_print_scale( $oWkS->{Scale} ); | |
# Pages on fit with width and Heigt | |
$oWrS->fit_to_pages( $oWkS->{FitWidth}, $oWkS->{FitHeight} ); | |
} | |
# Paper Size | |
$oWrS->set_paper( $oWkS->{PaperSize} ); | |
# Margin | |
$oWrS->set_margin_left( $oWkS->{LeftMargin} ); | |
$oWrS->set_margin_right( $oWkS->{RightMargin} ); | |
$oWrS->set_margin_top( $oWkS->{TopMargin} ); | |
$oWrS->set_margin_bottom( $oWkS->{BottomMargin} ); | |
# HCenter | |
$oWrS->center_horizontally() if ( $oWkS->{HCenter} ); | |
# VCenter | |
$oWrS->center_vertically() if ( $oWkS->{VCenter} ); | |
# Header, Footer | |
$oWrS->set_header( $oWkS->{Header}, $oWkS->{HeaderMargin} ); | |
$oWrS->set_footer( $oWkS->{Footer}, $oWkS->{FooterMargin} ); | |
# Print Area | |
if ( ref( $oBook->{PrintArea}[$iSheet] ) eq 'ARRAY' ) { | |
my $raP; | |
for $raP ( @{ $oBook->{PrintArea}[$iSheet] } ) { | |
$oWrS->print_area(@$raP); | |
} | |
} | |
# Print Title | |
my $raW; | |
foreach $raW ( @{ $oBook->{PrintTitle}[$iSheet]->{Row} } ) { | |
$oWrS->repeat_rows(@$raW); | |
} | |
foreach $raW ( @{ $oBook->{PrintTitle}[$iSheet]->{Column} } ) { | |
$oWrS->repeat_columns(@$raW); | |
} | |
# Print Gridlines | |
if ( $oWkS->{PrintGrid} == 1 ) { | |
$oWrS->hide_gridlines(0); | |
} | |
else { | |
$oWrS->hide_gridlines(1); | |
} | |
# Print Headings | |
if ( $oWkS->{PrintHeaders} ) { | |
$oWrS->print_row_col_headers(); | |
} | |
# Horizontal Page Breaks | |
$oWrS->set_h_pagebreaks( @{ $oWkS->{HPageBreak} } ); | |
# Veritical Page Breaks | |
$oWrS->set_v_pagebreaks( @{ $oWkS->{VPageBreak} } ); | |
# PageStart => $oWkS->{PageStart}, # Page number for start | |
# UsePage => $oWkS->{UsePage}, # Use own start page number | |
# NoColor => $oWkS->{NoColor}, # Print in black-white | |
# Draft => $oWkS->{Draft}, # Print in draft mode | |
# Notes => $oWkS->{Notes}, # Print notes | |
# LeftToRight => $oWkS->{LeftToRight}, # Left to Right | |
for ( | |
my $iC = $oWkS->{MinCol} ; | |
defined $oWkS->{MaxCol} && $iC <= $oWkS->{MaxCol} ; | |
$iC++ | |
) | |
{ | |
if ( defined $oWkS->{ColWidth}[$iC] ) { | |
if ( $oWkS->{ColWidth}[$iC] > 0 ) { | |
$oWrS->set_column( $iC, $iC, $oWkS->{ColWidth}[$iC] ) | |
; #, undef, 1) ; | |
} | |
else { | |
$oWrS->set_column( $iC, $iC, 0, undef, 1 ); | |
} | |
} | |
} | |
my $merged_areas = $oWkS->get_merged_areas(); | |
my $merged_areas_h = {}; | |
if ($merged_areas) { | |
foreach my $range (@$merged_areas) { | |
$merged_areas_h->{$range->[0]}{$range->[1]} = $range; | |
} | |
} | |
for ( | |
my $iR = $oWkS->{MinRow} ; | |
defined $oWkS->{MaxRow} && $iR <= $oWkS->{MaxRow} ; | |
$iR++ | |
) | |
{ | |
$oWrS->set_row( $iR, $oWkS->{RowHeight}[$iR] ); | |
for ( | |
my $iC = $oWkS->{MinCol} ; | |
defined $oWkS->{MaxCol} && $iC <= $oWkS->{MaxCol} ; | |
$iC++ | |
) | |
{ | |
my $oWkC = $oWkS->{Cells}[$iR][$iC]; | |
if ($oWkC) { | |
if ( $oWkC->{Merged} and exists $merged_areas_h->{$iR}{$iC} ) { | |
my $oFmtN = $oWrEx->addformat(); | |
$oFmtN->copy( $hFmt{ $oWkC->{FormatNo} } ); | |
$oWrS->merge_range ( | |
@{$merged_areas_h->{$iR}{$iC}}, | |
$oBook->{FmtClass} | |
->TextFmt( $oWkC->{Val}, $oWkC->{Code} ), | |
$oFmtN | |
); | |
} | |
else { | |
$oWrS->write( | |
$iR, | |
$iC, | |
$oBook->{FmtClass} | |
->TextFmt( $oWkC->{Val}, $oWkC->{Code} ), | |
$hFmt{ $oWkC->{FormatNo} } | |
); | |
} | |
} | |
} | |
} | |
} | |
return $oWrEx; | |
} | |
#------------------------------------------------------------------------------ | |
# AddWorksheet (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub AddWorksheet { | |
my ( $oBook, $sName, %hAttr ) = @_; | |
$oBook->AddFormat if ( $#{ $oBook->{Format} } < 0 ); | |
$hAttr{Name} ||= $sName; | |
$hAttr{LeftMargin} ||= 0; | |
$hAttr{RightMargin} ||= 0; | |
$hAttr{TopMargin} ||= 0; | |
$hAttr{BottomMargin} ||= 0; | |
$hAttr{HeaderMargin} ||= 0; | |
$hAttr{FooterMargin} ||= 0; | |
$hAttr{FitWidth} ||= 0; | |
$hAttr{FitHeight} ||= 0; | |
$hAttr{PrintGrid} ||= 0; | |
my $oWkS = Spreadsheet::ParseExcel::SaveParser::Worksheet->new(%hAttr); | |
$oWkS->{_Book} = $oBook; | |
$oWkS->{_SheetNo} = $oBook->{SheetCount}; | |
$oBook->{Worksheet}[ $oBook->{SheetCount} ] = $oWkS; | |
$oBook->{SheetCount}++; | |
return $oWkS; #$oBook->{SheetCount} - 1; | |
} | |
#------------------------------------------------------------------------------ | |
# AddFont (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub AddFont { | |
my ( $oBook, %hAttr ) = @_; | |
$hAttr{Name} ||= 'Arial'; | |
$hAttr{Height} ||= 10; | |
$hAttr{Bold} ||= 0; | |
$hAttr{Italic} ||= 0; | |
$hAttr{Underline} ||= 0; | |
$hAttr{Strikeout} ||= 0; | |
$hAttr{Super} ||= 0; | |
push @{ $oBook->{Font} }, Spreadsheet::ParseExcel::Font->new(%hAttr); | |
return $#{ $oBook->{Font} }; | |
} | |
#------------------------------------------------------------------------------ | |
# AddFormat (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub AddFormat { | |
my ( $oBook, %hAttr ) = @_; | |
$hAttr{Fill} ||= [ 0, 0, 0 ]; | |
$hAttr{BdrStyle} ||= [ 0, 0, 0, 0 ]; | |
$hAttr{BdrColor} ||= [ 0, 0, 0, 0 ]; | |
$hAttr{AlignH} ||= 0; | |
$hAttr{AlignV} ||= 0; | |
$hAttr{Rotate} ||= 0; | |
$hAttr{Landscape} ||= 0; | |
$hAttr{FmtIdx} ||= 0; | |
if ( !defined( $hAttr{Font} ) ) { | |
my $oFont; | |
if ( defined $hAttr{FontNo} ) { | |
$oFont = $oBook->{Font}[ $hAttr{FontNo} ]; | |
} | |
elsif ( !defined $oFont ) { | |
if ( $#{ $oBook->{Font} } >= 0 ) { | |
$oFont = $oBook->{Font}[0]; | |
} | |
else { | |
my $iNo = $oBook->AddFont; | |
$oFont = $oBook->{Font}[$iNo]; | |
} | |
} | |
$hAttr{Font} = $oFont; | |
} | |
push @{ $oBook->{Format} }, Spreadsheet::ParseExcel::Format->new(%hAttr); | |
return $#{ $oBook->{Format} }; | |
} | |
#------------------------------------------------------------------------------ | |
# AddCell (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub AddCell { | |
my ( $oBook, $iSheet, $iR, $iC, $sVal, $oCell, $sCode ) = @_; | |
my %rhKey; | |
$oCell ||= $oBook->{Worksheet}[$iSheet] | |
->{Cells}[$iR][$iC]->{FormatNo} || 0; | |
my $iFmt = | |
( UNIVERSAL::isa( $oCell, 'Spreadsheet::ParseExcel::Cell' ) ) | |
? $oCell->{FormatNo} | |
: ( ref($oCell) ) ? 0 | |
: $oCell + 0; | |
$rhKey{FormatNo} = $iFmt; | |
$rhKey{Format} = $oBook->{Format}[$iFmt]; | |
$rhKey{Val} = $sVal; | |
$rhKey{Code} = $sCode || '_native_'; | |
$oBook->{_CurSheet} = $iSheet; | |
my $merged_areas = $oBook->{Worksheet}[$iSheet]->get_merged_areas(); | |
my $merged_areas_h = {}; | |
if ($merged_areas) { | |
foreach my $range (@$merged_areas) { | |
$merged_areas_h->{$range->[0]}{$range->[1]} = $range; | |
} | |
} | |
my $oNewCell = | |
Spreadsheet::ParseExcel::_NewCell( $oBook, $iR, $iC, %rhKey ); | |
Spreadsheet::ParseExcel::_SetDimension( $oBook, $iR, $iC, $iC ); | |
$oNewCell->{Merged} = 1 | |
if exists $merged_areas_h->{$iR}{$iC}; | |
return $oNewCell; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::SaveParser::Workbook - A class for SaveParser Workbooks. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_SAVEPARSER_WORKBOOK | |
$fatpacked{"Spreadsheet/ParseExcel/SaveParser/Worksheet.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_SAVEPARSER_WORKSHEET'; | |
package Spreadsheet::ParseExcel::SaveParser::Worksheet; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::SaveParser::Worksheet - A class for SaveParser Worksheets. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
#============================================================================== | |
# Spreadsheet::ParseExcel::SaveParser::Worksheet | |
#============================================================================== | |
use base 'Spreadsheet::ParseExcel::Worksheet'; | |
our $VERSION = '0.65'; | |
sub new { | |
my ( $sClass, %rhIni ) = @_; | |
$sClass->SUPER::new(%rhIni); # returns object | |
} | |
#------------------------------------------------------------------------------ | |
# AddCell (for Spreadsheet::ParseExcel::SaveParser::Worksheet) | |
#------------------------------------------------------------------------------ | |
sub AddCell { | |
my ( $oSelf, $iR, $iC, $sVal, $oCell, $sCode ) = @_; | |
$oSelf->{_Book} | |
->AddCell( $oSelf->{_SheetNo}, $iR, $iC, $sVal, $oCell, $sCode ); | |
} | |
#------------------------------------------------------------------------------ | |
# Protect (for Spreadsheet::ParseExcel::SaveParser::Worksheet) | |
# - Password = undef -> No protect | |
# - Password = '' -> Protected. No password | |
# - Password = $pwd -> Protected. Password = $pwd | |
#------------------------------------------------------------------------------ | |
sub Protect { | |
my ( $oSelf, $sPassword ) = @_; | |
$oSelf->{Protect} = $sPassword; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::SaveParser::Worksheet - A class for SaveParser Worksheets. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_SAVEPARSER_WORKSHEET | |
$fatpacked{"Spreadsheet/ParseExcel/Utility.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_UTILITY'; | |
package Spreadsheet::ParseExcel::Utility; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Utility - Utility functions for ParseExcel. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
require Exporter; | |
use vars qw(@ISA @EXPORT_OK); | |
@ISA = qw(Exporter); | |
@EXPORT_OK = qw(ExcelFmt LocaltimeExcel ExcelLocaltime | |
col2int int2col sheetRef xls2csv); | |
our $VERSION = '0.65'; | |
my $qrNUMBER = qr/(^[+-]?\d+(\.\d+)?$)|(^[+-]?\d+\.?(\d*)[eE][+-](\d+))$/; | |
############################################################################### | |
# | |
# ExcelFmt() | |
# | |
# This function takes an Excel style number format and converts a number into | |
# that format. for example: 'hh:mm:ss AM/PM' + 0.01023148 = '12:14:44 AM'. | |
# | |
# It does this with a type of templating mechanism. The format string is parsed | |
# to identify tokens that need to be replaced and their position within the | |
# string is recorded. These can be thought of as placeholders. The number is | |
# then converted to the required formats and substituted into the placeholders. | |
# | |
# Interested parties should refer to the Excel documentation on cell formats for | |
# more information: http://office.microsoft.com/en-us/excel/HP051995001033.aspx | |
# The Microsoft documentation for the Excel Binary File Format, [MS-XLS].pdf, | |
# also contains a ABNF grammar for number format strings. | |
# | |
# Maintainers notes: | |
# ================== | |
# | |
# Note on format subsections: | |
# A format string can contain 4 possible sub-sections separated by semi-colons: | |
# Positive numbers, negative numbers, zero values, and text. | |
# For example: _(* #,##0_);_(* (#,##0);_(* "-"_);_(@_) | |
# | |
# Note on conditional formats. | |
# A number format in Excel can have a conditional expression such as: | |
# [>9999999](000)000-0000;000-0000 | |
# This is equivalent to the following in Perl: | |
# $format = $number > 9999999 ? '(000)000-0000' : '000-0000'; | |
# Nested conditionals are also possible but we don't support them. | |
# | |
# Efficiency: The excessive use of substr() isn't very efficient. However, | |
# it probably doesn't merit rewriting this function with a parser or regular | |
# expressions and \G. | |
# | |
# TODO: I think the single quote handling may not be required. Check. | |
# | |
sub ExcelFmt { | |
my ( $format_str, $number, $is_1904, $number_type, $want_subformats ) = @_; | |
# Return text strings without further formatting. | |
return $number unless $number =~ $qrNUMBER; | |
# Handle OpenOffice.org GENERAL format. | |
$format_str = '@' if uc($format_str) eq "GENERAL"; | |
# Check for a conditional at the start of the format. See notes above. | |
my $conditional; | |
if ( $format_str =~ /^\[([<>=][^\]]+)\](.*)$/ ) { | |
$conditional = $1; | |
$format_str = $2; | |
} | |
# Ignore the underscore token which is used to indicate a padding space. | |
$format_str =~ s/_/ /g; | |
# Split the format string into 4 possible sub-sections: positive numbers, | |
# negative numbers, zero values, and text. See notes above. | |
my @formats; | |
my $section = 0; | |
my $double_quote = 0; | |
my $single_quote = 0; | |
# Initial parsing of the format string to remove escape characters. This | |
# also handles quoted strings. See note about single quotes above. | |
CHARACTER: | |
for my $char ( split //, $format_str ) { | |
if ( $double_quote or $single_quote ) { | |
$formats[$section] .= $char; | |
$double_quote = 0 if $char eq '"'; | |
$single_quote = 0; | |
next CHARACTER; | |
} | |
if ( $char eq ';' ) { | |
$section++; | |
next CHARACTER; | |
} | |
elsif ( $char eq '"' ) { | |
$double_quote = 1; | |
} | |
elsif ( $char eq '!' ) { | |
$single_quote = 1; | |
} | |
elsif ( $char eq '\\' ) { | |
$single_quote = 1; | |
} | |
elsif ( $char eq '(' ) { | |
next CHARACTER; # Ignore. | |
} | |
elsif ( $char eq ')' ) { | |
next CHARACTER; # Ignore. | |
} | |
# Convert upper case OpenOffice.org date/time formats to lowercase.. | |
$char = lc($char) if $char =~ /[DMYHS]/; | |
$formats[$section] .= $char; | |
} | |
# Select the appropriate format from the 4 possible sub-sections: | |
# positive numbers, negative numbers, zero values, and text. | |
# We ignore the Text section since non-numeric values are returned | |
# unformatted at the start of the function. | |
my $format; | |
$section = 0; | |
if ( @formats == 1 ) { | |
$section = 0; | |
} | |
elsif ( @formats == 2 ) { | |
if ( $number < 0 ) { | |
$section = 1; | |
} | |
else { | |
$section = 0; | |
} | |
} | |
elsif ( @formats == 3 ) { | |
if ( $number == 0 ) { | |
$section = 2; | |
} | |
elsif ( $number < 0 ) { | |
$section = 1; | |
} | |
else { | |
$section = 0; | |
} | |
} | |
else { | |
$section = 0; | |
} | |
# Override the previous choice if the format is conditional. | |
if ($conditional) { | |
# TODO. Replace string eval with a function. | |
$section = eval "$number $conditional" ? 0 : 1; | |
} | |
# We now have the required format. | |
$format = $formats[$section]; | |
# The format string can contain one of the following colours: | |
# [Black] [Blue] [Cyan] [Green] [Magenta] [Red] [White] [Yellow] | |
# or the string [ColorX] where x is a colour index from 1 to 56. | |
# We don't use the colour but we return it to the caller. | |
# | |
my $color = ''; | |
if ( $format =~ s/^(\[[A-Za-z]{3,}(\d{1,2})?\])// ) { | |
$color = $1; | |
} | |
# Remove the locale, such as [$-409], from the format string. | |
my $locale = ''; | |
if ( $format =~ s/^(\[\$?-F?\d+\])// ) { | |
$locale = $1; | |
} | |
# Replace currency locale, such as [$$-409], with $ in the format string. | |
# See the RT#60547 test cases in 21_number_format_user.t. | |
if ( $format =~ s/(\[\$([^-]+)(-\d+)?\])/$2/s ) { | |
$locale = $1; | |
} | |
# Remove leading # from '# ?/?', '# ??/??' fraction formats. | |
$format =~ s{# \?}{?}g; | |
# Parse the format string and create an AoA of placeholders that contain | |
# the parts of the string to be replaced. The format of the information | |
# stored is: [ $token, $start_pos, $end_pos, $option_info ]. | |
# | |
my $format_mode = ''; # Either: '', 'number', 'date' | |
my $pos = 0; # Character position within format string. | |
my @placeholders = (); # Arefs with parts of the format to be replaced. | |
my $token = ''; # The actual format extracted from the total str. | |
my $start_pos; # A position variable. Initial parser position. | |
my $token_start = -1; # A position variable. | |
my $decimal_pos = -1; # Position of the punctuation char "." or ",". | |
my $comma_count = 0; # Count of the commas in the format. | |
my $is_fraction = 0; # Number format is a fraction. | |
my $is_currency = 0; # Number format is a currency. | |
my $is_percent = 0; # Number format is a percentage. | |
my $is_12_hour = 0; # Time format is using 12 hour clock. | |
my $seen_dot = 0; # Treat only the first "." as the decimal point. | |
# Parse the format. | |
PARSER: | |
while ( $pos < length $format ) { | |
$start_pos = $pos; | |
my $char = substr( $format, $pos, 1 ); | |
# Ignore control format characters such as '#0+-.?eE,%'. However, | |
# only ignore '.' if it is the first one encountered. RT 45502. | |
if ( ( !$seen_dot && $char !~ /[#0\+\-\.\?eE\,\%]/ ) | |
|| $char !~ /[#0\+\-\?eE\,\%]/ ) | |
{ | |
if ( $token_start != -1 ) { | |
push @placeholders, | |
[ | |
substr( $format, $token_start, $pos - $token_start ), | |
$decimal_pos, $pos - $token_start | |
]; | |
$token_start = -1; | |
} | |
} | |
# Processing for quoted strings within the format. See notes above. | |
if ( $char eq '"' ) { | |
$double_quote = $double_quote ? 0 : 1; | |
$pos++; | |
next PARSER; | |
} | |
elsif ( $char eq '!' ) { | |
$single_quote = 1; | |
$pos++; | |
next PARSER; | |
} | |
elsif ( $char eq '\\' ) { | |
if ( $single_quote != 1 ) { | |
$single_quote = 1; | |
$pos++; | |
next PARSER; | |
} | |
} | |
if ( ( defined($double_quote) and ($double_quote) ) | |
or ( defined($single_quote) and ($single_quote) ) | |
or ( $seen_dot && $char eq '.' ) ) | |
{ | |
$single_quote = 0; | |
if ( | |
( $format_mode ne 'date' ) | |
and ( ( substr( $format, $pos, 2 ) eq "\x81\xA2" ) | |
|| ( substr( $format, $pos, 2 ) eq "\x81\xA3" ) | |
|| ( substr( $format, $pos, 2 ) eq "\xA2\xA4" ) | |
|| ( substr( $format, $pos, 2 ) eq "\xA2\xA5" ) ) | |
) | |
{ | |
# The above matches are currency symbols. | |
push @placeholders, | |
[ substr( $format, $pos, 2 ), length($token), 2 ]; | |
$is_currency = 1; | |
$pos += 2; | |
} | |
else { | |
$pos++; | |
} | |
} | |
elsif ( | |
( $char =~ /[#0\+\.\?eE\,\%]/ ) | |
|| ( ( $format_mode ne 'date' ) | |
and ( ( $char eq '-' ) || ( $char eq '(' ) || ( $char eq ')' ) ) | |
) | |
) | |
{ | |
$format_mode = 'number' unless $format_mode; | |
if ( substr( $format, $pos, 1 ) =~ /[#0]/ ) { | |
if ( | |
substr( $format, $pos ) =~ | |
/^([#0]+[\.]?[0#]*[eE][\+\-][0#]+)/ ) | |
{ | |
push @placeholders, [ $1, $pos, length($1) ]; | |
$pos += length($1); | |
} | |
else { | |
if ( $token_start == -1 ) { | |
$token_start = $pos; | |
$decimal_pos = length($token); | |
} | |
} | |
} | |
elsif ( substr( $format, $pos, 1 ) eq '?' ) { | |
# Look for a fraction format like ?/? or ??/?? | |
if ( $token_start != -1 ) { | |
push @placeholders, | |
[ | |
substr( | |
$format, $token_start, $pos - $token_start + 1 | |
), | |
$decimal_pos, | |
$pos - $token_start + 1 | |
]; | |
} | |
$token_start = $pos; | |
# Find the end of the fraction format. | |
FRACTION: | |
while ( $pos < length($format) ) { | |
if ( substr( $format, $pos, 1 ) eq '/' ) { | |
$is_fraction = 1; | |
} | |
elsif ( substr( $format, $pos, 1 ) eq '?' ) { | |
$pos++; | |
next FRACTION; | |
} | |
else { | |
if ( $is_fraction | |
&& ( substr( $format, $pos, 1 ) =~ /[0-9]/ ) ) | |
{ | |
# TODO: Could invert if() logic and remove this. | |
$pos++; | |
next FRACTION; | |
} | |
else { | |
last FRACTION; | |
} | |
} | |
$pos++; | |
} | |
$pos--; | |
push @placeholders, | |
[ | |
substr( $format, $token_start, $pos - $token_start + 1 ), | |
length($token), $pos - $token_start + 1 | |
]; | |
$token_start = -1; | |
} | |
elsif ( substr( $format, $pos, 3 ) =~ /^[eE][\+\-][0#]$/ ) { | |
if ( substr( $format, $pos ) =~ /([eE][\+\-][0#]+)/ ) { | |
push @placeholders, [ $1, $pos, length($1) ]; | |
$pos += length($1); | |
} | |
$token_start = -1; | |
} | |
else { | |
if ( $token_start != -1 ) { | |
push @placeholders, | |
[ | |
substr( $format, $token_start, $pos - $token_start ), | |
$decimal_pos, $pos - $token_start | |
]; | |
$token_start = -1; | |
} | |
if ( substr( $format, $pos, 1 ) =~ /[\+\-]/ ) { | |
push @placeholders, | |
[ substr( $format, $pos, 1 ), length($token), 1 ]; | |
$is_currency = 1; | |
} | |
elsif ( substr( $format, $pos, 1 ) eq '.' ) { | |
push @placeholders, | |
[ substr( $format, $pos, 1 ), length($token), 1 ]; | |
$seen_dot = 1; | |
} | |
elsif ( substr( $format, $pos, 1 ) eq ',' ) { | |
$comma_count++; | |
push @placeholders, | |
[ substr( $format, $pos, 1 ), length($token), 1 ]; | |
} | |
elsif ( substr( $format, $pos, 1 ) eq '%' ) { | |
$is_percent = 1; | |
} | |
elsif (( substr( $format, $pos, 1 ) eq '(' ) | |
|| ( substr( $format, $pos, 1 ) eq ')' ) ) | |
{ | |
push @placeholders, | |
[ substr( $format, $pos, 1 ), length($token), 1 ]; | |
$is_currency = 1; | |
} | |
} | |
$pos++; | |
} | |
elsif ( $char =~ /[ymdhsapg]/i ) { | |
$format_mode = 'date' unless $format_mode; | |
if ( substr( $format, $pos, 5 ) =~ /am\/pm/i ) { | |
push @placeholders, [ 'am/pm', length($token), 5 ]; | |
$is_12_hour = 1; | |
$pos += 5; | |
} | |
elsif ( substr( $format, $pos, 3 ) =~ /a\/p/i ) { | |
push @placeholders, [ 'a/p', length($token), 3 ]; | |
$is_12_hour = 1; | |
$pos += 3; | |
} | |
elsif ( substr( $format, $pos, 5 ) eq 'mmmmm' ) { | |
push @placeholders, [ 'mmmmm', length($token), 5 ]; | |
$pos += 5; | |
} | |
elsif (( substr( $format, $pos, 4 ) eq 'mmmm' ) | |
|| ( substr( $format, $pos, 4 ) eq 'dddd' ) | |
|| ( substr( $format, $pos, 4 ) eq 'yyyy' ) | |
|| ( substr( $format, $pos, 4 ) eq 'ggge' ) ) | |
{ | |
push @placeholders, | |
[ substr( $format, $pos, 4 ), length($token), 4 ]; | |
$pos += 4; | |
} | |
elsif (( substr( $format, $pos, 3 ) eq 'ddd' ) | |
|| ( substr( $format, $pos, 3 ) eq 'mmm' ) | |
|| ( substr( $format, $pos, 3 ) eq 'yyy' ) ) | |
{ | |
push @placeholders, | |
[ substr( $format, $pos, 3 ), length($token), 3 ]; | |
$pos += 3; | |
} | |
elsif (( substr( $format, $pos, 2 ) eq 'yy' ) | |
|| ( substr( $format, $pos, 2 ) eq 'mm' ) | |
|| ( substr( $format, $pos, 2 ) eq 'dd' ) | |
|| ( substr( $format, $pos, 2 ) eq 'hh' ) | |
|| ( substr( $format, $pos, 2 ) eq 'ss' ) | |
|| ( substr( $format, $pos, 2 ) eq 'ge' ) ) | |
{ | |
if ( | |
( substr( $format, $pos, 2 ) eq 'mm' ) | |
&& (@placeholders) | |
&& ( ( $placeholders[-1]->[0] eq 'h' ) | |
or ( $placeholders[-1]->[0] eq 'hh' ) ) | |
) | |
{ | |
# For this case 'm' is minutes not months. | |
push @placeholders, [ 'mm', length($token), 2, 'minutes' ]; | |
} | |
else { | |
push @placeholders, | |
[ substr( $format, $pos, 2 ), length($token), 2 ]; | |
} | |
if ( ( substr( $format, $pos, 2 ) eq 'ss' ) | |
&& ( @placeholders > 1 ) ) | |
{ | |
if ( ( $placeholders[-2]->[0] eq 'm' ) | |
|| ( $placeholders[-2]->[0] eq 'mm' ) ) | |
{ | |
# For this case 'm' is minutes not months. | |
push( @{ $placeholders[-2] }, 'minutes' ); | |
} | |
} | |
$pos += 2; | |
} | |
elsif (( substr( $format, $pos, 1 ) eq 'm' ) | |
|| ( substr( $format, $pos, 1 ) eq 'd' ) | |
|| ( substr( $format, $pos, 1 ) eq 'h' ) | |
|| ( substr( $format, $pos, 1 ) eq 's' ) ) | |
{ | |
if ( | |
( substr( $format, $pos, 1 ) eq 'm' ) | |
&& (@placeholders) | |
&& ( ( $placeholders[-1]->[0] eq 'h' ) | |
or ( $placeholders[-1]->[0] eq 'hh' ) ) | |
) | |
{ | |
# For this case 'm' is minutes not months. | |
push @placeholders, [ 'm', length($token), 1, 'minutes' ]; | |
} | |
else { | |
push @placeholders, | |
[ substr( $format, $pos, 1 ), length($token), 1 ]; | |
} | |
if ( ( substr( $format, $pos, 1 ) eq 's' ) | |
&& ( @placeholders > 1 ) ) | |
{ | |
if ( ( $placeholders[-2]->[0] eq 'm' ) | |
|| ( $placeholders[-2]->[0] eq 'mm' ) ) | |
{ | |
# For this case 'm' is minutes not months. | |
push( @{ $placeholders[-2] }, 'minutes' ); | |
} | |
} | |
$pos += 1; | |
} | |
} | |
elsif ( ( substr( $format, $pos, 3 ) eq '[h]' ) ) { | |
$format_mode = 'date' unless $format_mode; | |
push @placeholders, [ '[h]', length($token), 3 ]; | |
$pos += 3; | |
} | |
elsif ( ( substr( $format, $pos, 4 ) eq '[mm]' ) ) { | |
$format_mode = 'date' unless $format_mode; | |
push @placeholders, [ '[mm]', length($token), 4 ]; | |
$pos += 4; | |
} | |
elsif ( $char eq '@' ) { | |
push @placeholders, [ '@', length($token), 1 ]; | |
$pos++; | |
} | |
elsif ( $char eq '*' ) { | |
push @placeholders, | |
[ substr( $format, $pos, 1 ), length($token), 1 ]; | |
} | |
else { | |
$pos++; | |
} | |
$pos++ if ( $pos == $start_pos ); #No Format match | |
$token .= substr( $format, $start_pos, $pos - $start_pos ); | |
} # End of parsing. | |
# Copy the located format string to a result string that we will perform | |
# the substitutions on and return to the user. | |
my $result = $token; | |
# Add a placeholder between the decimal/comma and end of the token, if any. | |
if ( $token_start != -1 ) { | |
push @placeholders, | |
[ | |
substr( $format, $token_start, $pos - $token_start + 1 ), | |
$decimal_pos, $pos - $token_start + 1 | |
]; | |
} | |
# | |
# In the next sections we process date, number and text formats. We take a | |
# format such as yyyy/mm/dd and replace it with something like 2008/12/25. | |
# | |
if ( ( $format_mode eq 'date' ) && ( $number =~ $qrNUMBER ) ) { | |
# The maximum allowable date in Excel is 9999-12-31T23:59:59.000 which | |
# equates to 2958465.999+ in the 1900 epoch and 2957003.999+ in the | |
# 1904 epoch. We use 0 as the minimum in both epochs. The 1904 system | |
# actually supports negative numbers but that isn't worth the effort. | |
my $min_date = 0; | |
my $max_date = 2958466; | |
$max_date = 2957004 if $is_1904; | |
if ( $number < $min_date || $number >= $max_date ) { | |
return $number; # Return unformatted number. | |
} | |
# Process date formats. | |
my @time = ExcelLocaltime( $number, $is_1904 ); | |
# 0 1 2 3 4 5 6 7 | |
my ( $sec, $min, $hour, $day, $month, $year, $wday, $msec ) = @time; | |
$month++; # localtime() zero indexed month. | |
$year += 1900; # localtime() year. | |
my @full_month_name = qw( | |
None January February March April May June July | |
August September October November December | |
); | |
my @short_month_name = qw( | |
None Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec | |
); | |
my @full_day_name = qw( | |
Sunday Monday Tuesday Wednesday Thursday Friday Saturday | |
); | |
my @short_day_name = qw( | |
Sun Mon Tue Wed Thu Fri Sat | |
); | |
# Replace the placeholders in the template such as yyyy mm dd with | |
# actual numbers or strings. | |
my $replacement; | |
for my $placeholder ( reverse @placeholders ) { | |
if ( $placeholder->[-1] eq 'minutes' ) { | |
# For this case 'm/mm' is minutes not months. | |
if ( $placeholder->[0] eq 'mm' ) { | |
$replacement = sprintf( "%02d", $min ); | |
} | |
else { | |
$replacement = sprintf( "%d", $min ); | |
} | |
} | |
elsif ( $placeholder->[0] eq 'yyyy' ) { | |
# 4 digit Year. 2000 -> 2000. | |
$replacement = sprintf( '%04d', $year ); | |
} | |
elsif ( $placeholder->[0] eq 'yy' ) { | |
# 2 digit Year. 2000 -> 00. | |
$replacement = sprintf( '%02d', $year % 100 ); | |
} | |
elsif ( $placeholder->[0] eq 'mmmmm' ) { | |
# First character of the month name. 1 -> J. | |
$replacement = substr( $short_month_name[$month], 0, 1 ); | |
} | |
elsif ( $placeholder->[0] eq 'mmmm' ) { | |
# Full month name. 1 -> January. | |
$replacement = $full_month_name[$month]; | |
} | |
elsif ( $placeholder->[0] eq 'mmm' ) { | |
# Short month name. 1 -> Jan. | |
$replacement = $short_month_name[$month]; | |
} | |
elsif ( $placeholder->[0] eq 'mm' ) { | |
# 2 digit month. 1 -> 01. | |
$replacement = sprintf( '%02d', $month ); | |
} | |
elsif ( $placeholder->[0] eq 'm' ) { | |
# 1 digit month. 1 -> 1. | |
$replacement = sprintf( '%d', $month ); | |
} | |
elsif ( $placeholder->[0] eq 'dddd' ) { | |
# Full day name. Wednesday (for example.) | |
$replacement = $full_day_name[$wday]; | |
} | |
elsif ( $placeholder->[0] eq 'ddd' ) { | |
# Short day name. Wed (for example.) | |
$replacement = $short_day_name[$wday]; | |
} | |
elsif ( $placeholder->[0] eq 'dd' ) { | |
# 2 digit day. 1 -> 01. | |
$replacement = sprintf( '%02d', $day ); | |
} | |
elsif ( $placeholder->[0] eq 'd' ) { | |
# 1 digit day. 1 -> 1. | |
$replacement = sprintf( '%d', $day ); | |
} | |
elsif ( $placeholder->[0] eq 'hh' ) { | |
# 2 digit hour. | |
if ($is_12_hour) { | |
my $hour_tmp = $hour % 12; | |
$hour_tmp = 12 if $hour % 12 == 0; | |
$replacement = sprintf( '%d', $hour_tmp ); | |
} | |
else { | |
$replacement = sprintf( '%02d', $hour ); | |
} | |
} | |
elsif ( $placeholder->[0] eq 'h' ) { | |
# 1 digit hour. | |
if ($is_12_hour) { | |
my $hour_tmp = $hour % 12; | |
$hour_tmp = 12 if $hour % 12 == 0; | |
$replacement = sprintf( '%2d', $hour_tmp ); | |
} | |
else { | |
$replacement = sprintf( '%d', $hour ); | |
} | |
} | |
elsif ( $placeholder->[0] eq 'ss' ) { | |
# 2 digit seconds. | |
$replacement = sprintf( '%02d', $sec ); | |
} | |
elsif ( $placeholder->[0] eq 's' ) { | |
# 1 digit seconds. | |
$replacement = sprintf( '%d', $sec ); | |
} | |
elsif ( $placeholder->[0] eq 'am/pm' ) { | |
# AM/PM. | |
$replacement = ( $hour >= 12 ) ? 'PM' : 'AM'; | |
} | |
elsif ( $placeholder->[0] eq 'a/p' ) { | |
# AM/PM. | |
$replacement = ( $hour >= 12 ) ? 'P' : 'A'; | |
} | |
elsif ( $placeholder->[0] eq '.' ) { | |
# Decimal point for seconds. | |
$replacement = '.'; | |
} | |
elsif ( $placeholder->[0] =~ /(^0+$)/ ) { | |
# Milliseconds. For example h:ss.000. | |
my $length = length($1); | |
$replacement = | |
substr( sprintf( "%.${length}f", $msec / 1000 ), 2, $length ); | |
} | |
elsif ( $placeholder->[0] eq '[h]' ) { | |
# Hours modulus 24. 25 displays as 25 not as 1. | |
$replacement = sprintf( '%d', int($number) * 24 + $hour ); | |
} | |
elsif ( $placeholder->[0] eq '[mm]' ) { | |
# Mins modulus 60. 72 displays as 72 not as 12. | |
$replacement = | |
sprintf( '%d', ( int($number) * 24 + $hour ) * 60 + $min ); | |
} | |
elsif ( $placeholder->[0] eq 'ge' ) { | |
require Spreadsheet::ParseExcel::FmtJapan; | |
# Japanese Nengo (aka Gengo) in initialism (abbr. name) | |
$replacement = | |
Spreadsheet::ParseExcel::FmtJapan::CnvNengo( abbr_name => @time ); | |
} | |
elsif ( $placeholder->[0] eq 'ggge' ) { | |
require Spreadsheet::ParseExcel::FmtJapan; | |
# Japanese Nengo (aka Gengo) in Kanji (full name) | |
$replacement = | |
Spreadsheet::ParseExcel::FmtJapan::CnvNengo( name => @time ); | |
} | |
elsif ( $placeholder->[0] eq '@' ) { | |
# Text format. | |
$replacement = $number; | |
} | |
elsif ( $placeholder->[0] eq ',' ) { | |
next; | |
} | |
# Substitute the replacement string back into the template. | |
substr( $result, $placeholder->[1], $placeholder->[2], | |
$replacement ); | |
} | |
} | |
elsif ( ( $format_mode eq 'number' ) && ( $number =~ $qrNUMBER ) ) { | |
# Process non date formats. | |
if (@placeholders) { | |
while ( $placeholders[-1]->[0] eq ',' ) { | |
$comma_count--; | |
substr( | |
$result, | |
$placeholders[-1]->[1], | |
$placeholders[-1]->[2], '' | |
); | |
$number /= 1000; | |
pop @placeholders; | |
} | |
my $number_format = join( '', map { $_->[0] } @placeholders ); | |
my $number_result; | |
my $str_length = 0; | |
my $engineering = 0; | |
my $is_decimal = 0; | |
my $is_integer = 0; | |
my $after_decimal = undef; | |
for my $token ( split //, $number_format ) { | |
if ( $token eq '.' ) { | |
$str_length++; | |
$is_decimal = 1; | |
} | |
elsif ( ( $token eq 'E' ) || ( $token eq 'e' ) ) { | |
$engineering = 1; | |
} | |
elsif ( $token eq '0' ) { | |
$str_length++; | |
$after_decimal++ if $is_decimal; | |
$is_integer = 1; | |
} | |
elsif ( $token eq '#' ) { | |
$after_decimal++ if $is_decimal; | |
$is_integer = 1; | |
} | |
elsif ( $token eq '?' ) { | |
$after_decimal++ if $is_decimal; | |
} | |
} | |
$number *= 100.0 if $is_percent; | |
my $data = ($is_currency) ? abs($number) : $number + 0; | |
if ($is_fraction) { | |
$number_result = sprintf( "%0${str_length}d", int($data) ); | |
} | |
else { | |
if ($is_decimal) { | |
if ( defined $after_decimal ) { | |
$number_result = | |
sprintf "%0${str_length}.${after_decimal}f", $data; | |
} | |
else { | |
$number_result = sprintf "%0${str_length}f", $data; | |
} | |
# Fix for Perl and sprintf not rounding up like Excel. | |
# http://rt.cpan.org/Public/Bug/Display.html?id=45626 | |
if ( $data =~ /^${number_result}5/ ) { | |
$number_result = | |
sprintf "%0${str_length}.${after_decimal}f", | |
$data . '1'; | |
} | |
} | |
else { | |
$number_result = sprintf( "%0${str_length}.0f", $data ); | |
} | |
} | |
$number_result = AddComma($number_result) if $comma_count > 0; | |
my $number_length = length($number_result); | |
my $decimal_pos = -1; | |
my $replacement; | |
for ( my $i = @placeholders - 1 ; $i >= 0 ; $i-- ) { | |
my $placeholder = $placeholders[$i]; | |
if ( $placeholder->[0] =~ | |
/([#0]*)([\.]?)([0#]*)([eE])([\+\-])([0#]+)/ ) | |
{ | |
substr( $result, $placeholder->[1], $placeholder->[2], | |
MakeE( $placeholder->[0], $number ) ); | |
} | |
elsif ( $placeholder->[0] =~ /\// ) { | |
substr( $result, $placeholder->[1], $placeholder->[2], | |
MakeFraction( $placeholder->[0], $number, $is_integer ) | |
); | |
} | |
elsif ( $placeholder->[0] eq '.' ) { | |
$number_length--; | |
$decimal_pos = $number_length; | |
} | |
elsif ( $placeholder->[0] eq '+' ) { | |
substr( $result, $placeholder->[1], $placeholder->[2], | |
( $number > 0 ) | |
? '+' | |
: ( ( $number == 0 ) ? '+' : '-' ) ); | |
} | |
elsif ( $placeholder->[0] eq '-' ) { | |
substr( $result, $placeholder->[1], $placeholder->[2], | |
( $number > 0 ) | |
? '' | |
: ( ( $number == 0 ) ? '' : '-' ) ); | |
} | |
elsif ( $placeholder->[0] eq '@' ) { | |
substr( $result, $placeholder->[1], $placeholder->[2], | |
$number ); | |
} | |
elsif ( $placeholder->[0] eq '*' ) { | |
substr( $result, $placeholder->[1], $placeholder->[2], '' ); | |
} | |
elsif (( $placeholder->[0] eq "\xA2\xA4" ) | |
or ( $placeholder->[0] eq "\xA2\xA5" ) | |
or ( $placeholder->[0] eq "\x81\xA2" ) | |
or ( $placeholder->[0] eq "\x81\xA3" ) ) | |
{ | |
substr( | |
$result, $placeholder->[1], | |
$placeholder->[2], $placeholder->[0] | |
); | |
} | |
elsif (( $placeholder->[0] eq '(' ) | |
or ( $placeholder->[0] eq ')' ) ) | |
{ | |
substr( | |
$result, $placeholder->[1], | |
$placeholder->[2], $placeholder->[0] | |
); | |
} | |
else { | |
if ( $number_length > 0 ) { | |
if ( $i <= 0 ) { | |
$replacement = | |
substr( $number_result, 0, $number_length ); | |
$number_length = 0; | |
} | |
else { | |
my $real_part_length = length( $placeholder->[0] ); | |
if ( $decimal_pos >= 0 ) { | |
my $format = $placeholder->[0]; | |
$format =~ s/^#+//; | |
$real_part_length = length $format; | |
$real_part_length = | |
( $number_length <= $real_part_length ) | |
? $number_length | |
: $real_part_length; | |
} | |
else { | |
$real_part_length = | |
( $number_length <= $real_part_length ) | |
? $number_length | |
: $real_part_length; | |
} | |
$replacement = | |
substr( $number_result, | |
$number_length - $real_part_length, | |
$real_part_length ); | |
$number_length -= $real_part_length; | |
} | |
} | |
else { | |
$replacement = ''; | |
} | |
substr( $result, $placeholder->[1], $placeholder->[2], | |
"\x00" . $replacement ); | |
} | |
} | |
$replacement = | |
( $number_length > 0 ) | |
? substr( $number_result, 0, $number_length ) | |
: ''; | |
$result =~ s/\x00/$replacement/; | |
$result =~ s/\x00//g; | |
} | |
} | |
else { | |
# Process text formats | |
my $is_text = 0; | |
for ( my $i = @placeholders - 1 ; $i >= 0 ; $i-- ) { | |
my $placeholder = $placeholders[$i]; | |
if ( $placeholder->[0] eq '@' ) { | |
substr( $result, $placeholder->[1], $placeholder->[2], | |
$number ); | |
$is_text++; | |
} | |
else { | |
substr( $result, $placeholder->[1], $placeholder->[2], '' ); | |
} | |
} | |
$result = $number unless $is_text; | |
} # End of placeholder substitutions. | |
# Trim the leading and trailing whitespace from the results. | |
$result =~ s/^\s+//; | |
$result =~ s/\s+$//; | |
# Fix for negative currency. | |
$result =~ s/^\$\-/\-\$/; | |
$result =~ s/^\$ \-/\-\$ /; | |
# Return color and locale strings if required. | |
if ($want_subformats) { | |
return ( $result, $color, $locale ); | |
} | |
else { | |
return $result; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# AddComma (for Spreadsheet::ParseExcel::Utility) | |
#------------------------------------------------------------------------------ | |
sub AddComma { | |
my ($sNum) = @_; | |
if ( $sNum =~ /^([^\d]*)(\d\d\d\d+)(\.*.*)$/ ) { | |
my ( $sPre, $sObj, $sAft ) = ( $1, $2, $3 ); | |
for ( my $i = length($sObj) - 3 ; $i > 0 ; $i -= 3 ) { | |
substr( $sObj, $i, 0, ',' ); | |
} | |
return $sPre . $sObj . $sAft; | |
} | |
else { | |
return $sNum; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# MakeFraction (for Spreadsheet::ParseExcel::Utility) | |
#------------------------------------------------------------------------------ | |
sub MakeFraction { | |
my ( $sFmt, $iData, $iFlg ) = @_; | |
my $iBunbo; | |
my $iShou; | |
#1. Init | |
# print "FLG: $iFlg\n"; | |
if ($iFlg) { | |
$iShou = $iData - int($iData); | |
return '' if ( $iShou == 0 ); | |
} | |
else { | |
$iShou = $iData; | |
} | |
$iShou = abs($iShou); | |
my $sSWk; | |
#2.Calc BUNBO | |
#2.1 BUNBO defined | |
if ( $sFmt =~ /\/(\d+)$/ ) { | |
$iBunbo = $1; | |
return sprintf( "%d/%d", $iShou * $iBunbo, $iBunbo ); | |
} | |
else { | |
#2.2 Calc BUNBO | |
$sFmt =~ /\/(\?+)$/; | |
my $iKeta = length($1); | |
my $iSWk = 1; | |
my $sSWk = ''; | |
my $iBunsi; | |
for ( my $iBunbo = 2 ; $iBunbo < 10**$iKeta ; $iBunbo++ ) { | |
$iBunsi = int( $iShou * $iBunbo + 0.5 ); | |
my $iCmp = abs( $iShou - ( $iBunsi / $iBunbo ) ); | |
if ( $iCmp < $iSWk ) { | |
$iSWk = $iCmp; | |
$sSWk = sprintf( "%d/%d", $iBunsi, $iBunbo ); | |
last if ( $iSWk == 0 ); | |
} | |
} | |
return $sSWk; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# MakeE (for Spreadsheet::ParseExcel::Utility) | |
#------------------------------------------------------------------------------ | |
sub MakeE { | |
my ( $sFmt, $iData ) = @_; | |
$sFmt =~ /(([#0]*)[\.]?[#0]*)([eE])([\+\-][0#]+)/; | |
my ( $sKari, $iKeta, $sE, $sSisu ) = ( $1, length($2), $3, $4 ); | |
$iKeta = 1 if ( $iKeta <= 0 ); | |
my $iLog10 = 0; | |
$iLog10 = ( $iData == 0 ) ? 0 : ( log( abs($iData) ) / log(10) ); | |
$iLog10 = ( | |
int( $iLog10 / $iKeta ) + | |
( ( ( $iLog10 - int( $iLog10 / $iKeta ) ) < 0 ) ? -1 : 0 ) ) * $iKeta; | |
my $sUe = ExcelFmt( $sKari, $iData * ( 10**( $iLog10 * -1 ) ), 0 ); | |
my $sShita = ExcelFmt( $sSisu, $iLog10, 0 ); | |
return $sUe . $sE . $sShita; | |
} | |
#------------------------------------------------------------------------------ | |
# LeapYear (for Spreadsheet::ParseExcel::Utility) | |
#------------------------------------------------------------------------------ | |
sub LeapYear { | |
my ($iYear) = @_; | |
return 1 if ( $iYear == 1900 ); #Special for Excel | |
return ( ( ( $iYear % 4 ) == 0 ) | |
&& ( ( $iYear % 100 ) || ( $iYear % 400 ) == 0 ) ) | |
? 1 | |
: 0; | |
} | |
#------------------------------------------------------------------------------ | |
# LocaltimeExcel (for Spreadsheet::ParseExcel::Utility) | |
#------------------------------------------------------------------------------ | |
sub LocaltimeExcel { | |
my ( $iSec, $iMin, $iHour, $iDay, $iMon, $iYear, $iwDay, $iMSec, $flg1904 ) | |
= @_; | |
#0. Init | |
$iMon++; | |
$iYear += 1900; | |
#1. Calc Time | |
my $iTime; | |
$iTime = $iHour; | |
$iTime *= 60; | |
$iTime += $iMin; | |
$iTime *= 60; | |
$iTime += $iSec; | |
$iTime += $iMSec / 1000.0 if ( defined($iMSec) ); | |
$iTime /= 86400.0; #3600*24(1day in seconds) | |
my $iY; | |
my $iYDays; | |
#2. Calc Days | |
if ($flg1904) { | |
$iY = 1904; | |
$iTime--; #Start from Jan 1st | |
$iYDays = 366; | |
} | |
else { | |
$iY = 1900; | |
$iYDays = 366; #In Excel 1900 is leap year (That's not TRUE!) | |
} | |
while ( $iY < $iYear ) { | |
$iTime += $iYDays; | |
$iY++; | |
$iYDays = ( LeapYear($iY) ) ? 366 : 365; | |
} | |
for ( my $iM = 1 ; $iM < $iMon ; $iM++ ) { | |
if ( $iM == 1 | |
|| $iM == 3 | |
|| $iM == 5 | |
|| $iM == 7 | |
|| $iM == 8 | |
|| $iM == 10 | |
|| $iM == 12 ) | |
{ | |
$iTime += 31; | |
} | |
elsif ( $iM == 4 || $iM == 6 || $iM == 9 || $iM == 11 ) { | |
$iTime += 30; | |
} | |
elsif ( $iM == 2 ) { | |
$iTime += ( LeapYear($iYear) ) ? 29 : 28; | |
} | |
} | |
$iTime += $iDay; | |
return $iTime; | |
} | |
my @month_days = qw( | |
0 31 28 31 30 31 30 31 31 30 31 30 31 | |
); | |
#------------------------------------------------------------------------------ | |
# ExcelLocaltime (for Spreadsheet::ParseExcel::Utility) | |
#------------------------------------------------------------------------------ | |
sub ExcelLocaltime { | |
my ( $dObj, $flg1904 ) = @_; | |
my ( $iSec, $iMin, $iHour, $iDay, $iMon, $iYear, $iwDay, $iMSec ); | |
my ( $iDt, $iTime, $iYDays, $iMD ); | |
$iDt = int($dObj); | |
$iTime = $dObj - $iDt; | |
#1. Calc Days | |
if ($flg1904) { | |
$iYear = 1904; | |
$iDt++; #Start from Jan 1st | |
$iYDays = 366; | |
$iwDay = ( ( $iDt + 4 ) % 7 ); | |
} | |
else { | |
$iYear = 1900; | |
$iYDays = 366; #In Excel 1900 is leap year (That's not TRUE!) | |
$iwDay = ( ( $iDt + 6 ) % 7 ); | |
} | |
while ( $iDt > $iYDays ) { | |
$iDt -= $iYDays; | |
$iYear++; | |
$iYDays = | |
( ( ( $iYear % 4 ) == 0 ) | |
&& ( ( $iYear % 100 ) || ( $iYear % 400 ) == 0 ) ) ? 366 : 365; | |
} | |
$iYear -= 1900; # Localtime year is relative to 1900. | |
for ( $iMon = 1 ; $iMon <= 12 ; $iMon++ ) { | |
$iMD = $month_days[$iMon]; | |
$iMD++ if $iMon == 2 and $iYear % 4 == 0; | |
last if ( $iDt <= $iMD ); | |
$iDt -= $iMD; | |
} | |
#2. Calc Time | |
$iDay = $iDt; | |
$iTime += ( 0.0005 / 86400.0 ); | |
if ($iTime >= 1.0) | |
{ | |
$iTime -= int($iTime); | |
$iwDay = ($iwDay == 6) ? 0 : $iwDay + 1; | |
if ($iDay == $iMD) | |
{ | |
if ($iMon == 12) | |
{ | |
$iMon = 1; | |
$iYear++; | |
} | |
else | |
{ | |
$iMon++; | |
} | |
$iDay = 1; | |
} | |
else | |
{ | |
$iDay++; | |
} | |
} | |
# Localtime month is 0 based. | |
$iMon -= 1; | |
$iTime *= 24.0; | |
$iHour = int($iTime); | |
$iTime -= $iHour; | |
$iTime *= 60.0; | |
$iMin = int($iTime); | |
$iTime -= $iMin; | |
$iTime *= 60.0; | |
$iSec = int($iTime); | |
$iTime -= $iSec; | |
$iTime *= 1000.0; | |
$iMSec = int($iTime); | |
return ( $iSec, $iMin, $iHour, $iDay, $iMon, $iYear, $iwDay, $iMSec ); | |
} | |
# ----------------------------------------------------------------------------- | |
# col2int (for Spreadsheet::ParseExcel::Utility) | |
#------------------------------------------------------------------------------ | |
# converts a excel row letter into an int for use in an array | |
sub col2int { | |
my $result = 0; | |
my $str = shift; | |
my $incr = 0; | |
for ( my $i = length($str) ; $i > 0 ; $i-- ) { | |
my $char = substr( $str, $i - 1 ); | |
my $curr += ord( lc($char) ) - ord('a') + 1; | |
$curr *= $incr if ($incr); | |
$result += $curr; | |
$incr += 26; | |
} | |
# this is one out as we range 0..x-1 not 1..x | |
$result--; | |
return $result; | |
} | |
# ----------------------------------------------------------------------------- | |
# int2col (for Spreadsheet::ParseExcel::Utility) | |
#------------------------------------------------------------------------------ | |
### int2col | |
# convert a column number into column letters | |
# @note this is quite a brute force coarse method | |
# does not manage values over 701 (ZZ) | |
# @arg number, to convert | |
# @returns string, column name | |
# | |
sub int2col { | |
my $out = ""; | |
my $val = shift; | |
do { | |
$out .= chr( ( $val % 26 ) + ord('A') ); | |
$val = int( $val / 26 ) - 1; | |
} while ( $val >= 0 ); | |
return scalar reverse $out; | |
} | |
# ----------------------------------------------------------------------------- | |
# sheetRef (for Spreadsheet::ParseExcel::Utility) | |
#------------------------------------------------------------------------------ | |
# ----------------------------------------------------------------------------- | |
### sheetRef | |
# convert an excel letter-number address into a useful array address | |
# @note that also Excel uses X-Y notation, we normally use Y-X in arrays | |
# @args $str, excel coord eg. A2 | |
# @returns an array - 2 elements - column, row, or undefined | |
# | |
sub sheetRef { | |
my $str = shift; | |
my @ret; | |
$str =~ m/^(\D+)(\d+)$/; | |
if ( $1 && $2 ) { | |
push( @ret, $2 - 1, col2int($1) ); | |
} | |
if ( $ret[0] < 0 ) { | |
undef @ret; | |
} | |
return @ret; | |
} | |
# ----------------------------------------------------------------------------- | |
# xls2csv (for Spreadsheet::ParseExcel::Utility) | |
#------------------------------------------------------------------------------ | |
### xls2csv | |
# convert a chunk of an excel file into csv text chunk | |
# @args $param, sheet-colrow:colrow (1-A1:B2 or A1:B2 for sheet 1 | |
# @args $rotate, 0 or 1 decides if output should be rotated or not | |
# @returns string containing a chunk of csv | |
# | |
sub xls2csv { | |
my ( $filename, $regions, $rotate ) = @_; | |
my $sheet = 0; | |
# We need Text::CSV_XS for proper CSV handling. | |
require Text::CSV_XS; | |
# extract any sheet number from the region string | |
$regions =~ m/^(\d+)-(.*)/; | |
if ($2) { | |
$sheet = $1 - 1; | |
$regions = $2; | |
} | |
# now extract the start and end regions | |
$regions =~ m/(.*):(.*)/; | |
if ( !$1 || !$2 ) { | |
print STDERR "Bad Params"; | |
return ""; | |
} | |
my @start = sheetRef($1); | |
my @end = sheetRef($2); | |
if ( !@start ) { | |
print STDERR "Bad coorinates - $1"; | |
return ""; | |
} | |
if ( !@end ) { | |
print STDERR "Bad coorinates - $2"; | |
return ""; | |
} | |
if ( $start[1] > $end[1] ) { | |
print STDERR "Bad COLUMN ordering\n"; | |
print STDERR "Start column " . int2col( $start[1] ); | |
print STDERR " after end column " . int2col( $end[1] ) . "\n"; | |
return ""; | |
} | |
if ( $start[0] > $end[0] ) { | |
print STDERR "Bad ROW ordering\n"; | |
print STDERR "Start row " . ( $start[0] + 1 ); | |
print STDERR " after end row " . ( $end[0] + 1 ) . "\n"; | |
exit; | |
} | |
# start the excel object now | |
my $oExcel = new Spreadsheet::ParseExcel; | |
my $oBook = $oExcel->Parse($filename); | |
# open the sheet | |
my $oWkS = $oBook->{Worksheet}[$sheet]; | |
# now check that the region exists in the file | |
# if not truncate to the possible region | |
# output a warning msg | |
if ( $start[1] < $oWkS->{MinCol} ) { | |
print STDERR int2col( $start[1] ) | |
. " < min col " | |
. int2col( $oWkS->{MinCol} ) | |
. " Resetting\n"; | |
$start[1] = $oWkS->{MinCol}; | |
} | |
if ( $end[1] > $oWkS->{MaxCol} ) { | |
print STDERR int2col( $end[1] ) | |
. " > max col " | |
. int2col( $oWkS->{MaxCol} ) | |
. " Resetting\n"; | |
$end[1] = $oWkS->{MaxCol}; | |
} | |
if ( $start[0] < $oWkS->{MinRow} ) { | |
print STDERR "" | |
. ( $start[0] + 1 ) | |
. " < min row " | |
. ( $oWkS->{MinRow} + 1 ) | |
. " Resetting\n"; | |
$start[0] = $oWkS->{MinCol}; | |
} | |
if ( $end[0] > $oWkS->{MaxRow} ) { | |
print STDERR "" | |
. ( $end[0] + 1 ) | |
. " > max row " | |
. ( $oWkS->{MaxRow} + 1 ) | |
. " Resetting\n"; | |
$end[0] = $oWkS->{MaxRow}; | |
} | |
my $x1 = $start[1]; | |
my $y1 = $start[0]; | |
my $x2 = $end[1]; | |
my $y2 = $end[0]; | |
my @cell_data; | |
my $row = 0; | |
if ( !$rotate ) { | |
for ( my $y = $y1 ; $y <= $y2 ; $y++ ) { | |
for ( my $x = $x1 ; $x <= $x2 ; $x++ ) { | |
my $cell = $oWkS->{Cells}[$y][$x]; | |
my $value; | |
if ( defined $cell ) { | |
$value .= $cell->value(); | |
} | |
else { | |
$value = ''; | |
} | |
push @{ $cell_data[$row] }, $value; | |
} | |
$row++; | |
} | |
} | |
else { | |
for ( my $x = $x1 ; $x <= $x2 ; $x++ ) { | |
for ( my $y = $y1 ; $y <= $y2 ; $y++ ) { | |
my $cell = $oWkS->{Cells}[$y][$x]; | |
my $value; | |
if ( defined $cell ) { | |
$value .= $cell->value(); | |
} | |
else { | |
$value = ''; | |
} | |
push @{ $cell_data[$row] }, $value; | |
} | |
$row++; | |
} | |
} | |
# Create the CSV output string. | |
my $csv = Text::CSV_XS->new( { binary => 1, eol => $/ } ); | |
my $output = ""; | |
for my $row (@cell_data) { | |
$csv->combine(@$row); | |
$output .= $csv->string(); | |
} | |
return $output; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::Utility - Utility functions for Spreadsheet::ParseExcel. | |
=head1 SYNOPSIS | |
use Spreadsheet::ParseExcel::Utility qw(ExcelFmt ExcelLocaltime LocaltimeExcel); | |
# Convert localtime to Excel time | |
my $datetime = LocaltimeExcel(11, 10, 12, 23, 2, 64); # 1964-3-23 12:10:11 | |
print $datetime, "\n"; # 23459.5070717593 (Excel date/time format) | |
# Convert Excel Time to localtime | |
my @time = ExcelLocaltime($datetime); | |
print join(":", @time), "\n"; # 11:10:12:23:2:64:1:0 | |
# Formatting | |
print ExcelFmt('yyyy-mm-dd', $datetime), "\n"; # 1964-3-23 | |
print ExcelFmt('m-d-yy', $datetime), "\n"; # 3-23-64 | |
print ExcelFmt('#,##0', $datetime), "\n"; # 23,460 | |
print ExcelFmt('#,##0.00', $datetime), "\n"; # 23,459.51 | |
=head1 DESCRIPTION | |
The C<Spreadsheet::ParseExcel::Utility> module provides utility functions for working with ParseExcel and Excel data. | |
=head1 Functions | |
C<Spreadsheet::ParseExcel::Utility> can export the following functions: | |
ExcelFmt | |
ExcelLocaltime | |
LocaltimeExcel | |
col2int | |
int2col | |
sheetRef | |
xls2csv | |
These functions must be imported implicitly: | |
# Just one function. | |
use Spreadsheet::ParseExcel::Utility 'col2int'; | |
# More than one. | |
use Spreadsheet::ParseExcel::Utility qw(ExcelFmt ExcelLocaltime LocaltimeExcel); | |
=head2 ExcelFmt($format_string, $number, $is_1904) | |
Excel stores data such as dates and currency values as numbers. The way these numbers are displayed is controlled by the number format string for the cell. For example a cell with a number format of C<'$#,##0.00'> for currency and a value of 1234.567 would be displayed as follows: | |
'$#,##0.00' + 1234.567 = '$1,234.57'. | |
The C<ExcelFmt()> function tries to emulate this formatting so that the user can convert raw numbers returned by C<Spreadsheet::ParseExel> to a desired format. For example: | |
print ExcelFmt('$#,##0.00', 1234.567); # $1,234.57. | |
The syntax of the function is: | |
my $text = ExcelFmt($format_string, $number, $is_1904); | |
Where C<$format_string> is an Excel number format string, C<$number> is a real or integer number and C<is_1904> is an optional flag to indicate that dates should use Excel's 1904 epoch instead of the default 1900 epoch. | |
C<ExcelFmt()> is also used internally to convert numbers returned by the C<Cell::unformatted()> method to the formatted value returned by the C<Cell::value()> method: | |
my $cell = $worksheet->get_cell( 0, 0 ); | |
print $cell->unformatted(), "\n"; # 1234.567 | |
print $cell->value(), "\n"; # $1,234.57 | |
The most common usage for C<ExcelFmt> is to convert numbers to dates. Dates and times in Excel are represented by real numbers, for example "1 Jan 2001 12:30 PM" is represented by the number 36892.521. The integer part of the number stores the number of days since the epoch and the fractional part stores the percentage of the day. By applying an Excel number format the number is converted to the desired string representation: | |
print ExcelFmt('d mmm yyyy h:mm AM/PM', 36892.521); # 1 Jan 2001 12:30 PM | |
C<$is_1904> is an optional flag to indicate that dates should use Excel's 1904 epoch instead of the default 1900 epoch. Excel for Windows generally uses 1900 and Excel for Mac OS uses 1904. The C<$is1904> flag isn't required very often by a casual user and can usually be ignored. | |
=head2 ExcelLocaltime($excel_datetime, $is_1904) | |
The C<ExcelLocaltime()> function converts from an Excel date/time number to a C<localtime()>-like array of values: | |
my @time = ExcelLocaltime($excel_datetime); | |
# 0 1 2 3 4 5 6 7 | |
my ( $sec, $min, $hour, $day, $month, $year, $wday, $msec ) = @time; | |
The array elements from C<(0 .. 6)> are the same as Perl's C<localtime()>. The last element C<$msec> is milliseconds. In particular it should be noted that, in common with C<localtime()>, the month is zero indexed and the year is the number of years since 1900. This means that you will usually need to do the following: | |
$month++; | |
$year += 1900; | |
See also Perl's documentation for L<localtime()|perlfunc>: | |
The C<$is_1904> flag is an optional. It is used to indicate that dates should use Excel's 1904 epoch instead of the default 1900 epoch. | |
=head2 LocaltimeExcel($sec, $min, $hour, $day, $month, $year, $wday, $msec, $is_1904) | |
The C<LocaltimeExcel()> function converts from a C<localtime()>-like array of values to an Excel date/time number: | |
$excel_datetime = LocaltimeExcel($sec, $min, $hour, $day, $month, $year, $wday, $msec); | |
The array elements from C<(0 .. 6)> are the same as Perl's C<localtime()>. The last element C<$msec> is milliseconds. In particular it should be noted that, in common with C<localtime()>, the month is zero indexed and the year is the number of years since 1900. See also Perl's documentation for L<localtime()|perlfunc>: | |
The C<$wday> and C<$msec> elements are usually optional. This time elements can also be zeroed if they aren't of interest: | |
# sec, min, hour, day, month, year | |
$excel_datetime = LocaltimeExcel( 0, 0, 0, 1, 0, 101 ); | |
print ExcelFmt('d mmm yyyy', $excel_datetime); # 1 Jan 2001 | |
The C<$is_1904> flag is also optional. It is used to indicate that dates should use Excel's 1904 epoch instead of the default 1900 epoch. | |
=head2 col2int($column) | |
The C<col2int()> function converts an Excel column letter to an zero-indexed column number: | |
print col2int('A'); # 0 | |
print col2int('AA'); # 26 | |
This function was contributed by Kevin Mulholland. | |
=head2 int2col($column_number) | |
The C<int2col()> function converts an zero-indexed Excel column number to a column letter: | |
print int2col(0); # 'A' | |
print int2col(26); # 'AA' | |
This function was contributed by Kevin Mulholland. | |
=head2 sheetRef($cell_string) | |
The C<sheetRef()> function converts an Excel cell reference in 'A1' notation to a zero-indexed C<(row, col)> pair. | |
my ($row, $col) = sheetRef('A1'); # ( 0, 0 ) | |
my ($row, $col) = sheetRef('C2'); # ( 1, 2 ) | |
This function was contributed by Kevin Mulholland. | |
=head2 xls2csv($filename, $region, $rotate) | |
The C<xls2csv()> function converts a section of an Excel file into a CSV text string. | |
$csv_text = xls2csv($filename, $region, $rotate); | |
Where: | |
$region = "sheet-colrow:colrow" | |
For example '1-A1:B2' means 'A1:B2' for sheet 1. | |
and | |
$rotate = 0 or 1 (output is rotated/transposed or not) | |
This function requires C<Text::CSV_XS> to be installed. It was contributed by Kevin Mulholland along with the C<xls2csv> script in the C<sample> directory of the distro. | |
See also the following xls2csv utilities: Ken Prows' C<xls2csv>: http://search.cpan.org/~ken/xls2csv/script/xls2csv and H.Merijn Brand's C<xls2csv> (which is part of Spreadsheet::Read): http://search.cpan.org/~hmbrand/Spreadsheet-Read/ | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_UTILITY | |
$fatpacked{"Spreadsheet/ParseExcel/Workbook.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_WORKBOOK'; | |
package Spreadsheet::ParseExcel::Workbook; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Workbook - A class for Workbooks. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
our $VERSION = '0.65'; | |
############################################################################### | |
# | |
# new() | |
# | |
# Constructor. | |
# | |
sub new { | |
my ($class) = @_; | |
my $self = {}; | |
bless $self, $class; | |
} | |
############################################################################### | |
sub color_idx_to_rgb { | |
my( $workbook, $iidx ) = @_; | |
my $palette = $workbook->{aColor}; | |
return ( ( defined $palette->[$iidx] ) ? $palette->[$iidx] : $palette->[0] ); | |
} | |
############################################################################### | |
# | |
# worksheet() | |
# | |
# This method returns a single Worksheet object using either its name or index. | |
# | |
sub worksheet { | |
my ( $oBook, $sName ) = @_; | |
my $oWkS; | |
foreach $oWkS ( @{ $oBook->{Worksheet} } ) { | |
return $oWkS if ( $oWkS->{Name} eq $sName ); | |
} | |
if ( $sName =~ /^\d+$/ ) { | |
return $oBook->{Worksheet}->[$sName]; | |
} | |
return undef; | |
} | |
############################################################################### | |
# | |
# worksheets() | |
# | |
# Returns an array of Worksheet objects. | |
# | |
sub worksheets { | |
my $self = shift; | |
return @{ $self->{Worksheet} }; | |
} | |
############################################################################### | |
# | |
# worksheet_count() | |
# | |
# Returns the number Woksheet objects in the Workbook. | |
# | |
sub worksheet_count { | |
my $self = shift; | |
return $self->{SheetCount}; | |
} | |
############################################################################### | |
# | |
# get_filename() | |
# | |
# Returns the name of the Excel file of C<undef> if the data was read from a filehandle rather than a file. | |
# | |
sub get_filename { | |
my $self = shift; | |
return $self->{File}; | |
} | |
############################################################################### | |
# | |
# get_print_areas() | |
# | |
# Returns an array ref of print areas. | |
# | |
# TODO. This should really be a Worksheet method. | |
# | |
sub get_print_areas { | |
my $self = shift; | |
return $self->{PrintArea}; | |
} | |
############################################################################### | |
# | |
# get_print_titles() | |
# | |
# Returns an array ref of print title hash refs. | |
# | |
# TODO. This should really be a Worksheet method. | |
# | |
sub get_print_titles { | |
my $self = shift; | |
return $self->{PrintTitle}; | |
} | |
############################################################################### | |
# | |
# using_1904_date() | |
# | |
# Returns true if the Excel file is using the 1904 date epoch. | |
# | |
sub using_1904_date { | |
my $self = shift; | |
return $self->{Flg1904}; | |
} | |
############################################################################### | |
# | |
# ParseAbort() | |
# | |
# Todo | |
# | |
sub ParseAbort { | |
my ( $self, $val ) = @_; | |
$self->{_ParseAbort} = $val; | |
} | |
=head2 get_active_sheet() | |
Return the number of the active (open) worksheet (at the time the workbook | |
was saved. May return undef. | |
=cut | |
sub get_active_sheet { | |
my $workbook = shift; | |
return $workbook->{ActiveSheet}; | |
} | |
############################################################################### | |
# | |
# Parse(). Deprecated. | |
# | |
# Syntactic wrapper around Spreadsheet::ParseExcel::Parse(). | |
# This method is *deprecated* since it doesn't conform to the current | |
# error handling in the S::PE Parse() method. | |
# | |
sub Parse { | |
my ( $class, $source, $formatter ) = @_; | |
my $excel = Spreadsheet::ParseExcel->new(); | |
my $workbook = $excel->Parse( $source, $formatter ); | |
$workbook->{_Excel} = $excel; | |
return $workbook; | |
} | |
############################################################################### | |
# | |
# Mapping between legacy method names and new names. | |
# | |
{ | |
no warnings; # Ignore warnings about variables used only once. | |
*Worksheet = *worksheet; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::Workbook - A class for Workbooks. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for L<Spreadsheet::ParseExcel>. | |
=head1 Methods | |
The following Workbook methods are available: | |
$workbook->worksheets() | |
$workbook->worksheet() | |
$workbook->worksheet_count() | |
$workbook->get_filename() | |
$workbook->get_print_areas() | |
$workbook->get_print_titles() | |
$workbook->using_1904_date() | |
=head2 worksheets() | |
The C<worksheets()> method returns an array of Worksheet objects. This was most commonly used to iterate over the worksheets in a workbook: | |
for my $worksheet ( $workbook->worksheets() ) { | |
... | |
} | |
=head2 worksheet() | |
The C<worksheet()> method returns a single C<Worksheet> object using either its name or index: | |
$worksheet = $workbook->worksheet('Sheet1'); | |
$worksheet = $workbook->worksheet(0); | |
Returns C<undef> if the sheet name or index doesn't exist. | |
=head2 worksheet_count() | |
The C<worksheet_count()> method returns the number of Woksheet objects in the Workbook. | |
my $worksheet_count = $workbook->worksheet_count(); | |
=head2 get_filename() | |
The C<get_filename()> method returns the name of the Excel file of C<undef> if the data was read from a filehandle rather than a file. | |
my $filename = $workbook->get_filename(); | |
=head2 get_print_areas() | |
The C<get_print_areas()> method returns an array ref of print areas. | |
my $print_areas = $workbook->get_print_areas(); | |
Each print area is as follows: | |
[ $start_row, $start_col, $end_row, $end_col ] | |
Returns undef if there are no print areas. | |
=head2 get_print_titles() | |
The C<get_print_titles()> method returns an array ref of print title hash refs. | |
my $print_titles = $workbook->get_print_titles(); | |
Each print title array ref is as follows: | |
{ | |
Row => [ $start_row, $end_row ], | |
Column => [ $start_col, $end_col ], | |
} | |
Returns undef if there are no print titles. | |
=head2 using_1904_date() | |
The C<using_1904_date()> method returns true if the Excel file is using the 1904 date epoch instead of the 1900 epoch. | |
my $using_1904_date = $workbook->using_1904_date(); | |
The Windows version of Excel generally uses the 1900 epoch while the Mac version of Excel generally uses the 1904 epoch. | |
Returns 0 if the 1900 epoch is in use. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_WORKBOOK | |
$fatpacked{"Spreadsheet/ParseExcel/Worksheet.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_WORKSHEET'; | |
package Spreadsheet::ParseExcel::Worksheet; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Worksheet - A class for Worksheets. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use Scalar::Util qw(weaken); | |
our $VERSION = '0.65'; | |
############################################################################### | |
# | |
# new() | |
# | |
sub new { | |
my ( $class, %properties ) = @_; | |
my $self = \%properties; | |
weaken $self->{_Book}; | |
$self->{Cells} = undef; | |
$self->{DefColWidth} = 8.43; | |
return bless $self, $class; | |
} | |
############################################################################### | |
# | |
# get_cell( $row, $col ) | |
# | |
# Returns the Cell object at row $row and column $col, if defined. | |
# | |
sub get_cell { | |
my ( $self, $row, $col ) = @_; | |
if ( !defined $row | |
|| !defined $col | |
|| !defined $self->{MaxRow} | |
|| !defined $self->{MaxCol} ) | |
{ | |
# Return undef if no arguments are given or if no cells are defined. | |
return undef; | |
} | |
elsif ($row < $self->{MinRow} | |
|| $row > $self->{MaxRow} | |
|| $col < $self->{MinCol} | |
|| $col > $self->{MaxCol} ) | |
{ | |
# Return undef if outside allowable row/col range. | |
return undef; | |
} | |
else { | |
# Return the Cell object. | |
return $self->{Cells}->[$row]->[$col]; | |
} | |
} | |
############################################################################### | |
# | |
# row_range() | |
# | |
# Returns a two-element list ($min, $max) containing the minimum and maximum | |
# defined rows in the worksheet. | |
# | |
# If there is no row defined $max is smaller than $min. | |
# | |
sub row_range { | |
my $self = shift; | |
my $min = $self->{MinRow} || 0; | |
my $max = defined( $self->{MaxRow} ) ? $self->{MaxRow} : ( $min - 1 ); | |
return ( $min, $max ); | |
} | |
############################################################################### | |
# | |
# col_range() | |
# | |
# Returns a two-element list ($min, $max) containing the minimum and maximum | |
# defined cols in the worksheet. | |
# | |
# If there is no column defined $max is smaller than $min. | |
# | |
sub col_range { | |
my $self = shift; | |
my $min = $self->{MinCol} || 0; | |
my $max = defined( $self->{MaxCol} ) ? $self->{MaxCol} : ( $min - 1 ); | |
return ( $min, $max ); | |
} | |
############################################################################### | |
# | |
# get_name() | |
# | |
# Returns the name of the worksheet. | |
# | |
sub get_name { | |
my $self = shift; | |
return $self->{Name}; | |
} | |
############################################################################### | |
# | |
# sheet_num() | |
# | |
sub sheet_num { | |
my $self = shift; | |
return $self->{_SheetNo}; | |
} | |
############################################################################### | |
# | |
# get_h_pagebreaks() | |
# | |
# Returns an array ref of row numbers where a horizontal page break occurs. | |
# | |
sub get_h_pagebreaks { | |
my $self = shift; | |
return $self->{HPageBreak}; | |
} | |
############################################################################### | |
# | |
# get_v_pagebreaks() | |
# | |
# Returns an array ref of column numbers where a vertical page break occurs. | |
# | |
sub get_v_pagebreaks { | |
my $self = shift; | |
return $self->{VPageBreak}; | |
} | |
############################################################################### | |
# | |
# get_merged_areas() | |
# | |
# Returns an array ref of cells that are merged. | |
# | |
sub get_merged_areas { | |
my $self = shift; | |
return $self->{MergedArea}; | |
} | |
############################################################################### | |
# | |
# get_row_heights() | |
# | |
# Returns an array of row heights. | |
# | |
sub get_row_heights { | |
my $self = shift; | |
if ( wantarray() ) { | |
return unless $self->{RowHeight}; | |
return @{ $self->{RowHeight} }; | |
} | |
return $self->{RowHeight}; | |
} | |
############################################################################### | |
# | |
# get_col_widths() | |
# | |
# Returns an array of column widths. | |
# | |
sub get_col_widths { | |
my $self = shift; | |
if ( wantarray() ) { | |
return unless $self->{ColWidth}; | |
return @{ $self->{ColWidth} }; | |
} | |
return $self->{ColWidth}; | |
} | |
############################################################################### | |
# | |
# get_default_row_height() | |
# | |
# Returns the default row height for the worksheet. Generally 12.75. | |
# | |
sub get_default_row_height { | |
my $self = shift; | |
return $self->{DefRowHeight}; | |
} | |
############################################################################### | |
# | |
# get_default_col_width() | |
# | |
# Returns the default column width for the worksheet. Generally 8.43. | |
# | |
sub get_default_col_width { | |
my $self = shift; | |
return $self->{DefColWidth}; | |
} | |
############################################################################### | |
# | |
# _get_row_properties() | |
# | |
# Returns an array_ref of row properties. | |
# TODO. This is a placeholder for a future method. | |
# | |
sub _get_row_properties { | |
my $self = shift; | |
return $self->{RowProperties}; | |
} | |
############################################################################### | |
# | |
# _get_col_properties() | |
# | |
# Returns an array_ref of column properties. | |
# TODO. This is a placeholder for a future method. | |
# | |
sub _get_col_properties { | |
my $self = shift; | |
return $self->{ColProperties}; | |
} | |
############################################################################### | |
# | |
# get_header() | |
# | |
# Returns the worksheet header string. | |
# | |
sub get_header { | |
my $self = shift; | |
return $self->{Header}; | |
} | |
############################################################################### | |
# | |
# get_footer() | |
# | |
# Returns the worksheet footer string. | |
# | |
sub get_footer { | |
my $self = shift; | |
return $self->{Footer}; | |
} | |
############################################################################### | |
# | |
# get_margin_left() | |
# | |
# Returns the left margin of the worksheet in inches. | |
# | |
sub get_margin_left { | |
my $self = shift; | |
return $self->{LeftMargin}; | |
} | |
############################################################################### | |
# | |
# get_margin_right() | |
# | |
# Returns the right margin of the worksheet in inches. | |
# | |
sub get_margin_right { | |
my $self = shift; | |
return $self->{RightMargin}; | |
} | |
############################################################################### | |
# | |
# get_margin_top() | |
# | |
# Returns the top margin of the worksheet in inches. | |
# | |
sub get_margin_top { | |
my $self = shift; | |
return $self->{TopMargin}; | |
} | |
############################################################################### | |
# | |
# get_margin_bottom() | |
# | |
# Returns the bottom margin of the worksheet in inches. | |
# | |
sub get_margin_bottom { | |
my $self = shift; | |
return $self->{BottomMargin}; | |
} | |
############################################################################### | |
# | |
# get_margin_header() | |
# | |
# Returns the header margin of the worksheet in inches. | |
# | |
sub get_margin_header { | |
my $self = shift; | |
return $self->{HeaderMargin}; | |
} | |
############################################################################### | |
# | |
# get_margin_footer() | |
# | |
# Returns the footer margin of the worksheet in inches. | |
# | |
sub get_margin_footer { | |
my $self = shift; | |
return $self->{FooterMargin}; | |
} | |
############################################################################### | |
# | |
# get_paper() | |
# | |
# Returns the printer paper size. | |
# | |
sub get_paper { | |
my $self = shift; | |
return $self->{PaperSize}; | |
} | |
############################################################################### | |
# | |
# get_start_page() | |
# | |
# Returns the page number that printing will start from. | |
# | |
sub get_start_page { | |
my $self = shift; | |
# Only return the page number if the "First page number" option is set. | |
if ( $self->{UsePage} ) { | |
return $self->{PageStart}; | |
} | |
else { | |
return 0; | |
} | |
} | |
############################################################################### | |
# | |
# get_print_order() | |
# | |
# Returns the Worksheet page printing order. | |
# | |
sub get_print_order { | |
my $self = shift; | |
return $self->{LeftToRight}; | |
} | |
############################################################################### | |
# | |
# get_print_scale() | |
# | |
# Returns the workbook scale for printing. | |
# | |
sub get_print_scale { | |
my $self = shift; | |
return $self->{Scale}; | |
} | |
############################################################################### | |
# | |
# get_fit_to_pages() | |
# | |
# Returns the number of pages wide and high that the printed worksheet page | |
# will fit to. | |
# | |
sub get_fit_to_pages { | |
my $self = shift; | |
if ( !$self->{PageFit} ) { | |
return ( 0, 0 ); | |
} | |
else { | |
return ( $self->{FitWidth}, $self->{FitHeight} ); | |
} | |
} | |
############################################################################### | |
# | |
# is_portrait() | |
# | |
# Returns true if the worksheet has been set for printing in portrait mode. | |
# | |
sub is_portrait { | |
my $self = shift; | |
return $self->{Landscape}; | |
} | |
############################################################################### | |
# | |
# is_centered_horizontally() | |
# | |
# Returns true if the worksheet has been centered horizontally for printing. | |
# | |
sub is_centered_horizontally { | |
my $self = shift; | |
return $self->{HCenter}; | |
} | |
############################################################################### | |
# | |
# is_centered_vertically() | |
# | |
# Returns true if the worksheet has been centered vertically for printing. | |
# | |
sub is_centered_vertically { | |
my $self = shift; | |
return $self->{HCenter}; | |
} | |
############################################################################### | |
# | |
# is_print_gridlines() | |
# | |
# Returns true if the worksheet print "gridlines" option is turned on. | |
# | |
sub is_print_gridlines { | |
my $self = shift; | |
return $self->{PrintGrid}; | |
} | |
############################################################################### | |
# | |
# is_print_row_col_headers() | |
# | |
# Returns true if the worksheet print "row and column headings" option is on. | |
# | |
sub is_print_row_col_headers { | |
my $self = shift; | |
return $self->{PrintHeaders}; | |
} | |
############################################################################### | |
# | |
# is_print_black_and_white() | |
# | |
# Returns true if the worksheet print "black and white" option is turned on. | |
# | |
sub is_print_black_and_white { | |
my $self = shift; | |
return $self->{NoColor}; | |
} | |
############################################################################### | |
# | |
# is_print_draft() | |
# | |
# Returns true if the worksheet print "draft" option is turned on. | |
# | |
sub is_print_draft { | |
my $self = shift; | |
return $self->{Draft}; | |
} | |
############################################################################### | |
# | |
# is_print_comments() | |
# | |
# Returns true if the worksheet print "comments" option is turned on. | |
# | |
sub is_print_comments { | |
my $self = shift; | |
return $self->{Notes}; | |
} | |
=head2 get_tab_color() | |
Return color index of tab, or undef if not set. | |
=cut | |
sub get_tab_color { | |
my $worksheet = shift; | |
return $worksheet->{TabColor}; | |
} | |
=head2 is_sheet_hidden() | |
Return true if sheet is hidden | |
=cut | |
sub is_sheet_hidden { | |
my $worksheet = shift; | |
return $worksheet->{SheetHidden}; | |
} | |
=head2 is_row_hidden($row) | |
In scalar context, return true if $row is hidden | |
In array context, return an array whose elements are true | |
if the corresponding row is hidden. | |
=cut | |
sub is_row_hidden { | |
my $worksheet = shift; | |
my ($row) = @_; | |
unless ( $worksheet->{RowHidden} ) { | |
return () if (wantarray); | |
return 0; | |
} | |
return @{ $worksheet->{RowHidden} } if (wantarray); | |
return $worksheet->{RowHidden}[$row]; | |
} | |
=head2 is_col_hidden($col) | |
In scalar context, return true if $col is hidden | |
In array context, return an array whose elements are true | |
if the corresponding column is hidden. | |
=cut | |
sub is_col_hidden { | |
my $worksheet = shift; | |
my ($col) = @_; | |
unless ( $worksheet->{ColHidden} ) { | |
return () if (wantarray); | |
return 0; | |
} | |
return @{ $worksheet->{ColHidden} } if (wantarray); | |
return $worksheet->{ColHidden}[$col]; | |
} | |
############################################################################### | |
# | |
# Mapping between legacy method names and new names. | |
# | |
{ | |
no warnings; # Ignore warnings about variables used only once. | |
*sheetNo = *sheet_num; | |
*Cell = *get_cell; | |
*RowRange = *row_range; | |
*ColRange = *col_range; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::Worksheet - A class for Worksheets. | |
=head1 SYNOPSIS | |
See the documentation for L<Spreadsheet::ParseExcel>. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 Methods | |
The C<Spreadsheet::ParseExcel::Worksheet> class encapsulates the properties of an Excel worksheet. It has the following methods: | |
$worksheet->get_cell() | |
$worksheet->row_range() | |
$worksheet->col_range() | |
$worksheet->get_name() | |
$worksheet->get_h_pagebreaks() | |
$worksheet->get_v_pagebreaks() | |
$worksheet->get_merged_areas() | |
$worksheet->get_row_heights() | |
$worksheet->get_col_widths() | |
$worksheet->get_default_row_height() | |
$worksheet->get_default_col_width() | |
$worksheet->get_header() | |
$worksheet->get_footer() | |
$worksheet->get_margin_left() | |
$worksheet->get_margin_right() | |
$worksheet->get_margin_top() | |
$worksheet->get_margin_bottom() | |
$worksheet->get_margin_header() | |
$worksheet->get_margin_footer() | |
$worksheet->get_paper() | |
$worksheet->get_start_page() | |
$worksheet->get_print_order() | |
$worksheet->get_print_scale() | |
$worksheet->get_fit_to_pages() | |
$worksheet->is_portrait() | |
$worksheet->is_centered_horizontally() | |
$worksheet->is_centered_vertically() | |
$worksheet->is_print_gridlines() | |
$worksheet->is_print_row_col_headers() | |
$worksheet->is_print_black_and_white() | |
$worksheet->is_print_draft() | |
$worksheet->is_print_comments() | |
=head2 get_cell($row, $col) | |
Return the L</Cell> object at row C<$row> and column C<$col> if it is defined. Otherwise returns undef. | |
my $cell = $worksheet->get_cell($row, $col); | |
=head2 row_range() | |
Returns a two-element list C<($min, $max)> containing the minimum and maximum defined rows in the worksheet. If there is no row defined C<$max> is smaller than C<$min>. | |
my ( $row_min, $row_max ) = $worksheet->row_range(); | |
=head2 col_range() | |
Returns a two-element list C<($min, $max)> containing the minimum and maximum of defined columns in the worksheet. If there is no column defined C<$max> is smaller than C<$min>. | |
my ( $col_min, $col_max ) = $worksheet->col_range(); | |
=head2 get_name() | |
The C<get_name()> method returns the name of the worksheet. | |
my $name = $worksheet->get_name(); | |
=head2 get_h_pagebreaks() | |
The C<get_h_pagebreaks()> method returns an array ref of row numbers where a horizontal page break occurs. | |
my $h_pagebreaks = $worksheet->get_h_pagebreaks(); | |
Returns C<undef> if there are no pagebreaks. | |
=head2 get_v_pagebreaks() | |
The C<get_v_pagebreaks()> method returns an array ref of column numbers where a vertical page break occurs. | |
my $v_pagebreaks = $worksheet->get_v_pagebreaks(); | |
Returns C<undef> if there are no pagebreaks. | |
=head2 get_merged_areas() | |
The C<get_merged_areas()> method returns an array ref of cells that are merged. | |
my $merged_areas = $worksheet->get_merged_areas(); | |
Each merged area is represented as follows: | |
[ $start_row, $start_col, $end_row, $end_col] | |
Returns C<undef> if there are no merged areas. | |
=head2 get_row_heights() | |
The C<get_row_heights()> method returns an array_ref of row heights in scalar context, | |
and an array in list context. | |
my $row_heights = $worksheet->get_row_heights(); | |
Returns C<undef> if the property isn't set. | |
=head2 get_col_widths() | |
The C<get_col_widths()> method returns an array_ref of column widths in scalar context, | |
and an array in list context. | |
my $col_widths = $worksheet->get_col_widths(); | |
Returns C<undef> if the property isn't set. | |
=head2 get_default_row_height() | |
The C<get_default_row_height()> method returns the default row height for the worksheet. Generally 12.75. | |
my $default_row_height = $worksheet->get_default_row_height(); | |
=head2 get_default_col_width() | |
The C<get_default_col_width()> method returns the default column width for the worksheet. Generally 8.43. | |
my $default_col_width = $worksheet->get_default_col_width(); | |
=head2 get_header() | |
The C<get_header()> method returns the worksheet header string. This string can contain control codes for alignment and font properties. Refer to the Excel on-line help on headers and footers or to the Spreadsheet::WriteExcel documentation for set_header(). | |
my $header = $worksheet->get_header(); | |
Returns C<undef> if the property isn't set. | |
=head2 get_footer() | |
The C<get_footer()> method returns the worksheet footer string. This string can contain control codes for alignment and font properties. Refer to the Excel on-line help on headers and footers or to the Spreadsheet::WriteExcel documentation for set_header(). | |
my $footer = $worksheet->get_footer(); | |
Returns C<undef> if the property isn't set. | |
=head2 get_margin_left() | |
The C<get_margin_left()> method returns the left margin of the worksheet in inches. | |
my $margin_left = $worksheet->get_margin_left(); | |
Returns C<undef> if the property isn't set. | |
=head2 get_margin_right() | |
The C<get_margin_right()> method returns the right margin of the worksheet in inches. | |
my $margin_right = $worksheet->get_margin_right(); | |
Returns C<undef> if the property isn't set. | |
=head2 get_margin_top() | |
The C<get_margin_top()> method returns the top margin of the worksheet in inches. | |
my $margin_top = $worksheet->get_margin_top(); | |
Returns C<undef> if the property isn't set. | |
=head2 get_margin_bottom() | |
The C<get_margin_bottom()> method returns the bottom margin of the worksheet in inches. | |
my $margin_bottom = $worksheet->get_margin_bottom(); | |
Returns C<undef> if the property isn't set. | |
=head2 get_margin_header() | |
The C<get_margin_header()> method returns the header margin of the worksheet in inches. | |
my $margin_header = $worksheet->get_margin_header(); | |
Returns a default value of 0.5 if not set. | |
=head2 get_margin_footer() | |
The C<get_margin_footer()> method returns the footer margin of the worksheet in inches. | |
my $margin_footer = $worksheet->get_margin_footer(); | |
Returns a default value of 0.5 if not set. | |
=head2 get_paper() | |
The C<get_paper()> method returns the printer paper size. | |
my $paper = $worksheet->get_paper(); | |
The value corresponds to the formats shown below: | |
Index Paper format Paper size | |
===== ============ ========== | |
0 Printer default - | |
1 Letter 8 1/2 x 11 in | |
2 Letter Small 8 1/2 x 11 in | |
3 Tabloid 11 x 17 in | |
4 Ledger 17 x 11 in | |
5 Legal 8 1/2 x 14 in | |
6 Statement 5 1/2 x 8 1/2 in | |
7 Executive 7 1/4 x 10 1/2 in | |
8 A3 297 x 420 mm | |
9 A4 210 x 297 mm | |
10 A4 Small 210 x 297 mm | |
11 A5 148 x 210 mm | |
12 B4 250 x 354 mm | |
13 B5 182 x 257 mm | |
14 Folio 8 1/2 x 13 in | |
15 Quarto 215 x 275 mm | |
16 - 10x14 in | |
17 - 11x17 in | |
18 Note 8 1/2 x 11 in | |
19 Envelope 9 3 7/8 x 8 7/8 | |
20 Envelope 10 4 1/8 x 9 1/2 | |
21 Envelope 11 4 1/2 x 10 3/8 | |
22 Envelope 12 4 3/4 x 11 | |
23 Envelope 14 5 x 11 1/2 | |
24 C size sheet - | |
25 D size sheet - | |
26 E size sheet - | |
27 Envelope DL 110 x 220 mm | |
28 Envelope C3 324 x 458 mm | |
29 Envelope C4 229 x 324 mm | |
30 Envelope C5 162 x 229 mm | |
31 Envelope C6 114 x 162 mm | |
32 Envelope C65 114 x 229 mm | |
33 Envelope B4 250 x 353 mm | |
34 Envelope B5 176 x 250 mm | |
35 Envelope B6 176 x 125 mm | |
36 Envelope 110 x 230 mm | |
37 Monarch 3.875 x 7.5 in | |
38 Envelope 3 5/8 x 6 1/2 in | |
39 Fanfold 14 7/8 x 11 in | |
40 German Std Fanfold 8 1/2 x 12 in | |
41 German Legal Fanfold 8 1/2 x 13 in | |
256 User defined | |
The two most common paper sizes are C<1 = "US Letter"> and C<9 = A4>. Returns 9 by default. | |
=head2 get_start_page() | |
The C<get_start_page()> method returns the page number that printing will start from. | |
my $start_page = $worksheet->get_start_page(); | |
Returns 0 if the property isn't set. | |
=head2 get_print_order() | |
The C<get_print_order()> method returns 0 if the worksheet print "page order" is "Down then over" (the default) or 1 if it is "Over then down". | |
my $print_order = $worksheet->get_print_order(); | |
=head2 get_print_scale() | |
The C<get_print_scale()> method returns the workbook scale for printing. The print scale factor can be in the range 10 .. 400. | |
my $print_scale = $worksheet->get_print_scale(); | |
Returns 100 by default. | |
=head2 get_fit_to_pages() | |
The C<get_fit_to_pages()> method returns the number of pages wide and high that the printed worksheet page will fit to. | |
my ($pages_wide, $pages_high) = $worksheet->get_fit_to_pages(); | |
Returns (0, 0) if the property isn't set. | |
=head2 is_portrait() | |
The C<is_portrait()> method returns true if the worksheet has been set for printing in portrait mode. | |
my $is_portrait = $worksheet->is_portrait(); | |
Returns 0 if the worksheet has been set for printing in horizontal mode. | |
=head2 is_centered_horizontally() | |
The C<is_centered_horizontally()> method returns true if the worksheet has been centered horizontally for printing. | |
my $is_centered_horizontally = $worksheet->is_centered_horizontally(); | |
Returns 0 if the property isn't set. | |
=head2 is_centered_vertically() | |
The C<is_centered_vertically()> method returns true if the worksheet has been centered vertically for printing. | |
my $is_centered_vertically = $worksheet->is_centered_vertically(); | |
Returns 0 if the property isn't set. | |
=head2 is_print_gridlines() | |
The C<is_print_gridlines()> method returns true if the worksheet print "gridlines" option is turned on. | |
my $is_print_gridlines = $worksheet->is_print_gridlines(); | |
Returns 0 if the property isn't set. | |
=head2 is_print_row_col_headers() | |
The C<is_print_row_col_headers()> method returns true if the worksheet print "row and column headings" option is turned on. | |
my $is_print_row_col_headers = $worksheet->is_print_row_col_headers(); | |
Returns 0 if the property isn't set. | |
=head2 is_print_black_and_white() | |
The C<is_print_black_and_white()> method returns true if the worksheet print "black and white" option is turned on. | |
my $is_print_black_and_white = $worksheet->is_print_black_and_white(); | |
Returns 0 if the property isn't set. | |
=head2 is_print_draft() | |
The C<is_print_draft()> method returns true if the worksheet print "draft" option is turned on. | |
my $is_print_draft = $worksheet->is_print_draft(); | |
Returns 0 if the property isn't set. | |
=head2 is_print_comments() | |
The C<is_print_comments()> method returns true if the worksheet print "comments" option is turned on. | |
my $is_print_comments = $worksheet->is_print_comments(); | |
Returns 0 if the property isn't set. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_PARSEEXCEL_WORKSHEET | |
$fatpacked{"Spreadsheet/XLSX.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX'; | |
package Spreadsheet::XLSX; | |
use 5.006000; | |
use strict; | |
use warnings; | |
use base 'Spreadsheet::ParseExcel::Workbook'; | |
our $VERSION = '0.15'; | |
use Archive::Zip; | |
use Spreadsheet::ParseExcel; | |
use Spreadsheet::XLSX::Fmt2007; | |
################################################################################ | |
sub new { | |
my ($class, $filename, $converter) = @_; | |
my %shared_info; # shared_strings, styles, style_info, rels, converter | |
$shared_info{converter} = $converter; | |
my $self = bless Spreadsheet::ParseExcel::Workbook->new(), $class; | |
my $zip = __load_zip($filename); | |
$shared_info{shared_strings}= __load_shared_strings($zip, $shared_info{converter}); | |
my ($styles, $style_info) = __load_styles($zip); | |
$shared_info{styles} = $styles; | |
$shared_info{style_info} = $style_info; | |
$shared_info{rels} = __load_rels($zip); | |
$self->_load_workbook($zip, \%shared_info); | |
return $self; | |
} | |
sub _load_workbook { | |
my ($self, $zip, $shared_info) = @_; | |
my $member_workbook = $zip->memberNamed('xl/workbook.xml') or die("xl/workbook.xml not found in this zip\n"); | |
$self->{SheetCount} = 0; | |
$self->{FmtClass} = Spreadsheet::XLSX::Fmt2007->new; | |
$self->{Flg1904} = 0; | |
if ($member_workbook->contents =~ /date1904="1"/) { | |
$self->{Flg1904} = 1; | |
} | |
foreach ($member_workbook->contents =~ /\<(.*?)\/?\>/g) { | |
/^(\w+)\s+/; | |
my ($tag, $other) = ($1, $'); | |
my @pairs = split /\" /, $other; | |
$tag eq 'sheet' or next; | |
my $sheet = { | |
MaxRow => 0, | |
MaxCol => 0, | |
MinRow => 1000000, | |
MinCol => 1000000, | |
}; | |
foreach ($other =~ /(\S+=".*?")/gsm) { | |
my ($k, $v) = split /=?"/; #" | |
if ($k eq 'name') { | |
$sheet->{Name} = $v; | |
$sheet->{Name} = $shared_info->{converter}->convert($sheet->{Name}) if defined $shared_info->{converter}; | |
} elsif ($k eq 'r:id') { | |
$sheet->{path} = $shared_info->{rels}->{$v}; | |
} | |
} | |
my $wsheet = Spreadsheet::ParseExcel::Worksheet->new(%$sheet); | |
$self->{Worksheet}[$self->{SheetCount}] = $wsheet; | |
$self->{SheetCount} += 1; | |
} | |
foreach my $sheet (@{$self->{Worksheet}}) { | |
my $member_sheet = $zip->memberNamed("xl/$sheet->{path}") or next; | |
my ($row, $col); | |
my $parsing_v_tag = 0; | |
my $s = 0; | |
my $s2 = 0; | |
my $sty = 0; | |
foreach ($member_sheet->contents =~ /(\<.*?\/?\>|.*?(?=\<))/g) { | |
if (/^\<c\s*.*?\s*r=\"([A-Z])([A-Z]?)(\d+)\"/) { | |
($row, $col) = __decode_cell_name($1, $2, $3); | |
$s = m/t=\"s\"/ ? 1 : 0; | |
$s2 = m/t=\"str\"/ ? 1 : 0; | |
$sty = m/s="([0-9]+)"/ ? $1 : 0; | |
} elsif (/^<v>/) { | |
$parsing_v_tag = 1; | |
} elsif (/^<\/v>/) { | |
$parsing_v_tag = 0; | |
} elsif (length($_) && $parsing_v_tag) { | |
my $v = $s ? $shared_info->{shared_strings}->[$_] : $_; | |
if ($v eq "</c>") { | |
$v = ""; | |
} | |
my $type = "Text"; | |
my $thisstyle = ""; | |
if (not($s) && not($s2)) { | |
$type = "Numeric"; | |
if (defined $sty && defined $shared_info->{styles}->[$sty]) { | |
$thisstyle = $shared_info->{style_info}->{$shared_info->{styles}->[$sty]}; | |
if ($thisstyle =~ /\b(mmm|m|d|yy|h|hh|mm|ss)\b/) { | |
$type = "Date"; | |
} | |
} | |
} | |
$sheet->{MaxRow} = $row if $sheet->{MaxRow} < $row; | |
$sheet->{MaxCol} = $col if $sheet->{MaxCol} < $col; | |
$sheet->{MinRow} = $row if $sheet->{MinRow} > $row; | |
$sheet->{MinCol} = $col if $sheet->{MinCol} > $col; | |
if ($v =~ /(.*)E\-(.*)/gsm && $type eq "Numeric") { | |
$v = $1 / (10**$2); # this handles scientific notation for very small numbers | |
} | |
my $cell = Spreadsheet::ParseExcel::Cell->new( | |
Val => $v, | |
Format => $thisstyle, | |
Type => $type | |
); | |
$cell->{_Value} = $self->{FmtClass}->ValFmt($cell, $self); | |
if ($type eq "Date") { | |
if ($v < 1) { #then this is Excel time field | |
$cell->{Type} = "Text"; | |
} | |
$cell->{Val} = $cell->{_Value}; | |
} | |
$sheet->{Cells}[$row][$col] = $cell; | |
} | |
} | |
$sheet->{MinRow} = 0 if $sheet->{MinRow} > $sheet->{MaxRow}; | |
$sheet->{MinCol} = 0 if $sheet->{MinCol} > $sheet->{MaxCol}; | |
} | |
return $self; | |
} | |
# Convert cell name in the format AA1 to a row and column number. | |
sub __decode_cell_name { | |
my ($letter1, $letter2, $digits) = @_; | |
my $col = ord($letter1) - 65; | |
if ($letter2) { | |
$col++; | |
$col *= 26; | |
$col += (ord($letter2) - 65); | |
} | |
my $row = $digits - 1; | |
return ($row, $col); | |
} | |
sub __load_shared_strings { | |
my ($zip, $converter) = @_; | |
my $member_shared_strings = $zip->memberNamed('xl/sharedStrings.xml'); | |
my @shared_strings = (); | |
if ($member_shared_strings) { | |
my $mstr = $member_shared_strings->contents; | |
$mstr =~ s/<t\/>/<t><\/t>/gsm; # this handles an empty t tag in the xml <t/> | |
foreach my $si ($mstr =~ /<si.*?>(.*?)<\/si/gsm) { | |
my $str; | |
foreach my $t ($si =~ /<t.*?>(.*?)<\/t/gsm) { | |
$t = $converter->convert($t) if defined $converter; | |
$str .= $t; | |
} | |
push @shared_strings, $str; | |
} | |
} | |
return \@shared_strings; | |
} | |
sub __load_styles { | |
my ($zip) = @_; | |
my $member_styles = $zip->memberNamed('xl/styles.xml'); | |
my @styles = (); | |
my %style_info = (); | |
if ($member_styles) { | |
my $formatter = Spreadsheet::XLSX::Fmt2007->new(); | |
foreach my $t ($member_styles->contents =~ /xf\ numFmtId="([^"]*)"(?!.*\/cellStyleXfs)/gsm) { #" | |
push @styles, $t; | |
} | |
my $default = $1 || ''; | |
foreach my $t1 (@styles) { | |
$member_styles->contents =~ /numFmtId="$t1" formatCode="([^"]*)/; | |
my $formatCode = $1 || ''; | |
if ($formatCode eq $default || not($formatCode)) { | |
if ($t1 == 9 || $t1 == 10) { | |
$formatCode = '0.00000%'; | |
} elsif ($t1 == 14) { | |
$formatCode = 'yyyy-mm-dd'; | |
} else { | |
$formatCode = ''; | |
} | |
# $formatCode = $formatter->FmtStringDef($t1); | |
} | |
$style_info{$t1} = $formatCode; | |
$default = $1 || ''; | |
} | |
} | |
return (\@styles, \%style_info); | |
} | |
sub __load_rels { | |
my ($zip) = @_; | |
my $member_rels = $zip->memberNamed('xl/_rels/workbook.xml.rels') or die("xl/_rels/workbook.xml.rels not found in this zip\n"); | |
my %rels = (); | |
foreach ($member_rels->contents =~ /\<Relationship (.*?)\/?\>/g) { | |
my ($id, $target); | |
($id) = /Id="(.*?)"/; | |
($target) = /Target="(.*?)"/; | |
if (defined $id and defined $target) { | |
$rels{$id} = $target; | |
} | |
} | |
return \%rels; | |
} | |
sub __load_zip { | |
my ($filename) = @_; | |
my $zip = Archive::Zip->new(); | |
if (ref $filename) { | |
$zip->readFromFileHandle($filename) == Archive::Zip::AZ_OK or die("Cannot open data as Zip archive"); | |
} else { | |
$zip->read($filename) == Archive::Zip::AZ_OK or die("Cannot open $filename as Zip archive"); | |
} | |
return $zip; | |
} | |
1; | |
__END__ | |
=head1 NAME | |
Spreadsheet::XLSX - Perl extension for reading MS Excel 2007 files; | |
=head1 SYNOPSIS | |
use Text::Iconv; | |
my $converter = Text::Iconv -> new ("utf-8", "windows-1251"); | |
# Text::Iconv is not really required. | |
# This can be any object with the convert method. Or nothing. | |
use Spreadsheet::XLSX; | |
my $excel = Spreadsheet::XLSX -> new ('test.xlsx', $converter); | |
foreach my $sheet (@{$excel -> {Worksheet}}) { | |
printf("Sheet: %s\n", $sheet->{Name}); | |
$sheet -> {MaxRow} ||= $sheet -> {MinRow}; | |
foreach my $row ($sheet -> {MinRow} .. $sheet -> {MaxRow}) { | |
$sheet -> {MaxCol} ||= $sheet -> {MinCol}; | |
foreach my $col ($sheet -> {MinCol} .. $sheet -> {MaxCol}) { | |
my $cell = $sheet -> {Cells} [$row] [$col]; | |
if ($cell) { | |
printf("( %s , %s ) => %s\n", $row, $col, $cell -> {Val}); | |
} | |
} | |
} | |
} | |
=head1 DESCRIPTION | |
This module is a (quick and dirty) emulation of Spreadsheet::ParseExcel for | |
Excel 2007 (.xlsx) file format. It supports styles and many of Excel's quirks, | |
but not all. It populates the classes from Spreadsheet::ParseExcel for interoperability; | |
including Workbook, Worksheet, and Cell. | |
=head1 SEE ALSO | |
=over 2 | |
=item Text::CSV_XS, Text::CSV_PP | |
http://search.cpan.org/~hmbrand/ | |
A pure perl version is available on http://search.cpan.org/~makamaka/ | |
=item Spreadsheet::ParseExcel | |
http://search.cpan.org/~kwitknr/ | |
=item Spreadsheet::ReadSXC | |
http://search.cpan.org/~terhechte/ | |
=item Spreadsheet::BasicRead | |
http://search.cpan.org/~gng/ for xlscat likewise functionality (Excel only) | |
=item Spreadsheet::ConvertAA | |
http://search.cpan.org/~nkh/ for an alternative set of cell2cr () / | |
cr2cell () pair | |
=item Spreadsheet::Perl | |
http://search.cpan.org/~nkh/ offers a Pure Perl implementation of a | |
spreadsheet engine. Users that want this format to be supported in | |
Spreadsheet::Read are hereby motivated to offer patches. It's not high | |
on my todo-list. | |
=item xls2csv | |
http://search.cpan.org/~ken/ offers an alternative for my C<xlscat -c>, | |
in the xls2csv tool, but this tool focusses on character encoding | |
transparency, and requires some other modules. | |
=item Spreadsheet::Read | |
http://search.cpan.org/~hmbrand/ read the data from a spreadsheet (interface | |
module) | |
=back | |
=head1 AUTHOR | |
Dmitry Ovsyanko, E<lt>do@eludia.ru<gt>, http://eludia.ru/wiki/ | |
Patches by: | |
Steve Simms | |
Joerg Meltzer | |
Loreyna Yeung | |
Rob Polocz | |
Gregor Herrmann | |
H.Merijn Brand | |
endacoe | |
Pat Mariani | |
Sergey Pushkin | |
=head1 ACKNOWLEDGEMENTS | |
Thanks to TrackVia Inc. (http://www.trackvia.com) for paying for Rob Polocz working time. | |
=head1 COPYRIGHT AND LICENSE | |
Copyright (C) 2008 by Dmitry Ovsyanko | |
This library is free software; you can redistribute it and/or modify | |
it under the same terms as Perl itself, either Perl version 5.8.8 or, | |
at your option, any later version of Perl 5 you may have available. | |
=cut | |
SPREADSHEET_XLSX | |
$fatpacked{"Spreadsheet/XLSX/Fmt2007.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_FMT2007'; | |
# This code is adapted for Excel 2007 from: | |
# Spreadsheet::XLSX::FmtDefault | |
# by Kawai, Takanori (Hippo2000) 2001.2.2 | |
# This Program is ALPHA version. | |
#============================================================================== | |
package Spreadsheet::XLSX::Fmt2007; | |
use strict; | |
use warnings; | |
use Spreadsheet::XLSX::Utility2007 qw(ExcelFmt); | |
our $VERSION = '0.13'; # | |
my %hFmtDefault = ( | |
0x00 => '@', | |
0x01 => '0', | |
0x02 => '0.00', | |
0x03 => '#,##0', | |
0x04 => '#,##0.00', | |
0x05 => '($#,##0_);($#,##0)', | |
0x06 => '($#,##0_);[RED]($#,##0)', | |
0x07 => '($#,##0.00_);($#,##0.00_)', | |
0x08 => '($#,##0.00_);[RED]($#,##0.00_)', | |
0x09 => '0%', | |
0x0A => '0.00%', | |
0x0B => '0.00E+00', | |
0x0C => '# ?/?', | |
0x0D => '# ??/??', | |
0x0E => 'yyyy-mm-dd', | |
0x0F => 'd-mmm-yy', | |
0x10 => 'd-mmm', | |
0x11 => 'mmm-yy', | |
0x12 => 'h:mm AM/PM', | |
0x13 => 'h:mm:ss AM/PM', | |
0x14 => 'h:mm', | |
0x15 => 'h:mm:ss', | |
0x16 => 'm-d-yy h:mm', | |
#0x17-0x24 -- Differs in Natinal | |
0x25 => '(#,##0_);(#,##0)', | |
0x26 => '(#,##0_);[RED](#,##0)', | |
0x27 => '(#,##0.00);(#,##0.00)', | |
0x28 => '(#,##0.00);[RED](#,##0.00)', | |
0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)', | |
0x2A => '_($*#,##0_);_($*(#,##0);_(*"-"_);_(@_)', | |
0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)', | |
0x2C => '_($*#,##0.00_);_($*(#,##0.00);_(*"-"??_);_(@_)', | |
0x2D => 'mm:ss', | |
0x2E => '[h]:mm:ss', | |
0x2F => 'mm:ss.0', | |
0x30 => '##0.0E+0', | |
0x31 => '@', | |
); | |
#------------------------------------------------------------------------------ | |
# new (for Spreadsheet::XLSX::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub new { | |
my ($sPkg, %hKey) = @_; | |
my $oThis = {}; | |
bless $oThis; | |
return $oThis; | |
} | |
#------------------------------------------------------------------------------ | |
# TextFmt (for Spreadsheet::XLSX::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub TextFmt { | |
my ($oThis, $sTxt, $sCode) = @_; | |
return $sTxt if ((!defined($sCode)) || ($sCode eq '_native_')); | |
return pack('U*', unpack('n*', $sTxt)); | |
} | |
#------------------------------------------------------------------------------ | |
# FmtStringDef (for Spreadsheet::XLSX::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub FmtStringDef { | |
my ($oThis, $iFmtIdx, $oBook, $rhFmt) = @_; | |
my $sFmtStr = $oBook->{FormatStr}->{$iFmtIdx}; | |
if (!(defined($sFmtStr)) && defined($rhFmt)) { | |
$sFmtStr = $rhFmt->{$iFmtIdx}; | |
} | |
$sFmtStr = $hFmtDefault{$iFmtIdx} unless ($sFmtStr); | |
return $sFmtStr; | |
} | |
#------------------------------------------------------------------------------ | |
# FmtString (for Spreadsheet::XLSX::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub FmtString { | |
my ($oThis, $oCell, $oBook) = @_; | |
my $sFmtStr; # = $oThis->FmtStringDef( | |
# $oBook->{Format}[$oCell->{FormatNo}]->{FmtIdx}, $oBook); | |
unless (defined($sFmtStr)) { | |
if ($oCell->{Type} eq 'Numeric') { | |
if ($oCell->{Format}) { | |
$sFmtStr = $oCell->{Format}; | |
} elsif (int($oCell->{Val}) != $oCell->{Val}) { | |
$sFmtStr = '0.00'; | |
} else { | |
$sFmtStr = '0'; | |
} | |
} elsif ($oCell->{Type} eq 'Date') { | |
if ($oCell->{Format}) { | |
$sFmtStr = $oCell->{Format}; | |
} elsif (int($oCell->{Val}) <= 0) { | |
$sFmtStr = 'h:mm:ss'; | |
} else { | |
$sFmtStr = 'm-d-yy'; | |
} | |
} else { | |
$sFmtStr = '@'; | |
} | |
} | |
return $sFmtStr; | |
} | |
#------------------------------------------------------------------------------ | |
# ValFmt (for Spreadsheet::XLSX::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub ValFmt { | |
my ($oThis, $oCell, $oBook) = @_; | |
my ($Dt, $iFmtIdx, $iNumeric, $Flg1904); | |
if ($oCell->{Type} eq 'Text') { | |
$Dt = ((defined $oCell->{Val}) && ($oCell->{Val} ne '')) ? $oThis->TextFmt($oCell->{Val}, $oCell->{Code}) : ''; | |
} else { | |
$Dt = $oCell->{Val}; | |
} | |
$Flg1904 = $oBook->{Flg1904}; | |
my $sFmtStr = $oThis->FmtString($oCell, $oBook); | |
return ExcelFmt($sFmtStr, $Dt, $Flg1904, $oCell->{Type}); | |
} | |
#------------------------------------------------------------------------------ | |
# ChkType (for Spreadsheet::XLSX::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub ChkType { | |
my ($oPkg, $iNumeric, $iFmtIdx) = @_; | |
if ($iNumeric) { | |
if ( (($iFmtIdx >= 0x0E) && ($iFmtIdx <= 0x16)) | |
|| (($iFmtIdx >= 0x2D) && ($iFmtIdx <= 0x2F))) { | |
return "Date"; | |
} else { | |
return "Numeric"; | |
} | |
} else { | |
return "Text"; | |
} | |
} | |
1; | |
__END__ | |
=head1 SYNOPSIS | |
$cell = $myworkbook->worksheet->{Cells}[$row][$col] | |
my $type = $cell->{Type}; # Date, Text, or Numeric | |
my $disp_value = $cell->Value; # displayed (formatted) value set in XLSX by $myFmt2007->ValFmt($cell, $workbook) | |
my $fund_value = $cell->{Val}; # fundemental (underlying) value | |
my $formatter; | |
if ($myworkbook->excel07) { | |
$formatter=Spreadsheet::XLSX::Fmt2007->new(); | |
} else { | |
$formatter=Spreadsheet::ParseExcel::FmtDefault->new(); | |
} | |
my $format_string = $formatter->FmtString($cell,$self->workbook); | |
=cut | |
SPREADSHEET_XLSX_FMT2007 | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL'; | |
package Spreadsheet::ParseExcel; | |
############################################################################## | |
# | |
# Spreadsheet::ParseExcel - Extract information from an Excel file. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2008 Takanori Kawai | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use 5.008; | |
use OLE::Storage_Lite; | |
use File::Basename qw(fileparse); | |
use IO::File; | |
use Config; | |
use Crypt::RC4; | |
use Digest::Perl::MD5; | |
our $VERSION = '0.65'; | |
use Spreadsheet::ParseExcel::Workbook; | |
use Spreadsheet::ParseExcel::Worksheet; | |
use Spreadsheet::ParseExcel::Font; | |
use Spreadsheet::ParseExcel::Format; | |
use Spreadsheet::ParseExcel::Cell; | |
use Spreadsheet::ParseExcel::FmtDefault; | |
my $currentbook; | |
my @aColor = ( | |
'000000', # 0x00 | |
'FFFFFF', 'FFFFFF', 'FFFFFF', 'FFFFFF', | |
'FFFFFF', 'FFFFFF', 'FFFFFF', '000000', # 0x08 | |
'FFFFFF', 'FF0000', '00FF00', '0000FF', | |
'FFFF00', 'FF00FF', '00FFFF', '800000', # 0x10 | |
'008000', '000080', '808000', '800080', | |
'008080', 'C0C0C0', '808080', '9999FF', # 0x18 | |
'993366', 'FFFFCC', 'CCFFFF', '660066', | |
'FF8080', '0066CC', 'CCCCFF', '000080', # 0x20 | |
'FF00FF', 'FFFF00', '00FFFF', '800080', | |
'800000', '008080', '0000FF', '00CCFF', # 0x28 | |
'CCFFFF', 'CCFFCC', 'FFFF99', '99CCFF', | |
'FF99CC', 'CC99FF', 'FFCC99', '3366FF', # 0x30 | |
'33CCCC', '99CC00', 'FFCC00', 'FF9900', | |
'FF6600', '666699', '969696', '003366', # 0x38 | |
'339966', '003300', '333300', '993300', | |
'993366', '333399', '333333', '000000' # 0x40 | |
); | |
use constant verExcel95 => 0x500; | |
use constant verExcel97 => 0x600; | |
use constant verBIFF2 => 0x00; | |
use constant verBIFF3 => 0x02; | |
use constant verBIFF4 => 0x04; | |
use constant verBIFF5 => 0x08; | |
use constant verBIFF8 => 0x18; | |
use constant MS_BIFF_CRYPTO_NONE => 0; | |
use constant MS_BIFF_CRYPTO_XOR => 1; | |
use constant MS_BIFF_CRYPTO_RC4 => 2; | |
use constant sizeof_BIFF_8_FILEPASS => ( 6 + 3 * 16 ); | |
use constant REKEY_BLOCK => 0x400; | |
# Error code for some of the common parsing errors. | |
use constant ErrorNone => 0; | |
use constant ErrorNoFile => 1; | |
use constant ErrorNoExcelData => 2; | |
use constant ErrorFileEncrypted => 3; | |
# Color index for the 'auto' color | |
use constant AutoColor => 64; | |
our %error_strings = ( | |
ErrorNone, '', # 0 | |
ErrorNoFile, 'File not found', # 1 | |
ErrorNoExcelData, 'No Excel data found in file', # 2 | |
ErrorFileEncrypted, 'File is encrypted', # 3 | |
); | |
our %ProcTbl = ( | |
#Develpers' Kit P291 | |
0x14 => \&_subHeader, # Header | |
0x15 => \&_subFooter, # Footer | |
0x18 => \&_subName, # NAME(?) | |
0x1A => \&_subVPageBreak, # Vertical Page Break | |
0x1B => \&_subHPageBreak, # Horizontal Page Break | |
0x22 => \&_subFlg1904, # 1904 Flag | |
0x26 => \&_subMargin, # Left Margin | |
0x27 => \&_subMargin, # Right Margin | |
0x28 => \&_subMargin, # Top Margin | |
0x29 => \&_subMargin, # Bottom Margin | |
0x2A => \&_subPrintHeaders, # Print Headers | |
0x2B => \&_subPrintGridlines, # Print Gridlines | |
0x3C => \&_subContinue, # Continue | |
0x3D => \&_subWindow1, # Window1 | |
0x43 => \&_subXF, # XF for Excel < 4. | |
0x0443 => \&_subXF, # XF for Excel = 4. | |
0x862 => \&_subSheetLayout, # Sheet Layout | |
0x1B8 => \&_subHyperlink, # HYPERLINK | |
#Develpers' Kit P292 | |
0x55 => \&_subDefColWidth, # Consider | |
0x5C => \&_subWriteAccess, # WRITEACCESS | |
0x7D => \&_subColInfo, # Colinfo | |
0x7E => \&_subRK, # RK | |
0x81 => \&_subWSBOOL, # WSBOOL | |
0x83 => \&_subHcenter, # HCENTER | |
0x84 => \&_subVcenter, # VCENTER | |
0x85 => \&_subBoundSheet, # BoundSheet | |
0x92 => \&_subPalette, # Palette, fgp | |
0x99 => \&_subStandardWidth, # Standard Col | |
#Develpers' Kit P293 | |
0xA1 => \&_subSETUP, # SETUP | |
0xBD => \&_subMulRK, # MULRK | |
0xBE => \&_subMulBlank, # MULBLANK | |
0xD6 => \&_subRString, # RString | |
#Develpers' Kit P294 | |
0xE0 => \&_subXF, # ExTended Format | |
0xE5 => \&_subMergeArea, # MergeArea (Not Documented) | |
0xFC => \&_subSST, # Shared String Table | |
0xFD => \&_subLabelSST, # Label SST | |
#Develpers' Kit P295 | |
0x201 => \&_subBlank, # Blank | |
0x202 => \&_subInteger, # Integer(Not Documented) | |
0x203 => \&_subNumber, # Number | |
0x204 => \&_subLabel, # Label | |
0x205 => \&_subBoolErr, # BoolErr | |
0x207 => \&_subString, # STRING | |
0x208 => \&_subRow, # RowData | |
0x221 => \&_subArray, # Array (Consider) | |
0x225 => \&_subDefaultRowHeight, # Consider | |
0x31 => \&_subFont, # Font | |
0x231 => \&_subFont, # Font | |
0x27E => \&_subRK, # RK | |
0x41E => \&_subFormat, # Format | |
0x06 => \&_subFormula, # Formula | |
0x406 => \&_subFormula, # Formula | |
0x009 => \&_subBOF, # BOF(BIFF2) | |
0x209 => \&_subBOF, # BOF(BIFF3) | |
0x409 => \&_subBOF, # BOF(BIFF4) | |
0x809 => \&_subBOF, # BOF(BIFF5-8) | |
); | |
our $BIGENDIAN; | |
our $PREFUNC; | |
our $_use_perlio; | |
#------------------------------------------------------------------------------ | |
# Spreadsheet::ParseExcel->new | |
#------------------------------------------------------------------------------ | |
sub new { | |
my ( $class, %hParam ) = @_; | |
if ( not defined $_use_perlio ) { | |
if ( exists $Config{useperlio} | |
&& defined $Config{useperlio} | |
&& $Config{useperlio} eq "define" ) | |
{ | |
$_use_perlio = 1; | |
} | |
else { | |
$_use_perlio = 0; | |
require IO::Scalar; | |
import IO::Scalar; | |
} | |
} | |
# Check ENDIAN(Little: Intel etc. BIG: Sparc etc) | |
$BIGENDIAN = | |
( defined $hParam{Endian} ) ? $hParam{Endian} | |
: ( unpack( "H08", pack( "L", 2 ) ) eq '02000000' ) ? 0 | |
: 1; | |
my $self = {}; | |
bless $self, $class; | |
$self->{GetContent} = \&_subGetContent; | |
if ( $hParam{EventHandlers} ) { | |
$self->SetEventHandlers( $hParam{EventHandlers} ); | |
} | |
else { | |
$self->SetEventHandlers( \%ProcTbl ); | |
} | |
if ( $hParam{AddHandlers} ) { | |
foreach my $sKey ( keys( %{ $hParam{AddHandlers} } ) ) { | |
$self->SetEventHandler( $sKey, $hParam{AddHandlers}->{$sKey} ); | |
} | |
} | |
$self->{CellHandler} = $hParam{CellHandler}; | |
$self->{NotSetCell} = $hParam{NotSetCell}; | |
$self->{Object} = $hParam{Object}; | |
if ( defined $hParam{Password} ) { | |
$self->{Password} = $hParam{Password}; | |
} | |
else { | |
$self->{Password} = 'VelvetSweatshop'; | |
} | |
$self->{_error_status} = ErrorNone; | |
return $self; | |
} | |
#------------------------------------------------------------------------------ | |
# Spreadsheet::ParseExcel->SetEventHandler | |
#------------------------------------------------------------------------------ | |
sub SetEventHandler { | |
my ( $self, $key, $sub_ref ) = @_; | |
$self->{FuncTbl}->{$key} = $sub_ref; | |
} | |
#------------------------------------------------------------------------------ | |
# Spreadsheet::ParseExcel->SetEventHandlers | |
#------------------------------------------------------------------------------ | |
sub SetEventHandlers { | |
my ( $self, $rhTbl ) = @_; | |
$self->{FuncTbl} = undef; | |
foreach my $sKey ( keys %$rhTbl ) { | |
$self->{FuncTbl}->{$sKey} = $rhTbl->{$sKey}; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# Decryption routines | |
# based on sources of gnumeric (ms-biff.c ms-excel-read.c) | |
#------------------------------------------------------------------------------ | |
sub md5state { | |
my ( $md5 ) = @_; | |
my $s = ''; | |
for ( my $i = 0 ; $i < 4 ; $i++ ) { | |
my $v = $md5->{_state}[$i]; | |
$s .= chr( $v & 0xff ); | |
$s .= chr( ( $v >> 8 ) & 0xff ); | |
$s .= chr( ( $v >> 16 ) & 0xff ); | |
$s .= chr( ( $v >> 24 ) & 0xff ); | |
} | |
return $s; | |
} | |
sub MakeKey { | |
my ( $block, $key, $valContext ) = @_; | |
my $pwarray = "\0" x 64; | |
substr( $pwarray, 0, 5 ) = substr( $valContext, 0, 5 ); | |
substr( $pwarray, 5, 1 ) = chr( $block & 0xff ); | |
substr( $pwarray, 6, 1 ) = chr( ( $block >> 8 ) & 0xff ); | |
substr( $pwarray, 7, 1 ) = chr( ( $block >> 16 ) & 0xff ); | |
substr( $pwarray, 8, 1 ) = chr( ( $block >> 24 ) & 0xff ); | |
substr( $pwarray, 9, 1 ) = "\x80"; | |
substr( $pwarray, 56, 1 ) = "\x48"; | |
my $md5 = Digest::Perl::MD5->new(); | |
$md5->add( $pwarray ); | |
my $s = md5state( $md5 ); | |
${$key} = Crypt::RC4->new( $s ); | |
} | |
sub VerifyPassword { | |
my ( $password, $docid, $salt_data, $hashedsalt_data, $valContext ) = @_; | |
my $pwarray = "\0" x 64; | |
my $i; | |
my $md5 = Digest::Perl::MD5->new(); | |
for ( $i = 0 ; $i < length( $password ) ; $i++ ) { | |
my $o = ord( substr( $password, $i, 1 ) ); | |
substr( $pwarray, 2 * $i, 1 ) = chr( $o & 0xff ); | |
substr( $pwarray, 2 * $i + 1, 1 ) = chr( ( $o >> 8 ) & 0xff ); | |
} | |
substr( $pwarray, 2 * $i, 1 ) = chr( 0x80 ); | |
substr( $pwarray, 56, 1 ) = chr( ( $i << 4 ) & 0xff ); | |
$md5->add( $pwarray ); | |
my $mdContext1 = md5state( $md5 ); | |
my $offset = 0; | |
my $keyoffset = 0; | |
my $tocopy = 5; | |
$md5->reset; | |
while ( $offset != 16 ) { | |
if ( ( 64 - $offset ) < 5 ) { | |
$tocopy = 64 - $offset; | |
} | |
substr( $pwarray, $offset, $tocopy ) = | |
substr( $mdContext1, $keyoffset, $tocopy ); | |
$offset += $tocopy; | |
if ( $offset == 64 ) { | |
$md5->add( $pwarray ); | |
$keyoffset = $tocopy; | |
$tocopy = 5 - $tocopy; | |
$offset = 0; | |
next; | |
} | |
$keyoffset = 0; | |
$tocopy = 5; | |
substr( $pwarray, $offset, 16 ) = $docid; | |
$offset += 16; | |
} | |
substr( $pwarray, 16, 1 ) = "\x80"; | |
substr( $pwarray, 17, 47 ) = "\0" x 47; | |
substr( $pwarray, 56, 1 ) = "\x80"; | |
substr( $pwarray, 57, 1 ) = "\x0a"; | |
$md5->add( $pwarray ); | |
${$valContext} = md5state( $md5 ); | |
my $key; | |
MakeKey( 0, \$key, ${$valContext} ); | |
my $salt = $key->RC4( $salt_data ); | |
my $hashedsalt = $key->RC4( $hashedsalt_data ); | |
$salt .= "\x80" . "\0" x 47; | |
substr( $salt, 56, 1 ) = "\x80"; | |
$md5->reset; | |
$md5->add( $salt ); | |
my $mdContext2 = md5state( $md5 ); | |
return ( $mdContext2 eq $hashedsalt ); | |
} | |
sub SkipBytes { | |
my ( $q, $start, $count ) = @_; | |
my $scratch = "\0" x REKEY_BLOCK; | |
my $block; | |
$block = int( ( $start + $count ) / REKEY_BLOCK ); | |
if ( $block != $q->{block} ) { | |
MakeKey( $q->{block} = $block, \$q->{rc4_key}, $q->{md5_ctxt} ); | |
$count = ( $start + $count ) % REKEY_BLOCK; | |
} | |
$q->{rc4_key}->RC4( substr( $scratch, 0, $count ) ); | |
return 1; | |
} | |
sub SetDecrypt { | |
my ( $q, $version, $password ) = @_; | |
if ( $q->{opcode} != 0x2f ) { | |
return 0; | |
} | |
if ( $password eq '' ) { | |
return 0; | |
} | |
# TODO old versions decryption | |
#if (version < MS_BIFF_V8 || q->data[0] == 0) | |
# return ms_biff_pre_biff8_query_set_decrypt (q, password); | |
if ( $q->{length} != sizeof_BIFF_8_FILEPASS ) { | |
return 0; | |
} | |
unless ( | |
VerifyPassword( | |
$password, | |
substr( $q->{data}, 6, 16 ), | |
substr( $q->{data}, 22, 16 ), | |
substr( $q->{data}, 38, 16 ), | |
\$q->{md5_ctxt} | |
) | |
) | |
{ | |
return 0; | |
} | |
$q->{encryption} = MS_BIFF_CRYPTO_RC4; | |
$q->{block} = -1; | |
# The first record after FILEPASS seems to be unencrypted | |
$q->{dont_decrypt_next_record} = 1; | |
# Pretend to decrypt the entire stream up till this point, it was | |
# encrypted, but do it anyway to keep the rc4 state in sync | |
SkipBytes( $q, 0, $q->{streamPos} ); | |
return 1; | |
} | |
sub InitStream { | |
my ( $stream_data ) = @_; | |
my %q; | |
$q{opcode} = 0; | |
$q{length} = 0; | |
$q{data} = ''; | |
$q{stream} = $stream_data; # data stream | |
$q{streamLen} = length( $stream_data ); # stream length | |
$q{streamPos} = 0; # stream position | |
$q{encryption} = 0; | |
$q{xor_key} = ''; | |
$q{rc4_key} = ''; | |
$q{md5_ctxt} = ''; | |
$q{block} = 0; | |
$q{dont_decrypt_next_record} = 0; | |
return \%q; | |
} | |
sub QueryNext { | |
my ( $q ) = @_; | |
if ( $q->{streamPos} + 4 >= $q->{streamLen} ) { | |
return 0; | |
} | |
my $data = substr( $q->{stream}, $q->{streamPos}, 4 ); | |
( $q->{opcode}, $q->{length} ) = unpack( 'v2', $data ); | |
# No biff record should be larger than around 20,000. | |
if ( $q->{length} >= 20000 ) { | |
return 0; | |
} | |
if ( $q->{length} > 0 ) { | |
$q->{data} = substr( $q->{stream}, $q->{streamPos} + 4, $q->{length} ); | |
} | |
else { | |
$q->{data} = undef; | |
$q->{dont_decrypt_next_record} = 1; | |
} | |
if ( $q->{encryption} == MS_BIFF_CRYPTO_RC4 ) { | |
if ( $q->{dont_decrypt_next_record} ) { | |
SkipBytes( $q, $q->{streamPos}, 4 + $q->{length} ); | |
$q->{dont_decrypt_next_record} = 0; | |
} | |
else { | |
my $pos = $q->{streamPos}; | |
my $data = $q->{data}; | |
my $len = $q->{length}; | |
my $res = ''; | |
# Pretend to decrypt header. | |
SkipBytes( $q, $pos, 4 ); | |
$pos += 4; | |
while ( $q->{block} != int( ( $pos + $len ) / REKEY_BLOCK ) ) { | |
my $step = REKEY_BLOCK - ( $pos % REKEY_BLOCK ); | |
$res .= $q->{rc4_key}->RC4( substr( $data, 0, $step ) ); | |
$data = substr( $data, $step ); | |
$pos += $step; | |
$len -= $step; | |
MakeKey( ++$q->{block}, \$q->{rc4_key}, $q->{md5_ctxt} ); | |
} | |
$res .= $q->{rc4_key}->RC4( substr( $data, 0, $len ) ); | |
$q->{data} = $res; | |
} | |
} | |
elsif ( $q->{encryption} == MS_BIFF_CRYPTO_XOR ) { | |
# not implemented | |
return 0; | |
} | |
elsif ( $q->{encryption} == MS_BIFF_CRYPTO_NONE ) { | |
} | |
$q->{streamPos} += 4 + $q->{length}; | |
return 1; | |
} | |
############################################################################### | |
# | |
# Parse() | |
# | |
# Parse the Excel file and convert it into a tree of objects.. | |
# | |
sub parse { | |
my ( $self, $source, $formatter ) = @_; | |
my $workbook = Spreadsheet::ParseExcel::Workbook->new(); | |
$currentbook = $workbook; | |
$workbook->{SheetCount} = 0; | |
$workbook->{CellHandler} = $self->{CellHandler}; | |
$workbook->{NotSetCell} = $self->{NotSetCell}; | |
$workbook->{Object} = $self->{Object}; | |
$workbook->{aColor} = [ @aColor ]; | |
my ( $biff_data, $data_length ) = $self->_get_content( $source, $workbook ); | |
return undef if not $biff_data; | |
if ( $formatter ) { | |
$workbook->{FmtClass} = $formatter; | |
} | |
else { | |
$workbook->{FmtClass} = Spreadsheet::ParseExcel::FmtDefault->new(); | |
} | |
# Parse the BIFF data. | |
my $stream = InitStream( $biff_data ); | |
while ( QueryNext( $stream ) ) { | |
my $record = $stream->{opcode}; | |
my $record_length = $stream->{length}; | |
my $record_header = $stream->{data}; | |
# If the file contains a FILEPASS record we assume that it is encrypted | |
# and cannot be parsed. | |
if ( $record == 0x002F ) { | |
unless ( SetDecrypt( $stream, '', $self->{Password} ) ) { | |
$self->{_error_status} = ErrorFileEncrypted; | |
return undef; | |
} | |
} | |
# Special case of a formula String with no string. | |
if ( $workbook->{_PrevPos} | |
&& ( defined $self->{FuncTbl}->{$record} ) | |
&& ( $record != 0x207 ) ) | |
{ | |
my $iPos = $workbook->{_PrevPos}; | |
$workbook->{_PrevPos} = undef; | |
my ( $row, $col, $format_index ) = @$iPos; | |
_NewCell( | |
$workbook, $row, $col, | |
Kind => 'Formula String', | |
Val => '', | |
FormatNo => $format_index, | |
Format => $workbook->{Format}[$format_index], | |
Numeric => 0, | |
Code => undef, | |
Book => $workbook, | |
); | |
} | |
# If the BIFF record matches 0x0*09 then it is a BOF record. | |
# We reset the _skip_chart flag to ensure we check the sheet type. | |
if ( ( $record & 0xF0FF ) == 0x09 ) { | |
$workbook->{_skip_chart} = 0; | |
} | |
if ( defined $self->{FuncTbl}->{$record} && !$workbook->{_skip_chart} ) | |
{ | |
$self->{FuncTbl}->{$record} | |
->( $workbook, $record, $record_length, $record_header ); | |
} | |
$PREFUNC = $record if ( $record != 0x3C ); #Not Continue | |
last if defined $workbook->{_ParseAbort}; | |
} | |
foreach my $worksheet (@{$workbook->{Worksheet}} ) { | |
# Install hyperlinks into each cell | |
# Range is undocumented for user; allows reuse of data | |
if ($worksheet->{HyperLinks}) { | |
foreach my $link (@{$worksheet->{HyperLinks}}) { | |
for( my $row = $link->[3]; $row <= $link->[4]; $row++ ) { | |
for( my $col = $link->[5]; $col <= $link->[6]; $col++ ) { | |
$worksheet->{Cells}[$row][$col]{Hyperlink} = $link; | |
} | |
} | |
} | |
} | |
} | |
return $workbook; | |
} | |
############################################################################### | |
# | |
# _get_content() | |
# | |
# Get the Excel BIFF content from the file or filehandle. | |
# | |
sub _get_content { | |
my ( $self, $source, $workbook ) = @_; | |
my ( $biff_data, $data_length ); | |
# Reset the error status in case method is called more than once. | |
$self->{_error_status} = ErrorNone; | |
my $ref = ref($source); | |
if ( $ref ) { | |
if ( $ref eq 'SCALAR' ) { | |
# Specified by a scalar buffer. | |
( $biff_data, $data_length ) = $self->{GetContent}->( $source ); | |
} | |
elsif ( $ref eq 'ARRAY' ) { | |
# Specified by file content | |
$workbook->{File} = undef; | |
my $sData = join( '', @$source ); | |
( $biff_data, $data_length ) = $self->{GetContent}->( \$sData ); | |
} | |
else { | |
# Assume filehandle | |
# For CGI.pm (Light FileHandle) | |
my $sBuff = ''; | |
if ( eval { binmode( $source ) } ) { | |
my $sWk; | |
while ( read( $source, $sWk, 4096 ) ) { | |
$sBuff .= $sWk; | |
} | |
} | |
else { | |
# Assume IO::Wrap or some other filehandle-like OO-only object | |
my $sWk; | |
# IO::Wrap does not implement binmode | |
eval { $source->binmode() }; | |
while ( $source->read( $sWk, 4096 ) ) { | |
$sBuff .= $sWk; | |
} | |
} | |
( $biff_data, $data_length ) = $self->{GetContent}->( \$sBuff ); | |
} | |
} | |
else { | |
# Specified by filename . | |
$workbook->{File} = $source; | |
if ( !-e $source ) { | |
$self->{_error_status} = ErrorNoFile; | |
return undef; | |
} | |
( $biff_data, $data_length ) = $self->{GetContent}->( $source ); | |
} | |
# If the read was successful return the data. | |
if ( $data_length ) { | |
return ( $biff_data, $data_length ); | |
} | |
else { | |
$self->{_error_status} = ErrorNoExcelData; | |
return undef; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subGetContent (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _subGetContent { | |
my ( $sFile ) = @_; | |
my $oOl = OLE::Storage_Lite->new( $sFile ); | |
return ( undef, undef ) unless ( $oOl ); | |
my @aRes = $oOl->getPpsSearch( | |
[ | |
OLE::Storage_Lite::Asc2Ucs( 'Book' ), | |
OLE::Storage_Lite::Asc2Ucs( 'Workbook' ) | |
], | |
1, 1 | |
); | |
return ( undef, undef ) if ( $#aRes < 0 ); | |
#Hack from Herbert | |
if ( $aRes[0]->{Data} ) { | |
return ( $aRes[0]->{Data}, length( $aRes[0]->{Data} ) ); | |
} | |
#Same as OLE::Storage_Lite | |
my $oIo; | |
#1. $sFile is Ref of scalar | |
if ( ref( $sFile ) eq 'SCALAR' ) { | |
if ( $_use_perlio ) { | |
open $oIo, "<", \$sFile; | |
} | |
else { | |
$oIo = IO::Scalar->new; | |
$oIo->open( $sFile ); | |
} | |
} | |
#2. $sFile is a IO::Handle object | |
elsif ( UNIVERSAL::isa( $sFile, 'IO::Handle' ) ) { | |
$oIo = $sFile; | |
binmode( $oIo ); | |
} | |
#3. $sFile is a simple filename string | |
elsif ( !ref( $sFile ) ) { | |
$oIo = IO::File->new; | |
$oIo->open( "<$sFile" ) || return undef; | |
binmode( $oIo ); | |
} | |
my $sWk; | |
my $sBuff = ''; | |
while ( $oIo->read( $sWk, 4096 ) ) { #4_096 has no special meanings | |
$sBuff .= $sWk; | |
} | |
$oIo->close(); | |
#Not Excel file (simple method) | |
return ( undef, undef ) if ( substr( $sBuff, 0, 1 ) ne "\x09" ); | |
return ( $sBuff, length( $sBuff ) ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subBOF (for Spreadsheet::ParseExcel) Developers' Kit : P303 | |
#------------------------------------------------------------------------------ | |
sub _subBOF { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iVer, $iDt ) = unpack( "v2", $sWk ); | |
#Workbook Global | |
if ( $iDt == 0x0005 ) { | |
$oBook->{Version} = unpack( "v", $sWk ); | |
$oBook->{BIFFVersion} = | |
( $oBook->{Version} == verExcel95 ) ? verBIFF5 : verBIFF8; | |
$oBook->{_CurSheet} = undef; | |
$oBook->{_CurSheet_} = -1; | |
} | |
#Worksheet or Dialogsheet | |
elsif ( $iDt != 0x0020 ) { #if($iDt == 0x0010) | |
if ( defined $oBook->{_CurSheet_} ) { | |
$oBook->{_CurSheet} = $oBook->{_CurSheet_} + 1; | |
$oBook->{_CurSheet_}++; | |
( | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{SheetVersion}, | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{SheetType}, | |
) | |
= unpack( "v2", $sWk ) | |
if ( length( $sWk ) > 4 ); | |
} | |
else { | |
$oBook->{BIFFVersion} = int( $bOp / 0x100 ); | |
if ( ( $oBook->{BIFFVersion} == verBIFF2 ) | |
|| ( $oBook->{BIFFVersion} == verBIFF3 ) | |
|| ( $oBook->{BIFFVersion} == verBIFF4 ) ) | |
{ | |
$oBook->{Version} = $oBook->{BIFFVersion}; | |
$oBook->{_CurSheet} = 0; | |
$oBook->{Worksheet}[ $oBook->{SheetCount} ] = | |
Spreadsheet::ParseExcel::Worksheet->new( | |
_Name => '', | |
Name => '', | |
_Book => $oBook, | |
_SheetNo => $oBook->{SheetCount}, | |
); | |
$oBook->{SheetCount}++; | |
} | |
} | |
} | |
else { | |
# Set flag to ignore all chart records until we reach another BOF. | |
$oBook->{_skip_chart} = 1; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subBlank (for Spreadsheet::ParseExcel) DK:P303 | |
#------------------------------------------------------------------------------ | |
sub _subBlank { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'BLANK', | |
Val => '', | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => undef, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subInteger (for Spreadsheet::ParseExcel) Not in DK | |
#------------------------------------------------------------------------------ | |
sub _subInteger { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF, $sTxt, $sDum ); | |
( $iR, $iC, $iF, $sDum, $sTxt ) = unpack( "v3cv", $sWk ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'INTEGER', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => undef, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subNumber (for Spreadsheet::ParseExcel) : DK: P354 | |
#------------------------------------------------------------------------------ | |
sub _subNumber { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk ); | |
my $dVal = _convDval( substr( $sWk, 6, 8 ) ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'Number', | |
Val => $dVal, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 1, | |
Code => undef, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _convDval (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _convDval { | |
my ( $sWk ) = @_; | |
return | |
unpack( "d", | |
( $BIGENDIAN ) ? pack( "c8", reverse( unpack( "c8", $sWk ) ) ) : $sWk ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subRString (for Spreadsheet::ParseExcel) DK:P405 | |
#------------------------------------------------------------------------------ | |
sub _subRString { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF, $iL, $sTxt ); | |
( $iR, $iC, $iF, $iL ) = unpack( "v4", $sWk ); | |
$sTxt = substr( $sWk, 8, $iL ); | |
#Has STRUN | |
if ( length( $sWk ) > ( 8 + $iL ) ) { | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'RString', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => '_native_', #undef, | |
Book => $oBook, | |
Rich => substr( $sWk, ( 8 + $iL ) + 1 ), | |
); | |
} | |
else { | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'RString', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => '_native_', | |
Book => $oBook, | |
); | |
} | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subBoolErr (for Spreadsheet::ParseExcel) DK:P306 | |
#------------------------------------------------------------------------------ | |
sub _subBoolErr { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk ); | |
my ( $iVal, $iFlg ) = unpack( "cc", substr( $sWk, 6, 2 ) ); | |
my $sTxt = DecodeBoolErr( $iVal, $iFlg ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'BoolError', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => undef, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
############################################################################### | |
# | |
# _subRK() | |
# | |
# Decode the RK BIFF record. | |
# | |
sub _subRK { | |
my ( $workbook, $biff_number, $length, $data ) = @_; | |
my ( $row, $col, $format_index, $rk_number ) = unpack( 'vvvV', $data ); | |
my $number = _decode_rk_number( $rk_number ); | |
_NewCell( | |
$workbook, $row, $col, | |
Kind => 'RK', | |
Val => $number, | |
FormatNo => $format_index, | |
Format => $workbook->{Format}->[$format_index], | |
Numeric => 1, | |
Code => undef, | |
Book => $workbook, | |
); | |
# Store the max and min row/col values. | |
_SetDimension( $workbook, $row, $col, $col ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subArray (for Spreadsheet::ParseExcel) DK:P297 | |
#------------------------------------------------------------------------------ | |
sub _subArray { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iBR, $iER, $iBC, $iEC ) = unpack( "v2c2", $sWk ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subFormula (for Spreadsheet::ParseExcel) DK:P336 | |
#------------------------------------------------------------------------------ | |
sub _subFormula { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk ); | |
my ( $iFlg ) = unpack( "v", substr( $sWk, 12, 2 ) ); | |
if ( $iFlg == 0xFFFF ) { | |
my ( $iKind ) = unpack( "c", substr( $sWk, 6, 1 ) ); | |
my ( $iVal ) = unpack( "c", substr( $sWk, 8, 1 ) ); | |
if ( ( $iKind == 1 ) or ( $iKind == 2 ) ) { | |
my $sTxt = | |
( $iKind == 1 ) | |
? DecodeBoolErr( $iVal, 0 ) | |
: DecodeBoolErr( $iVal, 1 ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'Formula Bool', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => undef, | |
Book => $oBook, | |
); | |
} | |
else { # Result (Reserve Only) | |
$oBook->{_PrevPos} = [ $iR, $iC, $iF ]; | |
} | |
} | |
else { | |
my $dVal = _convDval( substr( $sWk, 6, 8 ) ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'Formula Number', | |
Val => $dVal, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 1, | |
Code => undef, | |
Book => $oBook, | |
); | |
} | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subString (for Spreadsheet::ParseExcel) DK:P414 | |
#------------------------------------------------------------------------------ | |
sub _subString { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
#Position (not enough for ARRAY) | |
my $iPos = $oBook->{_PrevPos}; | |
return undef unless ( $iPos ); | |
$oBook->{_PrevPos} = undef; | |
my ( $iR, $iC, $iF ) = @$iPos; | |
my ( $iLen, $sTxt, $sCode ); | |
if ( $oBook->{BIFFVersion} == verBIFF8 ) { | |
my ( $raBuff, $iLen ) = _convBIFF8String( $oBook, $sWk, 1 ); | |
$sTxt = $raBuff->[0]; | |
$sCode = ( $raBuff->[1] ) ? 'ucs2' : undef; | |
} | |
elsif ( $oBook->{BIFFVersion} == verBIFF5 ) { | |
$sCode = '_native_'; | |
$iLen = unpack( "v", $sWk ); | |
$sTxt = substr( $sWk, 2, $iLen ); | |
} | |
else { | |
$sCode = '_native_'; | |
$iLen = unpack( "c", $sWk ); | |
$sTxt = substr( $sWk, 1, $iLen ); | |
} | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'String', | |
Val => $sTxt, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => $sCode, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subLabel (for Spreadsheet::ParseExcel) DK:P344 | |
#------------------------------------------------------------------------------ | |
sub _subLabel { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk ); | |
my ( $sLbl, $sCode ); | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
my ( $raBuff, $iLen, $iStPos, $iLenS ) = | |
_convBIFF8String( $oBook, substr( $sWk, 6 ), 1 ); | |
$sLbl = $raBuff->[0]; | |
$sCode = ( $raBuff->[1] ) ? 'ucs2' : undef; | |
} | |
#Before BIFF8 | |
else { | |
$sLbl = substr( $sWk, 8 ); | |
$sCode = '_native_'; | |
} | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'Label', | |
Val => $sLbl, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => $sCode, | |
Book => $oBook, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
############################################################################### | |
# | |
# _subMulRK() | |
# | |
# Decode the Multiple RK BIFF record. | |
# | |
sub _subMulRK { | |
my ( $workbook, $biff_number, $length, $data ) = @_; | |
# JMN: I don't know why this is here. | |
return if $workbook->{SheetCount} <= 0; | |
my ( $row, $first_col ) = unpack( "v2", $data ); | |
my $last_col = unpack( "v", substr( $data, length( $data ) - 2, 2 ) ); | |
# Iterate over the RK array and decode the data. | |
my $pos = 4; | |
for my $col ( $first_col .. $last_col ) { | |
my $data = substr( $data, $pos, 6 ); | |
my ( $format_index, $rk_number ) = unpack 'vV', $data; | |
my $number = _decode_rk_number( $rk_number ); | |
_NewCell( | |
$workbook, $row, $col, | |
Kind => 'MulRK', | |
Val => $number, | |
FormatNo => $format_index, | |
Format => $workbook->{Format}->[$format_index], | |
Numeric => 1, | |
Code => undef, | |
Book => $workbook, | |
); | |
$pos += 6; | |
} | |
# Store the max and min row/col values. | |
_SetDimension( $workbook, $row, $first_col, $last_col ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subMulBlank (for Spreadsheet::ParseExcel) DK:P349 | |
#------------------------------------------------------------------------------ | |
sub _subMulBlank { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iSc ) = unpack( "v2", $sWk ); | |
my $iEc = unpack( "v", substr( $sWk, length( $sWk ) - 2, 2 ) ); | |
my $iPos = 4; | |
for ( my $iC = $iSc ; $iC <= $iEc ; $iC++ ) { | |
my $iF = unpack( 'v', substr( $sWk, $iPos, 2 ) ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'MulBlank', | |
Val => '', | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => undef, | |
Book => $oBook, | |
); | |
$iPos += 2; | |
} | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iSc, $iEc ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subLabelSST (for Spreadsheet::ParseExcel) DK: P345 | |
#------------------------------------------------------------------------------ | |
sub _subLabelSST { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iR, $iC, $iF, $iIdx ) = unpack( 'v3V', $sWk ); | |
_NewCell( | |
$oBook, $iR, $iC, | |
Kind => 'PackedIdx', | |
Val => $oBook->{PkgStr}[$iIdx]->{Text}, | |
FormatNo => $iF, | |
Format => $oBook->{Format}[$iF], | |
Numeric => 0, | |
Code => ( $oBook->{PkgStr}[$iIdx]->{Unicode} ) ? 'ucs2' : undef, | |
Book => $oBook, | |
Rich => $oBook->{PkgStr}[$iIdx]->{Rich}, | |
); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iC, $iC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subFlg1904 (for Spreadsheet::ParseExcel) DK:P296 | |
#------------------------------------------------------------------------------ | |
sub _subFlg1904 { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
$oBook->{Flg1904} = unpack( "v", $sWk ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subRow (for Spreadsheet::ParseExcel) DK:P403 | |
#------------------------------------------------------------------------------ | |
sub _subRow { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
#0. Get Worksheet info (MaxRow, MaxCol, MinRow, MinCol) | |
my ( $iR, $iSc, $iEc, $iHght, $undef1, $undef2, $iGr, $iXf ) = | |
unpack( "v8", $sWk ); | |
$iEc--; | |
if ( $iGr & 0x20 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RowHidden}[$iR] = 1; | |
} | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RowHeight}[$iR] = $iHght / 20; | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
_SetDimension( $oBook, $iR, $iSc, $iEc ); | |
} | |
#------------------------------------------------------------------------------ | |
# _SetDimension (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _SetDimension { | |
my ( $oBook, $iR, $iSc, $iEc ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
#2.MaxRow, MaxCol, MinRow, MinCol | |
#2.1 MinRow | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} = $iR | |
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} ) | |
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} <= $iR ); | |
#2.2 MaxRow | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} = $iR | |
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} ) | |
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} > $iR ); | |
#2.3 MinCol | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} = $iSc | |
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} ) | |
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} <= $iSc ); | |
#2.4 MaxCol | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} = $iEc | |
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} ) | |
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} > $iEc ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subDefaultRowHeight (for Spreadsheet::ParseExcel) DK: P318 | |
#------------------------------------------------------------------------------ | |
sub _subDefaultRowHeight { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
#1. RowHeight | |
my ( $iDum, $iHght ) = unpack( "v2", $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{DefRowHeight} = $iHght / 20; | |
} | |
#------------------------------------------------------------------------------ | |
# _subStandardWidth(for Spreadsheet::ParseExcel) DK:P413 | |
#------------------------------------------------------------------------------ | |
sub _subStandardWidth { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my $iW = unpack( "v", $sWk ); | |
$oBook->{StandardWidth} = _convert_col_width( $oBook, $iW ); | |
} | |
############################################################################### | |
# | |
# _subDefColWidth() | |
# | |
# Read the DEFCOLWIDTH Biff record. This gives the width in terms of chars | |
# and is different from the width in the COLINFO record. | |
# | |
sub _subDefColWidth { | |
my ( $self, $record, $length, $data ) = @_; | |
my $width = unpack 'v', $data; | |
# Adjustment for default Arial 10 width. | |
$width = 8.43 if $width == 8; | |
$self->{Worksheet}->[ $self->{_CurSheet} ]->{DefColWidth} = $width; | |
} | |
############################################################################### | |
# | |
# _convert_col_width() | |
# | |
# Converts from the internal Excel column width units to user units seen in the | |
# interface. It is first necessary to convert the internal width to pixels and | |
# then to user units. The conversion is specific to a default font of Arial 10. | |
# TODO, the conversion should be extended to other fonts and sizes. | |
# | |
sub _convert_col_width { | |
my $self = shift; | |
my $excel_width = shift; | |
# Convert from Excel units to pixels (rounded up). | |
my $pixels = int( 0.5 + $excel_width * 7 / 256 ); | |
# Convert from pixels to user units. | |
# The conversion is different for columns <= 1 user unit (12 pixels). | |
my $user_width; | |
if ( $pixels <= 12 ) { | |
$user_width = $pixels / 12; | |
} | |
else { | |
$user_width = ( $pixels - 5 ) / 7; | |
} | |
# Round up to 2 decimal places. | |
$user_width = int( $user_width * 100 + 0.5 ) / 100; | |
return $user_width; | |
} | |
#------------------------------------------------------------------------------ | |
# _subColInfo (for Spreadsheet::ParseExcel) DK:P309 | |
#------------------------------------------------------------------------------ | |
sub _subColInfo { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless defined $oBook->{_CurSheet}; | |
my ( $iSc, $iEc, $iW, $iXF, $iGr ) = unpack( "v5", $sWk ); | |
for ( my $i = $iSc ; $i <= $iEc ; $i++ ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColWidth}[$i] = | |
_convert_col_width( $oBook, $iW ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColFmtNo}[$i] = $iXF; | |
if ( $iGr & 0x01 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColHidden}[$i] = 1; | |
} | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subWindow1 Window information P 273 | |
#------------------------------------------------------------------------------ | |
sub _subWindow1 { | |
my ( $workbook, $op, $len, $wk ) = @_; | |
return if ( $workbook->{BIFFVersion} <= verBIFF4() ); | |
my ( | |
$hpos, $vpos, $width, | |
$height, $options, $active, | |
$firsttab, $numselected, $tabbarwidth | |
) = unpack( "v9", $wk ); | |
$workbook->{ActiveSheet} = $active; | |
} | |
#------------------------------------------------------------------------------ | |
# _subSheetLayout OpenOffice 5.96 (P207) | |
#------------------------------------------------------------------------------ | |
sub _subSheetLayout { | |
my ( $workbook, $op, $len, $wk ) = @_; | |
my @unused; | |
( | |
my $rc, | |
@unused[ 1 .. 10 ], | |
@unused[ 11 .. 14 ], | |
my $color, @unused[ 15, 16 ] | |
) = unpack( "vC10C4vC2", $wk ); | |
return unless ( $rc == 0x0862 ); | |
$workbook->{Worksheet}[ $workbook->{_CurSheet} ]->{TabColor} = $color; | |
} | |
#------------------------------------------------------------------------------ | |
# _subHyperlink OpenOffice 5.96 (P182) | |
# | |
# Also see: http://msdn.microsoft.com/en-us/library/gg615407(v=office.14).aspx | |
#------------------------------------------------------------------------------ | |
# Helper: Extract a GID, returns as text string | |
sub _getguid { | |
my( $wk ) = @_; | |
my( $text, $guidl, $guids1, $guids2, @guidb ); | |
( $guidl, $guids1, $guids2, @guidb[0..7] ) = unpack( 'Vv2C8', $wk ); | |
$text = sprintf( '%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X', $guidl, $guids1, $guids2, @guidb); | |
return $text; | |
} | |
# Helper: Extract a counted (16-bit) unicode string, returns string, | |
# updates $offset | |
# $zterm == 1 if string is null-terminated. | |
# $bc if length is in bytes (not chars) | |
sub _getustr { | |
my( $wk, $offset, $zterm, $bc ) = @_; | |
my $len = unpack( 'V', substr( $wk, $offset ) ); | |
$offset += 4; | |
if( $bc ) { | |
$len /= 2; | |
} | |
$len -= $zterm; | |
my $text = join( '', map { chr $_ } unpack( "v$len", substr( $wk, $offset ) ) ); | |
$text =~ s/\0.*\z// if( $zterm ); | |
$_[1] = ( $offset += ($len + $zterm) *2 ); | |
return $text; | |
} | |
# HYPERLINK record | |
sub _subHyperlink { | |
my ( $workbook, $op, $len, $wk ) = @_; | |
# REF | |
my( $srow, $erow, $scol, $ecol ) = unpack( 'v4', $wk ); | |
my $guid = _getguid( substr( $wk, 8 ) ); | |
return unless( $guid eq '79EAC9D0-BAF9-11CE-8C82-00AA004BA90B' ); | |
my( $stmvers, $flags ) = unpack( 'VV', substr( $wk, 24 ) ); | |
return if( $flags & 0x60 || $stmvers != 2 ); | |
my $offset = 32; | |
my( $desc,$frame, $link, $mark ); | |
if( ($flags & 0x14) == 0x14 ) { | |
$desc = _getustr( $wk, $offset, 1, 0 ); | |
} | |
if( $flags & 0x80 ) { | |
$frame = _getustr( $wk, $offset, 1, 0 ); | |
} | |
$link = ''; | |
if( $flags & 0x100 ) { | |
# UNC path | |
$link = 'file:///' . _getustr( $wk, $offset, 1, 0 ); | |
} elsif( $flags & 0x1 ) { | |
# Has link (URI) | |
$guid = _getguid( substr( $wk, $offset ) ); | |
$offset += 16; | |
if( $guid eq '79EAC9E0-BAF9-11CE-8C82-00AA004BA90B' ) { | |
# URI | |
$link = _getustr( $wk, $offset, 1, 1 ); | |
} elsif( $guid eq '00000303-0000-0000-C000-000000000046' ) { | |
# Local file | |
$link = 'file:///'; | |
# !($flags & 2) = 'relative path' | |
if( !($flags & 0x2) ) { | |
my $file = $workbook->{File}; | |
if( defined $file && length $file ) { | |
$link .= (fileparse($file))[1]; | |
} | |
else { | |
$link .= '%REL%' | |
} | |
} | |
my $dirn = unpack( 'v', substr( $wk, $offset ) ); | |
$offset += 2; | |
$link .= '..\\' x $dirn; | |
my $namelen = unpack( 'V', substr( $wk, $offset ) ); | |
$offset += 4; | |
my $name = unpack( 'Z*', substr( $wk, $offset ) ); | |
$offset += $namelen; | |
$offset += 24; | |
my $size = unpack( 'V', substr( $wk, $offset ) ); | |
$offset += 4; | |
if( $size ) { | |
my $xlen = unpack( 'V', substr( $wk, $offset ) ) / 2; | |
$name = join( '', map { chr $_} unpack( "v$xlen", substr( $wk, $offset+4+2) ) ); | |
$offset += $size; | |
} | |
$link .= $name; | |
} else { | |
return; | |
} | |
} | |
# Text mark (Fragment identifier) | |
if( $flags & 0x8 ) { | |
# Cellrefs contain reserved characters, so url-encode | |
my $fragment = _getustr( $wk, $offset, 1 ); | |
$fragment =~ s/([^\w.~-])/sprintf( '%%%02X', ord( $1 ) )/gems; | |
$link .= '#' . $fragment; | |
} | |
# Update loop at end of parse() if this changes | |
push @{ $workbook->{Worksheet}[ $workbook->{_CurSheet} ]->{HyperLinks} }, [ | |
$desc, $link, $frame, $srow, $erow, $scol, $ecol ]; | |
} | |
#------------------------------------------------------------------------------ | |
# _subSST (for Spreadsheet::ParseExcel) DK:P413 | |
#------------------------------------------------------------------------------ | |
sub _subSST { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
_subStrWk( $oBook, substr( $sWk, 8 ) ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subContinue (for Spreadsheet::ParseExcel) DK:P311 | |
#------------------------------------------------------------------------------ | |
sub _subContinue { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
#if(defined $self->{FuncTbl}->{$bOp}) { | |
# $self->{FuncTbl}->{$PREFUNC}->($oBook, $bOp, $bLen, $sWk); | |
#} | |
_subStrWk( $oBook, $sWk, 1 ) if ( $PREFUNC == 0xFC ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subWriteAccess (for Spreadsheet::ParseExcel) DK:P451 | |
#------------------------------------------------------------------------------ | |
sub _subWriteAccess { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return if ( defined $oBook->{_Author} ); | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
$oBook->{Author} = _convBIFF8String( $oBook, $sWk ); | |
} | |
#Before BIFF8 | |
else { | |
my ( $iLen ) = unpack( "c", $sWk ); | |
$oBook->{Author} = | |
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' ); | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _convBIFF8String (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _convBIFF8String { | |
my ( $oBook, $sWk, $iCnvFlg ) = @_; | |
my ( $iLen, $iFlg ) = unpack( "vc", $sWk ); | |
my ( $iHigh, $iExt, $iRich ) = ( $iFlg & 0x01, $iFlg & 0x04, $iFlg & 0x08 ); | |
my ( $iStPos, $iExtCnt, $iRichCnt, $sStr ); | |
#2. Rich and Ext | |
if ( $iRich && $iExt ) { | |
$iStPos = 9; | |
( $iRichCnt, $iExtCnt ) = unpack( 'vV', substr( $sWk, 3, 6 ) ); | |
} | |
elsif ( $iRich ) { #Only Rich | |
$iStPos = 5; | |
$iRichCnt = unpack( 'v', substr( $sWk, 3, 2 ) ); | |
$iExtCnt = 0; | |
} | |
elsif ( $iExt ) { #Only Ext | |
$iStPos = 7; | |
$iRichCnt = 0; | |
$iExtCnt = unpack( 'V', substr( $sWk, 3, 4 ) ); | |
} | |
else { #Nothing Special | |
$iStPos = 3; | |
$iExtCnt = 0; | |
$iRichCnt = 0; | |
} | |
#3.Get String | |
if ( $iHigh ) { #Compressed | |
$iLen *= 2; | |
$sStr = substr( $sWk, $iStPos, $iLen ); | |
_SwapForUnicode( \$sStr ); | |
$sStr = $oBook->{FmtClass}->TextFmt( $sStr, 'ucs2' ) | |
unless ( $iCnvFlg ); | |
} | |
else { #Not Compressed | |
$sStr = substr( $sWk, $iStPos, $iLen ); | |
$sStr = $oBook->{FmtClass}->TextFmt( $sStr, undef ) unless ( $iCnvFlg ); | |
} | |
#4. return | |
if ( wantarray ) { | |
#4.1 Get Rich and Ext | |
if ( length( $sWk ) < $iStPos + $iLen + $iRichCnt * 4 + $iExtCnt ) { | |
return ( | |
[ undef, $iHigh, undef, undef ], | |
$iStPos + $iLen + $iRichCnt * 4 + $iExtCnt, | |
$iStPos, $iLen | |
); | |
} | |
else { | |
return ( | |
[ | |
$sStr, | |
$iHigh, | |
substr( $sWk, $iStPos + $iLen, $iRichCnt * 4 ), | |
substr( $sWk, $iStPos + $iLen + $iRichCnt * 4, $iExtCnt ) | |
], | |
$iStPos + $iLen + $iRichCnt * 4 + $iExtCnt, | |
$iStPos, $iLen | |
); | |
} | |
} | |
else { | |
return $sStr; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subXF (for Spreadsheet::ParseExcel) DK:P453 | |
#------------------------------------------------------------------------------ | |
sub _subXF { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iFnt, $iIdx ); | |
my ( | |
$iLock, $iHidden, $iStyle, $i123, $iAlH, $iWrap, | |
$iAlV, $iJustL, $iRotate, $iInd, $iShrink, $iMerge, | |
$iReadDir, $iBdrD, $iBdrSL, $iBdrSR, $iBdrST, $iBdrSB, | |
$iBdrSD, $iBdrCL, $iBdrCR, $iBdrCT, $iBdrCB, $iBdrCD, | |
$iFillP, $iFillCF, $iFillCB | |
); | |
if ( $oBook->{BIFFVersion} == verBIFF4 ) { | |
# Minimal support for Excel 4. We just get the font and format indices | |
# so that the cell data value can be formatted. | |
( $iFnt, $iIdx, ) = unpack( "CC", $sWk ); | |
} | |
elsif ( $oBook->{BIFFVersion} == verBIFF8 ) { | |
my ( $iGen, $iAlign, $iGen2, $iBdr1, $iBdr2, $iBdr3, $iPtn ); | |
( $iFnt, $iIdx, $iGen, $iAlign, $iGen2, $iBdr1, $iBdr2, $iBdr3, $iPtn ) | |
= unpack( "v7Vv", $sWk ); | |
$iLock = ( $iGen & 0x01 ) ? 1 : 0; | |
$iHidden = ( $iGen & 0x02 ) ? 1 : 0; | |
$iStyle = ( $iGen & 0x04 ) ? 1 : 0; | |
$i123 = ( $iGen & 0x08 ) ? 1 : 0; | |
$iAlH = ( $iAlign & 0x07 ); | |
$iWrap = ( $iAlign & 0x08 ) ? 1 : 0; | |
$iAlV = ( $iAlign & 0x70 ) / 0x10; | |
$iJustL = ( $iAlign & 0x80 ) ? 1 : 0; | |
$iRotate = ( ( $iAlign & 0xFF00 ) / 0x100 ) & 0x00FF; | |
$iRotate = 90 if ( $iRotate == 255 ); | |
$iRotate = 90 - $iRotate if ( $iRotate > 90 ); | |
$iInd = ( $iGen2 & 0x0F ); | |
$iShrink = ( $iGen2 & 0x10 ) ? 1 : 0; | |
$iMerge = ( $iGen2 & 0x20 ) ? 1 : 0; | |
$iReadDir = ( ( $iGen2 & 0xC0 ) / 0x40 ) & 0x03; | |
$iBdrSL = $iBdr1 & 0x0F; | |
$iBdrSR = ( ( $iBdr1 & 0xF0 ) / 0x10 ) & 0x0F; | |
$iBdrST = ( ( $iBdr1 & 0xF00 ) / 0x100 ) & 0x0F; | |
$iBdrSB = ( ( $iBdr1 & 0xF000 ) / 0x1000 ) & 0x0F; | |
$iBdrCL = ( ( $iBdr2 & 0x7F ) ) & 0x7F; | |
$iBdrCR = ( ( $iBdr2 & 0x3F80 ) / 0x80 ) & 0x7F; | |
$iBdrD = ( ( $iBdr2 & 0xC000 ) / 0x4000 ) & 0x3; | |
$iBdrCT = ( ( $iBdr3 & 0x7F ) ) & 0x7F; | |
$iBdrCB = ( ( $iBdr3 & 0x3F80 ) / 0x80 ) & 0x7F; | |
$iBdrCD = ( ( $iBdr3 & 0x1FC000 ) / 0x4000 ) & 0x7F; | |
$iBdrSD = ( ( $iBdr3 & 0x1E00000 ) / 0x200000 ) & 0xF; | |
$iFillP = ( ( $iBdr3 & 0xFC000000 ) / 0x4000000 ) & 0x3F; | |
$iFillCF = ( $iPtn & 0x7F ); | |
$iFillCB = ( ( $iPtn & 0x3F80 ) / 0x80 ) & 0x7F; | |
} | |
else { | |
my ( $iGen, $iAlign, $iPtn, $iPtn2, $iBdr1, $iBdr2 ); | |
( $iFnt, $iIdx, $iGen, $iAlign, $iPtn, $iPtn2, $iBdr1, $iBdr2 ) = | |
unpack( "v8", $sWk ); | |
$iLock = ( $iGen & 0x01 ) ? 1 : 0; | |
$iHidden = ( $iGen & 0x02 ) ? 1 : 0; | |
$iStyle = ( $iGen & 0x04 ) ? 1 : 0; | |
$i123 = ( $iGen & 0x08 ) ? 1 : 0; | |
$iAlH = ( $iAlign & 0x07 ); | |
$iWrap = ( $iAlign & 0x08 ) ? 1 : 0; | |
$iAlV = ( $iAlign & 0x70 ) / 0x10; | |
$iJustL = ( $iAlign & 0x80 ) ? 1 : 0; | |
$iRotate = ( ( $iAlign & 0x300 ) / 0x100 ) & 0x3; | |
$iFillCF = ( $iPtn & 0x7F ); | |
$iFillCB = ( ( $iPtn & 0x1F80 ) / 0x80 ) & 0x7F; | |
$iFillP = ( $iPtn2 & 0x3F ); | |
$iBdrSB = ( ( $iPtn2 & 0x1C0 ) / 0x40 ) & 0x7; | |
$iBdrCB = ( ( $iPtn2 & 0xFE00 ) / 0x200 ) & 0x7F; | |
$iBdrST = ( $iBdr1 & 0x07 ); | |
$iBdrSL = ( ( $iBdr1 & 0x38 ) / 0x8 ) & 0x07; | |
$iBdrSR = ( ( $iBdr1 & 0x1C0 ) / 0x40 ) & 0x07; | |
$iBdrCT = ( ( $iBdr1 & 0xFE00 ) / 0x200 ) & 0x7F; | |
$iBdrCL = ( $iBdr2 & 0x7F ) & 0x7F; | |
$iBdrCR = ( ( $iBdr2 & 0x3F80 ) / 0x80 ) & 0x7F; | |
} | |
push @{ $oBook->{Format} }, Spreadsheet::ParseExcel::Format->new( | |
FontNo => $iFnt, | |
Font => $oBook->{Font}[$iFnt], | |
FmtIdx => $iIdx, | |
Lock => $iLock, | |
Hidden => $iHidden, | |
Style => $iStyle, | |
Key123 => $i123, | |
AlignH => $iAlH, | |
Wrap => $iWrap, | |
AlignV => $iAlV, | |
JustLast => $iJustL, | |
Rotate => $iRotate, | |
Indent => $iInd, | |
Shrink => $iShrink, | |
Merge => $iMerge, | |
ReadDir => $iReadDir, | |
BdrStyle => [ $iBdrSL, $iBdrSR, $iBdrST, $iBdrSB ], | |
BdrColor => [ $iBdrCL, $iBdrCR, $iBdrCT, $iBdrCB ], | |
BdrDiag => [ $iBdrD, $iBdrSD, $iBdrCD ], | |
Fill => [ $iFillP, $iFillCF, $iFillCB ], | |
); | |
} | |
#------------------------------------------------------------------------------ | |
# _subFormat (for Spreadsheet::ParseExcel) DK: P336 | |
#------------------------------------------------------------------------------ | |
sub _subFormat { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my $sFmt; | |
if ( $oBook->{BIFFVersion} <= verBIFF5 ) { | |
$sFmt = substr( $sWk, 3, unpack( 'c', substr( $sWk, 2, 1 ) ) ); | |
$sFmt = $oBook->{FmtClass}->TextFmt( $sFmt, '_native_' ); | |
} | |
else { | |
$sFmt = _convBIFF8String( $oBook, substr( $sWk, 2 ) ); | |
} | |
my $format_index = unpack( 'v', substr( $sWk, 0, 2 ) ); | |
# Excel 4 and earlier used an index of 0 to indicate that a built-in format | |
# that was stored implicitly. | |
if ( $oBook->{BIFFVersion} <= verBIFF4 && $format_index == 0 ) { | |
$format_index = keys %{ $oBook->{FormatStr} }; | |
} | |
$oBook->{FormatStr}->{$format_index} = $sFmt; | |
} | |
#------------------------------------------------------------------------------ | |
# _subPalette (for Spreadsheet::ParseExcel) DK: P393 | |
#------------------------------------------------------------------------------ | |
sub _subPalette { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
for ( my $i = 0 ; $i < unpack( 'v', $sWk ) ; $i++ ) { | |
# push @aColor, unpack('H6', substr($sWk, $i*4+2)); | |
$oBook->{aColor}[ $i + 8 ] = unpack( 'H6', substr( $sWk, $i * 4 + 2 ) ); | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subFont (for Spreadsheet::ParseExcel) DK:P333 | |
#------------------------------------------------------------------------------ | |
sub _subFont { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline, $sFntName ); | |
my ( $bBold, $bItalic, $bUnderline, $bStrikeout ); | |
if ( $oBook->{BIFFVersion} == verBIFF8 ) { | |
( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline ) = | |
unpack( "v5c", $sWk ); | |
my ( $iSize, $iHigh ) = unpack( 'cc', substr( $sWk, 14, 2 ) ); | |
if ( $iHigh ) { | |
$sFntName = substr( $sWk, 16, $iSize * 2 ); | |
_SwapForUnicode( \$sFntName ); | |
$sFntName = $oBook->{FmtClass}->TextFmt( $sFntName, 'ucs2' ); | |
} | |
else { | |
$sFntName = substr( $sWk, 16, $iSize ); | |
$sFntName = $oBook->{FmtClass}->TextFmt( $sFntName, '_native_' ); | |
} | |
$bBold = ( $iBold >= 0x2BC ) ? 1 : 0; | |
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0; | |
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0; | |
$bUnderline = ( $iUnderline ) ? 1 : 0; | |
} | |
elsif ( $oBook->{BIFFVersion} == verBIFF5 ) { | |
( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline ) = | |
unpack( "v5c", $sWk ); | |
$sFntName = | |
$oBook->{FmtClass} | |
->TextFmt( substr( $sWk, 15, unpack( "c", substr( $sWk, 14, 1 ) ) ), | |
'_native_' ); | |
$bBold = ( $iBold >= 0x2BC ) ? 1 : 0; | |
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0; | |
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0; | |
$bUnderline = ( $iUnderline ) ? 1 : 0; | |
} | |
else { | |
( $iHeight, $iAttr ) = unpack( "v2", $sWk ); | |
$iCIdx = undef; | |
$iSuper = 0; | |
$bBold = ( $iAttr & 0x01 ) ? 1 : 0; | |
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0; | |
$bUnderline = ( $iAttr & 0x04 ) ? 1 : 0; | |
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0; | |
$sFntName = substr( $sWk, 5, unpack( "c", substr( $sWk, 4, 1 ) ) ); | |
} | |
push @{ $oBook->{Font} }, Spreadsheet::ParseExcel::Font->new( | |
Height => $iHeight / 20.0, | |
Attr => $iAttr, | |
Color => $iCIdx, | |
Super => $iSuper, | |
UnderlineStyle => $iUnderline, | |
Name => $sFntName, | |
Bold => $bBold, | |
Italic => $bItalic, | |
Underline => $bUnderline, | |
Strikeout => $bStrikeout, | |
); | |
#Skip Font[4] | |
push @{ $oBook->{Font} }, {} if ( scalar( @{ $oBook->{Font} } ) == 4 ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subBoundSheet (for Spreadsheet::ParseExcel): DK: P307 | |
#------------------------------------------------------------------------------ | |
sub _subBoundSheet { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( $iPos, $iGr, $iKind ) = unpack( "Lc2", $sWk ); | |
$iKind &= 0x0F; | |
return if ( ( $iKind != 0x00 ) && ( $iKind != 0x01 ) ); | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
my ( $iSize, $iUni ) = unpack( "cc", substr( $sWk, 6, 2 ) ); | |
my $sWsName = substr( $sWk, 8 ); | |
if ( $iUni & 0x01 ) { | |
_SwapForUnicode( \$sWsName ); | |
$sWsName = $oBook->{FmtClass}->TextFmt( $sWsName, 'ucs2' ); | |
} | |
$oBook->{Worksheet}[ $oBook->{SheetCount} ] = | |
Spreadsheet::ParseExcel::Worksheet->new( | |
Name => $sWsName, | |
Kind => $iKind, | |
_Pos => $iPos, | |
_Book => $oBook, | |
_SheetNo => $oBook->{SheetCount}, | |
SheetHidden => $iGr & 0x03 | |
); | |
} | |
else { | |
$oBook->{Worksheet}[ $oBook->{SheetCount} ] = | |
Spreadsheet::ParseExcel::Worksheet->new( | |
Name => | |
$oBook->{FmtClass}->TextFmt( substr( $sWk, 7 ), '_native_' ), | |
Kind => $iKind, | |
_Pos => $iPos, | |
_Book => $oBook, | |
_SheetNo => $oBook->{SheetCount}, | |
SheetHidden => $iGr & 0x03 | |
); | |
} | |
$oBook->{SheetCount}++; | |
} | |
#------------------------------------------------------------------------------ | |
# _subHeader (for Spreadsheet::ParseExcel) DK: P340 | |
#------------------------------------------------------------------------------ | |
sub _subHeader { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $sW; | |
if ( !defined $sWk ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} = undef; | |
return; | |
} | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
$sW = _convBIFF8String( $oBook, $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} = | |
( $sW eq "\x00" ) ? undef : $sW; | |
} | |
#Before BIFF8 | |
else { | |
my ( $iLen ) = unpack( "c", $sWk ); | |
$sW = | |
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} = | |
( $sW eq "\x00\x00\x00" ) ? undef : $sW; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subFooter (for Spreadsheet::ParseExcel) DK: P335 | |
#------------------------------------------------------------------------------ | |
sub _subFooter { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $sW; | |
if ( !defined $sWk ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} = undef; | |
return; | |
} | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
$sW = _convBIFF8String( $oBook, $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} = | |
( $sW eq "\x00" ) ? undef : $sW; | |
} | |
#Before BIFF8 | |
else { | |
my ( $iLen ) = unpack( "c", $sWk ); | |
$sW = | |
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} = | |
( $sW eq "\x00\x00\x00" ) ? undef : $sW; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subHPageBreak (for Spreadsheet::ParseExcel) DK: P341 | |
#------------------------------------------------------------------------------ | |
sub _subHPageBreak { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my @aBreak; | |
my $iCnt = unpack( "v", $sWk ); | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
for ( my $i = 0 ; $i < $iCnt ; $i++ ) { | |
my ( $iRow, $iColB, $iColE ) = | |
unpack( 'v3', substr( $sWk, 2 + $i * 6, 6 ) ); | |
# push @aBreak, [$iRow, $iColB, $iColE]; | |
push @aBreak, $iRow; | |
} | |
} | |
#Before BIFF8 | |
else { | |
for ( my $i = 0 ; $i < $iCnt ; $i++ ) { | |
my ( $iRow ) = unpack( 'v', substr( $sWk, 2 + $i * 2, 2 ) ); | |
push @aBreak, $iRow; | |
# push @aBreak, [$iRow, 0, 255]; | |
} | |
} | |
@aBreak = sort { $a <=> $b } @aBreak; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{HPageBreak} = \@aBreak; | |
} | |
#------------------------------------------------------------------------------ | |
# _subVPageBreak (for Spreadsheet::ParseExcel) DK: P447 | |
#------------------------------------------------------------------------------ | |
sub _subVPageBreak { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my @aBreak; | |
my $iCnt = unpack( "v", $sWk ); | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
for ( my $i = 0 ; $i < $iCnt ; $i++ ) { | |
my ( $iCol, $iRowB, $iRowE ) = | |
unpack( 'v3', substr( $sWk, 2 + $i * 6, 6 ) ); | |
push @aBreak, $iCol; | |
# push @aBreak, [$iCol, $iRowB, $iRowE]; | |
} | |
} | |
#Before BIFF8 | |
else { | |
for ( my $i = 0 ; $i < $iCnt ; $i++ ) { | |
my ( $iCol ) = unpack( 'v', substr( $sWk, 2 + $i * 2, 2 ) ); | |
push @aBreak, $iCol; | |
# push @aBreak, [$iCol, 0, 65535]; | |
} | |
} | |
@aBreak = sort { $a <=> $b } @aBreak; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{VPageBreak} = \@aBreak; | |
} | |
#------------------------------------------------------------------------------ | |
# _subMargin (for Spreadsheet::ParseExcel) DK: P306, 345, 400, 440 | |
#------------------------------------------------------------------------------ | |
sub _subMargin { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
# The "Mergin" options are a workaround for a backward compatible typo. | |
my $dWk = _convDval( substr( $sWk, 0, 8 ) ); | |
if ( $bOp == 0x26 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{LeftMergin} = $dWk; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{LeftMargin} = $dWk; | |
} | |
elsif ( $bOp == 0x27 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RightMergin} = $dWk; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RightMargin} = $dWk; | |
} | |
elsif ( $bOp == 0x28 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{TopMergin} = $dWk; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{TopMargin} = $dWk; | |
} | |
elsif ( $bOp == 0x29 ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{BottomMergin} = $dWk; | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{BottomMargin} = $dWk; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _subHcenter (for Spreadsheet::ParseExcel) DK: P340 | |
#------------------------------------------------------------------------------ | |
sub _subHcenter { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $iWk = unpack( "v", $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{HCenter} = $iWk; | |
} | |
#------------------------------------------------------------------------------ | |
# _subVcenter (for Spreadsheet::ParseExcel) DK: P447 | |
#------------------------------------------------------------------------------ | |
sub _subVcenter { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $iWk = unpack( "v", $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{VCenter} = $iWk; | |
} | |
#------------------------------------------------------------------------------ | |
# _subPrintGridlines (for Spreadsheet::ParseExcel) DK: P397 | |
#------------------------------------------------------------------------------ | |
sub _subPrintGridlines { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $iWk = unpack( "v", $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PrintGrid} = $iWk; | |
} | |
#------------------------------------------------------------------------------ | |
# _subPrintHeaders (for Spreadsheet::ParseExcel) DK: P397 | |
#------------------------------------------------------------------------------ | |
sub _subPrintHeaders { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $iWk = unpack( "v", $sWk ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PrintHeaders} = $iWk; | |
} | |
#------------------------------------------------------------------------------ | |
# _subSETUP (for Spreadsheet::ParseExcel) DK: P409 | |
#------------------------------------------------------------------------------ | |
sub _subSETUP { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
# Workaround for some apps and older Excels that don't write a | |
# complete SETUP record. | |
return undef if $bLen != 34; | |
my $oWkS = $oBook->{Worksheet}[ $oBook->{_CurSheet} ]; | |
my $iGrBit; | |
( | |
$oWkS->{PaperSize}, $oWkS->{Scale}, $oWkS->{PageStart}, | |
$oWkS->{FitWidth}, $oWkS->{FitHeight}, $iGrBit, | |
$oWkS->{Res}, $oWkS->{VRes}, | |
) = unpack( 'v8', $sWk ); | |
$oWkS->{HeaderMargin} = _convDval( substr( $sWk, 16, 8 ) ); | |
$oWkS->{FooterMargin} = _convDval( substr( $sWk, 24, 8 ) ); | |
$oWkS->{Copis} = unpack( 'v2', substr( $sWk, 32, 2 ) ); | |
$oWkS->{LeftToRight} = ( ( $iGrBit & 0x01 ) ? 1 : 0 ); | |
$oWkS->{Landscape} = ( ( $iGrBit & 0x02 ) ? 1 : 0 ); | |
$oWkS->{NoPls} = ( ( $iGrBit & 0x04 ) ? 1 : 0 ); | |
$oWkS->{NoColor} = ( ( $iGrBit & 0x08 ) ? 1 : 0 ); | |
$oWkS->{Draft} = ( ( $iGrBit & 0x10 ) ? 1 : 0 ); | |
$oWkS->{Notes} = ( ( $iGrBit & 0x20 ) ? 1 : 0 ); | |
$oWkS->{NoOrient} = ( ( $iGrBit & 0x40 ) ? 1 : 0 ); | |
$oWkS->{UsePage} = ( ( $iGrBit & 0x80 ) ? 1 : 0 ); | |
# The NoPls flag indicates that the values have not been taken from an | |
# actual printer and thus may not be accurate. | |
# Set default scale if NoPls otherwise it may be an invalid value of 0XFF. | |
$oWkS->{Scale} = 100 if $oWkS->{NoPls}; | |
# Workaround for a backward compatible typo. | |
$oWkS->{HeaderMergin} = $oWkS->{HeaderMargin}; | |
$oWkS->{FooterMergin} = $oWkS->{FooterMargin}; | |
} | |
#------------------------------------------------------------------------------ | |
# _subName (for Spreadsheet::ParseExcel) DK: P350 | |
#------------------------------------------------------------------------------ | |
sub _subName { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
my ( | |
$iGrBit, $cKey, $cCh, $iCce, $ixAls, | |
$iTab, $cchCust, $cchDsc, $cchHep, $cchStatus | |
) = unpack( 'vc2v3c4', $sWk ); | |
#Builtin Name + Length == 1 | |
if ( ( $iGrBit & 0x20 ) && ( $cCh == 1 ) ) { | |
#BIFF8 | |
if ( $oBook->{BIFFVersion} >= verBIFF8 ) { | |
my $iName = unpack( 'n', substr( $sWk, 14 ) ); | |
my $iSheet = unpack( 'v', substr( $sWk, 8 ) ) - 1; | |
# Workaround for mal-formed Excel workbooks where Print_Title is | |
# set as Global (i.e. itab = 0). Note, this will have to be | |
# treated differently when we get around to handling global names. | |
return undef if $iSheet == -1; | |
if ( $iName == 6 ) { #PrintArea | |
my ( $iSheetW, $raArea ) = _ParseNameArea( substr( $sWk, 16 ) ); | |
$oBook->{PrintArea}[$iSheet] = $raArea; | |
} | |
elsif ( $iName == 7 ) { #Title | |
my ( $iSheetW, $raArea ) = _ParseNameArea( substr( $sWk, 16 ) ); | |
my @aTtlR = (); | |
my @aTtlC = (); | |
foreach my $raI ( @$raArea ) { | |
if ( $raI->[3] == 0xFF ) { #Row Title | |
push @aTtlR, [ $raI->[0], $raI->[2] ]; | |
} | |
else { #Col Title | |
push @aTtlC, [ $raI->[1], $raI->[3] ]; | |
} | |
} | |
$oBook->{PrintTitle}[$iSheet] = | |
{ Row => \@aTtlR, Column => \@aTtlC }; | |
} | |
} | |
else { | |
my $iName = unpack( 'c', substr( $sWk, 14 ) ); | |
if ( $iName == 6 ) { #PrintArea | |
my ( $iSheet, $raArea ) = | |
_ParseNameArea95( substr( $sWk, 15 ) ); | |
$oBook->{PrintArea}[$iSheet] = $raArea; | |
} | |
elsif ( $iName == 7 ) { #Title | |
my ( $iSheet, $raArea ) = | |
_ParseNameArea95( substr( $sWk, 15 ) ); | |
my @aTtlR = (); | |
my @aTtlC = (); | |
foreach my $raI ( @$raArea ) { | |
if ( $raI->[3] == 0xFF ) { #Row Title | |
push @aTtlR, [ $raI->[0], $raI->[2] ]; | |
} | |
else { #Col Title | |
push @aTtlC, [ $raI->[1], $raI->[3] ]; | |
} | |
} | |
$oBook->{PrintTitle}[$iSheet] = | |
{ Row => \@aTtlR, Column => \@aTtlC }; | |
} | |
} | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# ParseNameArea (for Spreadsheet::ParseExcel) DK: 494 (ptgAread3d) | |
#------------------------------------------------------------------------------ | |
sub _ParseNameArea { | |
my ( $sObj ) = @_; | |
my ( $iOp ); | |
my @aRes = (); | |
$iOp = unpack( 'C', $sObj ); | |
my $iSheet; | |
if ( $iOp == 0x3b ) { | |
my ( $iWkS, $iRs, $iRe, $iCs, $iCe ) = | |
unpack( 'v5', substr( $sObj, 1 ) ); | |
$iSheet = $iWkS; | |
push @aRes, [ $iRs, $iCs, $iRe, $iCe ]; | |
} | |
elsif ( $iOp == 0x29 ) { | |
my $iLen = unpack( 'v', substr( $sObj, 1, 2 ) ); | |
my $iSt = 0; | |
while ( $iSt < $iLen ) { | |
my ( $iOpW, $iWkS, $iRs, $iRe, $iCs, $iCe ) = | |
unpack( 'cv5', substr( $sObj, $iSt + 3, 11 ) ); | |
if ( $iOpW == 0x3b ) { | |
$iSheet = $iWkS; | |
push @aRes, [ $iRs, $iCs, $iRe, $iCe ]; | |
} | |
if ( $iSt == 0 ) { | |
$iSt += 11; | |
} | |
else { | |
$iSt += 12; #Skip 1 byte; | |
} | |
} | |
} | |
return ( $iSheet, \@aRes ); | |
} | |
#------------------------------------------------------------------------------ | |
# ParseNameArea95 (for Spreadsheet::ParseExcel) DK: 494 (ptgAread3d) | |
#------------------------------------------------------------------------------ | |
sub _ParseNameArea95 { | |
my ( $sObj ) = @_; | |
my ( $iOp ); | |
my @aRes = (); | |
$iOp = unpack( 'C', $sObj ); | |
my $iSheet; | |
if ( $iOp == 0x3b ) { | |
$iSheet = unpack( 'v', substr( $sObj, 11, 2 ) ); | |
my ( $iRs, $iRe, $iCs, $iCe ) = | |
unpack( 'v2C2', substr( $sObj, 15, 6 ) ); | |
push @aRes, [ $iRs, $iCs, $iRe, $iCe ]; | |
} | |
elsif ( $iOp == 0x29 ) { | |
my $iLen = unpack( 'v', substr( $sObj, 1, 2 ) ); | |
my $iSt = 0; | |
while ( $iSt < $iLen ) { | |
my $iOpW = unpack( 'c', substr( $sObj, $iSt + 3, 6 ) ); | |
$iSheet = unpack( 'v', substr( $sObj, $iSt + 14, 2 ) ); | |
my ( $iRs, $iRe, $iCs, $iCe ) = | |
unpack( 'v2C2', substr( $sObj, $iSt + 18, 6 ) ); | |
push @aRes, [ $iRs, $iCs, $iRe, $iCe ] if ( $iOpW == 0x3b ); | |
if ( $iSt == 0 ) { | |
$iSt += 21; | |
} | |
else { | |
$iSt += 22; #Skip 1 byte; | |
} | |
} | |
} | |
return ( $iSheet, \@aRes ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subBOOL (for Spreadsheet::ParseExcel) DK: P452 | |
#------------------------------------------------------------------------------ | |
sub _subWSBOOL { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PageFit} = | |
( ( unpack( 'v', $sWk ) & 0x100 ) ? 1 : 0 ); | |
} | |
#------------------------------------------------------------------------------ | |
# _subMergeArea (for Spreadsheet::ParseExcel) DK: (Not) | |
#------------------------------------------------------------------------------ | |
sub _subMergeArea { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $iCnt = unpack( "v", $sWk ); | |
my $oWkS = $oBook->{Worksheet}[ $oBook->{_CurSheet} ]; | |
$oWkS->{MergedArea} = [] unless ( defined $oWkS->{MergedArea} ); | |
for ( my $i = 0 ; $i < $iCnt ; $i++ ) { | |
my ( $iRs, $iRe, $iCs, $iCe ) = | |
unpack( 'v4', substr( $sWk, $i * 8 + 2, 8 ) ); | |
for ( my $iR = $iRs ; $iR <= $iRe ; $iR++ ) { | |
for ( my $iC = $iCs ; $iC <= $iCe ; $iC++ ) { | |
$oWkS->{Cells}[$iR][$iC]->{Merged} = 1 | |
if ( defined $oWkS->{Cells}[$iR][$iC] ); | |
} | |
} | |
push @{ $oWkS->{MergedArea} }, [ $iRs, $iCs, $iRe, $iCe ]; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# DecodeBoolErr (for Spreadsheet::ParseExcel) DK: P306 | |
#------------------------------------------------------------------------------ | |
sub DecodeBoolErr { | |
my ( $iVal, $iFlg ) = @_; | |
if ( $iFlg ) { # ERROR | |
if ( $iVal == 0x00 ) { | |
return "#NULL!"; | |
} | |
elsif ( $iVal == 0x07 ) { | |
return "#DIV/0!"; | |
} | |
elsif ( $iVal == 0x0F ) { | |
return "#VALUE!"; | |
} | |
elsif ( $iVal == 0x17 ) { | |
return "#REF!"; | |
} | |
elsif ( $iVal == 0x1D ) { | |
return "#NAME?"; | |
} | |
elsif ( $iVal == 0x24 ) { | |
return "#NUM!"; | |
} | |
elsif ( $iVal == 0x2A ) { | |
return "#N/A!"; | |
} | |
else { | |
return "#ERR"; | |
} | |
} | |
else { | |
return ( $iVal ) ? "TRUE" : "FALSE"; | |
} | |
} | |
############################################################################### | |
# | |
# _decode_rk_number() | |
# | |
# Convert an encoded RK number into a real number. The RK encoding is | |
# explained in some detail in the MS docs. It is a way of storing applicable | |
# ints and doubles in 32bits (30 data + 2 info bits) in order to save space. | |
# | |
sub _decode_rk_number { | |
my $rk_number = shift; | |
my $number; | |
# Check the main RK type. | |
if ( $rk_number & 0x02 ) { | |
# RK Type 2 and 4, a packed integer. | |
# Shift off the info bits. | |
$number = $rk_number >> 2; | |
# Convert from unsigned to signed if required. | |
$number -= 0x40000000 if $number & 0x20000000; | |
} | |
else { | |
# RK Type 1 and 3, a truncated IEEE Double. | |
# Pack the RK number into the high 30 bits of an IEEE double. | |
$number = pack "VV", 0x0000, $rk_number & 0xFFFFFFFC; | |
# Reverse the packed IEEE double on big-endian machines. | |
$number = reverse $number if $BIGENDIAN; | |
# Unpack the number. | |
$number = unpack "d", $number; | |
} | |
# RK Types 3 and 4 were multiplied by 100 prior to encoding. | |
$number /= 100 if $rk_number & 0x01; | |
return $number; | |
} | |
############################################################################### | |
# | |
# _subStrWk() | |
# | |
# Extract the workbook strings from the SST (Shared String Table) record and | |
# any following CONTINUE records. | |
# | |
# The workbook strings are initially contained in the SST block but may also | |
# occupy one or more CONTINUE blocks. Reading the CONTINUE blocks is made a | |
# little tricky by the fact that they can contain an additional initial byte | |
# if a string is continued from a previous block. | |
# | |
# Parsing is further complicated by the fact that the continued section of the | |
# string may have a different encoding (ASCII or UTF-8) from the previous | |
# section. Excel does this to save space. | |
# | |
sub _subStrWk { | |
my ( $self, $biff_data, $is_continue ) = @_; | |
if ( $is_continue ) { | |
# We are reading a CONTINUE record. | |
if ( $self->{_buffer} eq '' ) { | |
# A CONTINUE block with no previous SST. | |
$self->{_buffer} .= $biff_data; | |
} | |
elsif ( !defined $self->{_string_continued} ) { | |
# The CONTINUE block starts with a new (non-continued) string. | |
# Strip the Grbit byte and store the string data. | |
$self->{_buffer} .= substr $biff_data, 1; | |
} | |
else { | |
# A CONTINUE block that starts with a continued string. | |
# The first byte (Grbit) of the CONTINUE record indicates if (0) | |
# the continued string section is single bytes or (1) double bytes. | |
my $grbit = ord $biff_data; | |
my ( $str_position, $str_length ) = @{ $self->{_previous_info} }; | |
my $buff_length = length $self->{_buffer}; | |
if ( $buff_length >= ( $str_position + $str_length ) ) { | |
# Not in a string. | |
$self->{_buffer} .= $biff_data; | |
} | |
elsif ( ( $self->{_string_continued} & 0x01 ) == ( $grbit & 0x01 ) ) | |
{ | |
# Same encoding as the previous block of the string. | |
$self->{_buffer} .= substr( $biff_data, 1 ); | |
} | |
else { | |
# Different encoding to the previous block of the string. | |
if ( $grbit & 0x01 ) { | |
# Current block is UTF-16, previous was ASCII. | |
my ( undef, $cch ) = unpack 'vc', $self->{_buffer}; | |
substr( $self->{_buffer}, 2, 1 ) = pack( 'C', $cch | 0x01 ); | |
# Convert the previous ASCII, single character, portion of | |
# the string into a double character UTF-16 string by | |
# inserting zero bytes. | |
for ( | |
my $i = ( $buff_length - $str_position ) ; | |
$i >= 1 ; | |
$i-- | |
) | |
{ | |
substr( $self->{_buffer}, $str_position + $i, 0 ) = | |
"\x00"; | |
} | |
} | |
else { | |
# Current block is ASCII, previous was UTF-16. | |
# Convert the current ASCII, single character, portion of | |
# the string into a double character UTF-16 string by | |
# inserting null bytes. | |
my $change_length = | |
( $str_position + $str_length ) - $buff_length; | |
# Length of the current CONTINUE record data. | |
my $biff_length = length $biff_data; | |
# Restrict the portion to be changed to the current block | |
# if the string extends over more than one block. | |
if ( $change_length > ( $biff_length - 1 ) * 2 ) { | |
$change_length = ( $biff_length - 1 ) * 2; | |
} | |
# Insert the null bytes. | |
for ( my $i = ( $change_length / 2 ) ; $i >= 1 ; $i-- ) { | |
substr( $biff_data, $i + 1, 0 ) = "\x00"; | |
} | |
} | |
# Strip the Grbit byte and store the string data. | |
$self->{_buffer} .= substr $biff_data, 1; | |
} | |
} | |
} | |
else { | |
# Not a CONTINUE block therefore an SST block. | |
$self->{_buffer} .= $biff_data; | |
} | |
# Reset the state variables. | |
$self->{_string_continued} = undef; | |
$self->{_previous_info} = undef; | |
# Extract out any full strings from the current buffer leaving behind a | |
# partial string that is continued into the next block, or an empty | |
# buffer is no string is continued. | |
while ( length $self->{_buffer} >= 4 ) { | |
my ( $str_info, $length, $str_position, $str_length ) = | |
_convBIFF8String( $self, $self->{_buffer}, 1 ); | |
if ( defined $str_info->[0] ) { | |
push @{ $self->{PkgStr} }, | |
{ | |
Text => $str_info->[0], | |
Unicode => $str_info->[1], | |
Rich => $str_info->[2], | |
Ext => $str_info->[3], | |
}; | |
$self->{_buffer} = substr( $self->{_buffer}, $length ); | |
} | |
else { | |
$self->{_string_continued} = $str_info->[1]; | |
$self->{_previous_info} = [ $str_position, $str_length ]; | |
last; | |
} | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _SwapForUnicode (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _SwapForUnicode { | |
my ( $sObj ) = @_; | |
# for(my $i = 0; $i<length($$sObj); $i+=2){ | |
for ( my $i = 0 ; $i < ( int( length( $$sObj ) / 2 ) * 2 ) ; $i += 2 ) { | |
my $sIt = substr( $$sObj, $i, 1 ); | |
substr( $$sObj, $i, 1 ) = substr( $$sObj, $i + 1, 1 ); | |
substr( $$sObj, $i + 1, 1 ) = $sIt; | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# _NewCell (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub _NewCell { | |
my ( $oBook, $iR, $iC, %rhKey ) = @_; | |
my ( $sWk, $iLen ); | |
return undef unless ( defined $oBook->{_CurSheet} ); | |
my $FmtClass = $oBook->{FmtClass}; | |
$rhKey{Type} = | |
$FmtClass->ChkType( $rhKey{Numeric}, $rhKey{Format}{FmtIdx} ); | |
my $FmtStr = $oBook->{FormatStr}{ $rhKey{Format}{FmtIdx} }; | |
# Set "Date" type if required for numbers in a MulRK BIFF block. | |
if ( defined $FmtStr && $rhKey{Type} eq "Numeric" ) { | |
# Match a range of possible date formats. Note: this isn't important | |
# except for reporting. The number will still be converted to a date | |
# by ExcelFmt() even if 'Type' isn't set to 'Date'. | |
if ( $FmtStr =~ m{^[dmy][-\\/dmy]*$}i ) { | |
$rhKey{Type} = "Date"; | |
} | |
} | |
my $oCell = Spreadsheet::ParseExcel::Cell->new( | |
Val => $rhKey{Val}, | |
FormatNo => $rhKey{FormatNo}, | |
Format => $rhKey{Format}, | |
Code => $rhKey{Code}, | |
Type => $rhKey{Type}, | |
); | |
$oCell->{_Kind} = $rhKey{Kind}; | |
$oCell->{_Value} = $FmtClass->ValFmt( $oCell, $oBook ); | |
if ( $rhKey{Rich} ) { | |
my @aRich = (); | |
my $sRich = $rhKey{Rich}; | |
for ( my $iWk = 0 ; $iWk < length( $sRich ) ; $iWk += 4 ) { | |
my ( $iPos, $iFnt ) = unpack( 'v2', substr( $sRich, $iWk ) ); | |
push @aRich, [ $iPos, $oBook->{Font}[$iFnt] ]; | |
} | |
$oCell->{Rich} = \@aRich; | |
} | |
if ( defined $oBook->{CellHandler} ) { | |
if ( defined $oBook->{Object} ) { | |
no strict; | |
ref( $oBook->{CellHandler} ) eq "CODE" | |
? $oBook->{CellHandler}->( | |
$_Object, $oBook, $oBook->{_CurSheet}, $iR, $iC, $oCell | |
) | |
: $oBook->{CellHandler}->callback( $_Object, $oBook, $oBook->{_CurSheet}, | |
$iR, $iC, $oCell ); | |
} | |
else { | |
$oBook->{CellHandler}->( $oBook, $oBook->{_CurSheet}, $iR, $iC, $oCell ); | |
} | |
} | |
unless ( $oBook->{NotSetCell} ) { | |
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Cells}[$iR][$iC] = $oCell; | |
} | |
return $oCell; | |
} | |
#------------------------------------------------------------------------------ | |
# ColorIdxToRGB (for Spreadsheet::ParseExcel) | |
# | |
# Returns for most recently opened book for compatibility, use | |
# Workbook::color_idx_to_rgb instead | |
# | |
#------------------------------------------------------------------------------ | |
sub ColorIdxToRGB { | |
my ( $sPkg, $iIdx ) = @_; | |
unless( defined $currentbook ) { | |
return ( ( defined $aColor[$iIdx] ) ? $aColor[$iIdx] : $aColor[0] ); | |
} | |
return $currentbook->color_idx_to_rgb( $iIdx ); | |
} | |
############################################################################### | |
# | |
# error(). | |
# | |
# Return an error string for a failed parse(). | |
# | |
sub error { | |
my $self = shift; | |
my $parse_error = $self->{_error_status}; | |
if ( exists $error_strings{$parse_error} ) { | |
return $error_strings{$parse_error}; | |
} | |
else { | |
return 'Unknown parse error'; | |
} | |
} | |
############################################################################### | |
# | |
# error_code(). | |
# | |
# Return an error code for a failed parse(). | |
# | |
sub error_code { | |
my $self = shift; | |
return $self->{_error_status}; | |
} | |
############################################################################### | |
# | |
# Mapping between legacy method names and new names. | |
# | |
{ | |
no warnings; # Ignore warnings about variables used only once. | |
*Parse = *parse; | |
} | |
1; | |
__END__ | |
=head1 NAME | |
Spreadsheet::ParseExcel - Read information from an Excel file. | |
=head1 SYNOPSIS | |
#!/usr/bin/perl -w | |
use strict; | |
use Spreadsheet::ParseExcel; | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
if ( !defined $workbook ) { | |
die $parser->error(), ".\n"; | |
} | |
for my $worksheet ( $workbook->worksheets() ) { | |
my ( $row_min, $row_max ) = $worksheet->row_range(); | |
my ( $col_min, $col_max ) = $worksheet->col_range(); | |
for my $row ( $row_min .. $row_max ) { | |
for my $col ( $col_min .. $col_max ) { | |
my $cell = $worksheet->get_cell( $row, $col ); | |
next unless $cell; | |
print "Row, Col = ($row, $col)\n"; | |
print "Value = ", $cell->value(), "\n"; | |
print "Unformatted = ", $cell->unformatted(), "\n"; | |
print "\n"; | |
} | |
} | |
} | |
=head1 DESCRIPTION | |
The Spreadsheet::ParseExcel module can be used to read information from Excel 95-2003 binary files. | |
The module cannot read files in the Excel 2007 Open XML XLSX format. See the L<Spreadsheet::XLSX> module instead. | |
=head1 Parser | |
=head2 new() | |
The C<new()> method is used to create a new C<Spreadsheet::ParseExcel> parser object. | |
my $parser = Spreadsheet::ParseExcel->new(); | |
It is possible to pass a password to decrypt an encrypted file: | |
$parser = Spreadsheet::ParseExcel->new( Password => 'secret' ); | |
Only the default Excel encryption scheme is currently supported. See L</Decryption>. | |
As an advanced feature it is also possible to pass a call-back handler to the parser to control the parsing of the spreadsheet. | |
$parser = Spreadsheet::ParseExcel->new( | |
CellHandler => \&cell_handler, | |
NotSetCell => 1, | |
); | |
The call-back can be used to ignore certain cells or to reduce memory usage. See the section L<Reducing the memory usage of Spreadsheet::ParseExcel> for more information. | |
=head2 parse($filename, $formatter) | |
The Parser C<parse()> method returns a L</Workbook> object. | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
If an error occurs C<parse()> returns C<undef>. In general, programs should contain a test for failed parsing as follows: | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
if ( !defined $workbook ) { | |
die $parser->error(), ".\n"; | |
} | |
The C<$filename> parameter is generally the file to be parsed. However, it can also be a filehandle or a scalar reference. | |
The optional C<$formatter> parameter can be an reference to a L</Formatter Class> to format the value of cells. This is useful for parsing workbooks with Unicode or Asian characters: | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $formatter = Spreadsheet::ParseExcel::FmtJapan->new(); | |
my $workbook = $parser->parse( 'Book1.xls', $formatter ); | |
The L<Spreadsheet::ParseExcel::FmtJapan> formatter also supports Unicode. If you encounter any encoding problems with the default formatter try that instead. | |
=head2 error() | |
The Parser C<error()> method returns an error string if a C<parse()> fails: | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
if ( !defined $workbook ) { | |
die $parser->error(), ".\n"; | |
} | |
If you wish to generate you own error string you can use the C<error_code()> method instead (see below). The C<error()> and C<error_code()> values are as follows: | |
error() error_code() | |
======= ============ | |
'' 0 | |
'File not found' 1 | |
'No Excel data found in file' 2 | |
'File is encrypted' 3 | |
The C<error_code()> method is explained below. | |
Spreadsheet::ParseExcel will try to decrypt an encrypted Excel file using the default password or a user supplied password passed to C<new()>, see above. If these fail the module will return the C<'File is encrypted'> error. Only the default Excel encryption scheme is currently supported, see L</Decryption>. | |
=head2 error_code() | |
The Parser C<error_code()> method returns an error code if a C<parse()> fails: | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
if ( !defined $workbook ) { | |
die "Got error code ", $parser->error_code, ".\n"; | |
} | |
This can be useful if you wish to employ you own error strings or error handling methods. | |
=head1 Workbook | |
A C<Spreadsheet::ParseExcel::Workbook> is created via the C<Spreadsheet::ParseExcel> C<parse()> method: | |
my $parser = Spreadsheet::ParseExcel->new(); | |
my $workbook = $parser->parse('Book1.xls'); | |
The main methods of the Workbook class are: | |
$workbook->worksheets() | |
$workbook->worksheet() | |
$workbook->worksheet_count() | |
$workbook->get_filename() | |
These more commonly used methods of the Workbook class are outlined below. The other, less commonly used, methods are documented in L<Spreadsheet::ParseExcel::Worksheet>. | |
=head2 worksheets() | |
Returns an array of L</Worksheet> objects. This was most commonly used to iterate over the worksheets in a workbook: | |
for my $worksheet ( $workbook->worksheets() ) { | |
... | |
} | |
=head2 worksheet() | |
The C<worksheet()> method returns a single C<Worksheet> object using either its name or index: | |
$worksheet = $workbook->worksheet('Sheet1'); | |
$worksheet = $workbook->worksheet(0); | |
Returns C<undef> if the sheet name or index doesn't exist. | |
=head2 worksheet_count() | |
The C<worksheet_count()> method returns the number of Worksheet objects in the Workbook. | |
my $worksheet_count = $workbook->worksheet_count(); | |
=head2 get_filename() | |
The C<get_filename()> method returns the name of the Excel file of C<undef> if the data was read from a filehandle rather than a file. | |
my $filename = $workbook->get_filename(); | |
=head2 Other Workbook Methods | |
For full documentation of the methods available via a Workbook object see L<Spreadsheet::ParseExcel::Workbook>. | |
=head1 Worksheet | |
The C<Spreadsheet::ParseExcel::Worksheet> class encapsulates the properties of an Excel worksheet. | |
A Worksheet object is obtained via the L</worksheets()> or L</worksheet()> methods. | |
for my $worksheet ( $workbook->worksheets() ) { | |
... | |
} | |
# Or: | |
$worksheet = $workbook->worksheet('Sheet1'); | |
$worksheet = $workbook->worksheet(0); | |
The most commonly used methods of the Worksheet class are: | |
$worksheet->get_cell() | |
$worksheet->row_range() | |
$worksheet->col_range() | |
$worksheet->get_name() | |
The Spreadsheet::ParseExcel::Worksheet class exposes a lot of methods but in general very few are required unless you are writing an advanced filter. | |
The most commonly used methods are detailed below. The others are documented in L<Spreadsheet::ParseExcel::Worksheet>. | |
=head2 get_cell($row, $col) | |
Return the L</Cell> object at row C<$row> and column C<$col> if it is defined. Otherwise returns undef. | |
my $cell = $worksheet->get_cell($row, $col); | |
=head2 row_range() | |
Returns a two-element list C<($min, $max)> containing the minimum and maximum defined rows in the worksheet. If there is no row defined C<$max> is smaller than C<$min>. | |
my ( $row_min, $row_max ) = $worksheet->row_range(); | |
=head2 col_range() | |
Returns a two-element list C<($min, $max)> containing the minimum and maximum of defined columns in the worksheet. If there is no column defined C<$max> is smaller than C<$min>. | |
my ( $col_min, $col_max ) = $worksheet->col_range(); | |
=head2 get_name() | |
The C<get_name()> method returns the name of the worksheet, such as 'Sheet1'. | |
my $name = $worksheet->get_name(); | |
=head2 Other Worksheet Methods | |
For other, less commonly used, Worksheet methods see L<Spreadsheet::ParseExcel::Worksheet>. | |
=head1 Cell | |
The C<Spreadsheet::ParseExcel::Cell> class has the following main methods. | |
$cell->value() | |
$cell->unformatted() | |
=head2 value() | |
The C<value()> method returns the formatted value of the cell. | |
my $value = $cell->value(); | |
Formatted in this sense refers to the numeric format of the cell value. For example a number such as 40177 might be formatted as 40,117, 40117.000 or even as the date 2009/12/30. | |
If the cell doesn't contain a numeric format then the formatted and unformatted cell values are the same, see the C<unformatted()> method below. | |
For a defined C<$cell> the C<value()> method will always return a value. | |
In the case of a cell with formatting but no numeric or string contents the method will return the empty string C<''>. | |
=head2 unformatted() | |
The C<unformatted()> method returns the unformatted value of the cell. | |
my $unformatted = $cell->unformatted(); | |
Returns the cell value without a numeric format. See the C<value()> method above. | |
=head2 Other Cell Methods | |
For other, less commonly used, Worksheet methods see L<Spreadsheet::ParseExcel::Cell>. | |
=head1 Format | |
The C<Spreadsheet::ParseExcel::Format> class has the following properties: | |
=head2 Format properties | |
$format->{Font} | |
$format->{AlignH} | |
$format->{AlignV} | |
$format->{Indent} | |
$format->{Wrap} | |
$format->{Shrink} | |
$format->{Rotate} | |
$format->{JustLast} | |
$format->{ReadDir} | |
$format->{BdrStyle} | |
$format->{BdrColor} | |
$format->{BdrDiag} | |
$format->{Fill} | |
$format->{Lock} | |
$format->{Hidden} | |
$format->{Style} | |
These properties are generally only of interest to advanced users. Casual users can skip this section. | |
=head2 $format->{Font} | |
Returns the L</Font> object for the Format. | |
=head2 $format->{AlignH} | |
Returns the horizontal alignment of the format where the value has the following meaning: | |
0 => No alignment | |
1 => Left | |
2 => Center | |
3 => Right | |
4 => Fill | |
5 => Justify | |
6 => Center across | |
7 => Distributed/Equal spaced | |
=head2 $format->{AlignV} | |
Returns the vertical alignment of the format where the value has the following meaning: | |
0 => Top | |
1 => Center | |
2 => Bottom | |
3 => Justify | |
4 => Distributed/Equal spaced | |
=head2 $format->{Indent} | |
Returns the indent level of the C<Left> horizontal alignment. | |
=head2 $format->{Wrap} | |
Returns true if textwrap is on. | |
=head2 $format->{Shrink} | |
Returns true if "Shrink to fit" is set for the format. | |
=head2 $format->{Rotate} | |
Returns the text rotation. In Excel97+, it returns the angle in degrees of the text rotation. | |
In Excel95 or earlier it returns a value as follows: | |
0 => No rotation | |
1 => Top down | |
2 => 90 degrees anti-clockwise, | |
3 => 90 clockwise | |
=head2 $format->{JustLast} | |
Return true if the "justify last" property is set for the format. | |
=head2 $format->{ReadDir} | |
Returns the direction that the text is read from. | |
=head2 $format->{BdrStyle} | |
Returns an array ref of border styles as follows: | |
[ $left, $right, $top, $bottom ] | |
=head2 $format->{BdrColor} | |
Returns an array ref of border color indexes as follows: | |
[ $left, $right, $top, $bottom ] | |
=head2 $format->{BdrDiag} | |
Returns an array ref of diagonal border kind, style and color index as follows: | |
[$kind, $style, $color ] | |
Where kind is: | |
0 => None | |
1 => Right-Down | |
2 => Right-Up | |
3 => Both | |
=head2 $format->{Fill} | |
Returns an array ref of fill pattern and color indexes as follows: | |
[ $pattern, $front_color, $back_color ] | |
=head2 $format->{Lock} | |
Returns true if the cell is locked. | |
=head2 $format->{Hidden} | |
Returns true if the cell is Hidden. | |
=head2 $format->{Style} | |
Returns true if the format is a Style format. | |
=head1 Font | |
I<Spreadsheet::ParseExcel::Font> | |
Format class has these properties: | |
=head1 Font Properties | |
$font->{Name} | |
$font->{Bold} | |
$font->{Italic} | |
$font->{Height} | |
$font->{Underline} | |
$font->{UnderlineStyle} | |
$font->{Color} | |
$font->{Strikeout} | |
$font->{Super} | |
=head2 $font->{Name} | |
Returns the name of the font, for example 'Arial'. | |
=head2 $font->{Bold} | |
Returns true if the font is bold. | |
=head2 $font->{Italic} | |
Returns true if the font is italic. | |
=head2 $font->{Height} | |
Returns the size (height) of the font. | |
=head2 $font->{Underline} | |
Returns true if the font in underlined. | |
=head2 $font->{UnderlineStyle} | |
Returns the style of an underlined font where the value has the following meaning: | |
0 => None | |
1 => Single | |
2 => Double | |
33 => Single accounting | |
34 => Double accounting | |
=head2 $font->{Color} | |
Returns the color index for the font. The mapping to an RGB color is defined by each workbook. | |
The index can be converted to a RGB string using the C<$workbook->ColorIdxToRGB()> Parser method. | |
(Older versions of C<Spreadsheet::ParseExcel> provided the C<ColorIdxToRGB> class method, which is deprecated.) | |
=head2 $font->{Strikeout} | |
Returns true if the font has the strikeout property set. | |
=head2 $font->{Super} | |
Returns one of the following values if the superscript or subscript property of the font is set: | |
0 => None | |
1 => Superscript | |
2 => Subscript | |
=head1 Formatter Class | |
Formatters can be passed to the C<parse()> method to deal with Unicode or Asian formatting. | |
Spreadsheet::ParseExcel includes 2 formatter classes. C<FmtDefault> and C<FmtJapanese>. It is also possible to create a user defined formatting class. | |
The formatter class C<Spreadsheet::ParseExcel::Fmt*> should provide the following functions: | |
=head2 ChkType($self, $is_numeric, $format_index) | |
Method to check the type of data in the cell. Should return C<Date>, C<Numeric> or C<Text>. It is passed the following parameters: | |
=over | |
=item $self | |
A scalar reference to the Formatter object. | |
=item $is_numeric | |
If true, the value seems to be number. | |
=item $format_index | |
The index number for the cell Format object. | |
=back | |
=head2 TextFmt($self, $string_data, $string_encoding) | |
Converts the string data in the cell into the correct encoding. It is passed the following parameters: | |
=over | |
=item $self | |
A scalar reference to the Formatter object. | |
=item $string_data | |
The original string/text data. | |
=item $string_encoding | |
The character encoding of original string/text. | |
=back | |
=head2 ValFmt($self, $cell, $workbook) | |
Convert the original unformatted cell value into the appropriate formatted value. For instance turn a number into a formatted date. It is passed the following parameters: | |
=over | |
=item $self | |
A scalar reference to the Formatter object. | |
=item $cell | |
A scalar reference to the Cell object. | |
=item $workbook | |
A scalar reference to the Workbook object. | |
=back | |
=head2 FmtString($self, $cell, $workbook) | |
Get the format string for the Cell. It is passed the following parameters: | |
=over | |
=item $self | |
A scalar reference to the Formatter object. | |
=item $cell | |
A scalar reference to the Cell object. | |
=item $workbook | |
A scalar reference to the Workbook object. | |
=back | |
=head1 Reducing the memory usage of Spreadsheet::ParseExcel | |
In some cases a C<Spreadsheet::ParseExcel> application may consume a lot of memory when processing a large Excel file and, as a result, may fail to complete. The following explains why this can occur and how to resolve it. | |
C<Spreadsheet::ParseExcel> processes an Excel file in two stages. In the first stage it extracts the Excel binary stream from the OLE container file using C<OLE::Storage_Lite>. In the second stage it parses the binary stream to read workbook, worksheet and cell data which it then stores in memory. The majority of the memory usage is required for storing cell data. | |
The reason for this is that as the Excel file is parsed and each cell is encountered a cell handling function creates a relatively large nested cell object that contains the cell value and all of the data that relates to the cell formatting. For large files (a 10MB Excel file on a 256MB system) this overhead can cause the system to grind to a halt. | |
However, in a lot of cases when an Excel file is being processed the only information that is required are the cell values. In these cases it is possible to avoid most of the memory overhead by specifying your own cell handling function and by telling Spreadsheet::ParseExcel not to store the parsed cell data. This is achieved by passing a cell handler function to C<new()> when creating the parse object. Here is an example. | |
#!/usr/bin/perl -w | |
use strict; | |
use Spreadsheet::ParseExcel; | |
my $parser = Spreadsheet::ParseExcel->new( | |
CellHandler => \&cell_handler, | |
NotSetCell => 1 | |
); | |
my $workbook = $parser->parse('file.xls'); | |
sub cell_handler { | |
my $workbook = $_[0]; | |
my $sheet_index = $_[1]; | |
my $row = $_[2]; | |
my $col = $_[3]; | |
my $cell = $_[4]; | |
# Do something useful with the formatted cell value | |
print $cell->value(), "\n"; | |
} | |
The user specified cell handler is passed as a code reference to C<new()> along with the parameter C<NotSetCell> which tells Spreadsheet::ParseExcel not to store the parsed cell. Note, you don't have to iterate over the rows and columns, this happens automatically as part of the parsing. | |
The cell handler is passed 5 arguments. The first, C<$workbook>, is a reference to the C<Spreadsheet::ParseExcel::Workbook> object that represent the parsed workbook. This can be used to access any of the C<Spreadsheet::ParseExcel::Workbook> methods, see L</Workbook>. The second C<$sheet_index> is the zero-based index of the worksheet being parsed. The third and fourth, C<$row> and C<$col>, are the zero-based row and column number of the cell. The fifth, C<$cell>, is a reference to the C<Spreadsheet::ParseExcel::Cell> object. This is used to extract the data from the cell. See L</Cell> for more information. | |
This technique can be useful if you are writing an Excel to database filter since you can put your DB calls in the cell handler. | |
If you don't want all of the data in the spreadsheet you can add some control logic to the cell handler. For example we can extend the previous example so that it only prints the first 10 rows of the first two worksheets in the parsed workbook by adding some C<if()> statements to the cell handler: | |
#!/usr/bin/perl -w | |
use strict; | |
use Spreadsheet::ParseExcel; | |
my $parser = Spreadsheet::ParseExcel->new( | |
CellHandler => \&cell_handler, | |
NotSetCell => 1 | |
); | |
my $workbook = $parser->parse('file.xls'); | |
sub cell_handler { | |
my $workbook = $_[0]; | |
my $sheet_index = $_[1]; | |
my $row = $_[2]; | |
my $col = $_[3]; | |
my $cell = $_[4]; | |
# Skip some worksheets and rows (inefficiently). | |
return if $sheet_index >= 3; | |
return if $row >= 10; | |
# Do something with the formatted cell value | |
print $cell->value(), "\n"; | |
} | |
However, this still processes the entire workbook. If you wish to save some additional processing time you can abort the parsing after you have read the data that you want, using the workbook C<ParseAbort> method: | |
#!/usr/bin/perl -w | |
use strict; | |
use Spreadsheet::ParseExcel; | |
my $parser = Spreadsheet::ParseExcel->new( | |
CellHandler => \&cell_handler, | |
NotSetCell => 1 | |
); | |
my $workbook = $parser->parse('file.xls'); | |
sub cell_handler { | |
my $workbook = $_[0]; | |
my $sheet_index = $_[1]; | |
my $row = $_[2]; | |
my $col = $_[3]; | |
my $cell = $_[4]; | |
# Skip some worksheets and rows (more efficiently). | |
if ( $sheet_index >= 1 and $row >= 10 ) { | |
$workbook->ParseAbort(1); | |
return; | |
} | |
# Do something with the formatted cell value | |
print $cell->value(), "\n"; | |
} | |
=head1 Decryption | |
If a workbook is "protected" then Excel will encrypt the file whether a password is supplied or not. As of version 0.59 Spreadsheet::ParseExcel supports decrypting Excel workbooks using a default or user supplied password. However, only the following encryption scheme is supported: | |
Office 97/2000 Compatible encryption | |
The following encryption methods are not supported: | |
Weak Encryption (XOR) | |
RC4, Microsoft Base Cryptographic Provider v1.0 | |
RC4, Microsoft Base DSS and Diffie-Hellman Cryptographic Provider | |
RC4, Microsoft DH SChannel Cryptographic Provider | |
RC4, Microsoft Enhanced Cryptographic Provider v1.0 | |
RC4, Microsoft Enhanced DSS and Diffie-Hellman Cryptographic Provider | |
RC4, Microsoft Enhanced RSA and AES Cryptographic Provider | |
RC4, Microsoft RSA SChannel Cryptographic Provider | |
RC4, Microsoft Strong Cryptographic Provider | |
See the following for more information on Excel encryption: L<http://office.microsoft.com/en-us/office-2003-resource-kit/important-aspects-of-password-and-encryption-protection-HA001140311.aspx>. | |
=head1 KNOWN PROBLEMS | |
=over | |
=item * Issues reported by users: L<http://rt.cpan.org/Public/Dist/Display.html?Name=Spreadsheet-ParseExcel> | |
=item * This module cannot read the values of formulas from files created with Spreadsheet::WriteExcel unless the user specified the values when creating the file (which is generally not the case). The reason for this is that Spreadsheet::WriteExcel writes the formula but not the formula result since it isn't in a position to calculate arbitrary Excel formulas without access to Excel's formula engine. | |
=item * If Excel has date fields where the specified format is equal to the system-default for the short-date locale, Excel does not store the format, but defaults to an internal format which is system dependent. In these cases ParseExcel uses the date format 'yyyy-mm-dd'. | |
=back | |
=head1 REPORTING A BUG | |
Bugs can be reported via rt.cpan.org. See the following for instructions on bug reporting for Spreadsheet::ParseExcel | |
L<http://rt.cpan.org/Public/Dist/Display.html?Name=Spreadsheet-ParseExcel> | |
=head1 SEE ALSO | |
=over | |
=item * xls2csv by Ken Prows L<http://search.cpan.org/~ken/xls2csv-1.06/script/xls2csv>. | |
=item * xls2csv and xlscat by H.Merijn Brand (these utilities are part of Spreadsheet::Read, see below). | |
=item * excel2txt by Ken Youens-Clark, L<http://search.cpan.org/~kclark/excel2txt/excel2txt>. This is an excellent example of an Excel filter using Spreadsheet::ParseExcel. It can produce CSV, Tab delimited, Html, XML and Yaml. | |
=item * XLSperl by Jon Allen L<http://search.cpan.org/~jonallen/XLSperl/bin/XLSperl>. This application allows you to use Perl "one-liners" with Microsoft Excel files. | |
=item * Spreadsheet::XLSX L<http://search.cpan.org/~dmow/Spreadsheet-XLSX/lib/Spreadsheet/XLSX.pm> by Dmitry Ovsyanko. A module with a similar interface to Spreadsheet::ParseExcel for parsing Excel 2007 XLSX OpenXML files. | |
=item * Spreadsheet::Read L<http://search.cpan.org/~hmbrand/Spreadsheet-Read/Read.pm> by H.Merijn Brand. A single interface for reading several different spreadsheet formats. | |
=item * Spreadsheet::WriteExcel L<http://search.cpan.org/~jmcnamara/Spreadsheet-WriteExcel/lib/Spreadsheet/WriteExcel.pm>. A perl module for creating new Excel files. | |
=item * Spreadsheet::ParseExcel::SaveParser L<http://search.cpan.org/~jmcnamara/Spreadsheet-ParseExcel/lib/Spreadsheet/ParseExcel/SaveParser.pm>. This is a combination of Spreadsheet::ParseExcel and Spreadsheet::WriteExcel and it allows you to "rewrite" an Excel file. See the following example L<http://search.cpan.org/~jmcnamara/Spreadsheet-WriteExcel/lib/Spreadsheet/WriteExcel.pm#MODIFYING_AND_REWRITING_EXCEL_FILES>. It is part of the Spreadsheet::ParseExcel distro. | |
=item * Text::CSV_XS L<http://search.cpan.org/~hmbrand/Text-CSV_XS/CSV_XS.pm> by H.Merijn Brand. A fast and rigorous module for reading and writing CSV data. Don't consider rolling your own CSV handling, use this module instead. | |
=back | |
=head1 MAILING LIST | |
There is a Google group for discussing and asking questions about Spreadsheet::ParseExcel. This is a good place to search to see if your question has been asked before: L<http://groups-beta.google.com/group/spreadsheet-parseexcel/> | |
=head1 DONATIONS | |
If you'd care to donate to the Spreadsheet::ParseExcel project, you can do so via PayPal: L<http://tinyurl.com/7ayes> | |
=head1 TODO | |
=over | |
=item * The current maintenance work is directed towards making the documentation more useful, improving and simplifying the API, and improving the maintainability of the code base. After that new features will be added. | |
=item * Fix open bugs and documentation for SaveParser. | |
=item * Add Formula support, Hyperlink support, Named Range support. | |
=item * Improve Spreadsheet::ParseExcel::SaveParser compatibility with Spreadsheet::WriteExcel. | |
=item * Improve Unicode and other encoding support. This will probably require dropping support for perls prior to 5.8+. | |
=back | |
=head1 ACKNOWLEDGEMENTS | |
From Kawai Takanori: | |
First of all, I would like to acknowledge the following valuable programs and modules: | |
XHTML, OLE::Storage and Spreadsheet::WriteExcel. | |
In no particular order: Yamaji Haruna, Simamoto Takesi, Noguchi Harumi, Ikezawa Kazuhiro, Suwazono Shugo, Hirofumi Morisada, Michael Edwards, Kim Namusk, Slaven Rezic, Grant Stevens, H.Merijn Brand and many many people + Kawai Mikako. | |
Alexey Mazurin added the decryption facility. | |
=head1 DISCLAIMER OF WARRANTY | |
Because this software is licensed free of charge, there is no warranty for the software, to the extent permitted by applicable law. Except when otherwise stated in writing the copyright holders and/or other parties provide the software "as is" without warranty of any kind, either expressed or implied, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. The entire risk as to the quality and performance of the software is with you. Should the software prove defective, you assume the cost of all necessary servicing, repair, or correction. | |
In no event unless required by applicable law or agreed to in writing will any copyright holder, or any other party who may modify and/or redistribute the software as permitted by the above licence, be liable to you for damages, including any general, special, incidental, or consequential damages arising out of the use or inability to use the software (including but not limited to loss of data or data being rendered inaccurate or losses sustained by you or third parties or a failure of the software to operate with any other software), even if such holder or other party has been advised of the possibility of such damages. | |
=head1 LICENSE | |
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php> | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori (Hippo2000) kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. This is free software. You may distribute under the terms of either the GNU General Public License or the Artistic License. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/Cell.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_CELL'; | |
package Spreadsheet::ParseExcel::Cell; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Cell - A class for Cell data and formatting. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
our $VERSION = '0.65'; | |
############################################################################### | |
# | |
# new() | |
# | |
# Constructor. | |
# | |
sub new { | |
my ( $package, %properties ) = @_; | |
my $self = \%properties; | |
bless $self, $package; | |
} | |
############################################################################### | |
# | |
# value() | |
# | |
# Returns the formatted value of the cell. | |
# | |
sub value { | |
my $self = shift; | |
return $self->{_Value}; | |
} | |
############################################################################### | |
# | |
# unformatted() | |
# | |
# Returns the unformatted value of the cell. | |
# | |
sub unformatted { | |
my $self = shift; | |
return $self->{Val}; | |
} | |
############################################################################### | |
# | |
# get_format() | |
# | |
# Returns the Format object for the cell. | |
# | |
sub get_format { | |
my $self = shift; | |
return $self->{Format}; | |
} | |
############################################################################### | |
# | |
# type() | |
# | |
# Returns the type of cell such as Text, Numeric or Date. | |
# | |
sub type { | |
my $self = shift; | |
return $self->{Type}; | |
} | |
############################################################################### | |
# | |
# encoding() | |
# | |
# Returns the character encoding of the cell. | |
# | |
sub encoding { | |
my $self = shift; | |
if ( !defined $self->{Code} ) { | |
return 1; | |
} | |
elsif ( $self->{Code} eq 'ucs2' ) { | |
return 2; | |
} | |
elsif ( $self->{Code} eq '_native_' ) { | |
return 3; | |
} | |
else { | |
return 0; | |
} | |
return $self->{Code}; | |
} | |
############################################################################### | |
# | |
# is_merged() | |
# | |
# Returns true if the cell is merged. | |
# | |
sub is_merged { | |
my $self = shift; | |
return $self->{Merged}; | |
} | |
############################################################################### | |
# | |
# get_rich_text() | |
# | |
# Returns an array ref of font information about each string block in a "rich", | |
# i.e. multi-format, string. | |
# | |
sub get_rich_text { | |
my $self = shift; | |
return $self->{Rich}; | |
} | |
############################################################################### | |
# | |
# get_hyperlink { | |
# | |
# Returns an array ref of hyperlink information if the cell contains a hyperlink. | |
# Returns undef otherwise | |
# | |
# [0] : Description of link (You may want $cell->value, as it will have rich text) | |
# [1] : URL - the link expressed as a URL. N.B. relative URLs will be defaulted to | |
# the directory of the input file, if the input file name is known. Otherwise | |
# %REL% will be inserted as a place-holder. Depending on your application, | |
# you should either remove %REL% or replace it with the appropriate path. | |
# [2] : Target frame (or undef if none) | |
sub get_hyperlink { | |
my $self = shift; | |
return $self->{Hyperlink} if exists $self->{Hyperlink}; | |
return undef; | |
} | |
# | |
############################################################################### | |
# | |
# Mapping between legacy method names and new names. | |
# | |
{ | |
no warnings; # Ignore warnings about variables used only once. | |
*Value = \&value; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::Cell - A class for Cell data and formatting. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 Methods | |
The following Cell methods are available: | |
$cell->value() | |
$cell->unformatted() | |
$cell->get_format() | |
$cell->type() | |
$cell->encoding() | |
$cell->is_merged() | |
$cell->get_rich_text() | |
$cell->get_hyperlink() | |
=head2 value() | |
The C<value()> method returns the formatted value of the cell. | |
my $value = $cell->value(); | |
Formatted in this sense refers to the numeric format of the cell value. For example a number such as 40177 might be formatted as 40,117, 40117.000 or even as the date 2009/12/30. | |
If the cell doesn't contain a numeric format then the formatted and unformatted cell values are the same, see the C<unformatted()> method below. | |
For a defined C<$cell> the C<value()> method will always return a value. | |
In the case of a cell with formatting but no numeric or string contents the method will return the empty string C<''>. | |
=head2 unformatted() | |
The C<unformatted()> method returns the unformatted value of the cell. | |
my $unformatted = $cell->unformatted(); | |
Returns the cell value without a numeric format. See the C<value()> method above. | |
=head2 get_format() | |
The C<get_format()> method returns the L<Spreadsheet::ParseExcel::Format> object for the cell. | |
my $format = $cell->get_format(); | |
If a user defined format hasn't been applied to the cell then the default cell format is returned. | |
=head2 type() | |
The C<type()> method returns the type of cell such as Text, Numeric or Date. If the type was detected as Numeric, and the Cell Format matches C<m{^[dmy][-\\/dmy]*$}i>, it will be treated as a Date type. | |
my $type = $cell->type(); | |
See also L<Dates and Time in Excel>. | |
=head2 encoding() | |
The C<encoding()> method returns the character encoding of the cell. | |
my $encoding = $cell->encoding(); | |
This method is only of interest to developers. In general Spreadsheet::ParseExcel will return all character strings in UTF-8 regardless of the encoding used by Excel. | |
The C<encoding()> method returns one of the following values: | |
=over | |
=item * 0: Unknown format. This shouldn't happen. In the default case the format should be 1. | |
=item * 1: 8bit ASCII or single byte UTF-16. This indicates that the characters are encoded in a single byte. In Excel 95 and earlier This usually meant ASCII or an international variant. In Excel 97 it refers to a compressed UTF-16 character string where all of the high order bytes are 0 and are omitted to save space. | |
=item * 2: UTF-16BE. | |
=item * 3: Native encoding. In Excel 95 and earlier this encoding was used to represent multi-byte character encodings such as SJIS. | |
=back | |
=head2 is_merged() | |
The C<is_merged()> method returns true if the cell is merged. | |
my $is_merged = $cell->is_merged(); | |
Returns C<undef> if the property isn't set. | |
=head2 get_rich_text() | |
The C<get_rich_text()> method returns an array ref of font information about each string block in a "rich", i.e. multi-format, string. | |
my $rich_text = $cell->get_rich_text(); | |
The return value is an arrayref of arrayrefs in the form: | |
[ | |
[ $start_position, $font_object ], | |
..., | |
] | |
Returns undef if the property isn't set. | |
=head2 get_hyperlink() | |
If a cell contains a hyperlink, the C<get_hyperlink()> method returns an array ref of information about it. | |
A cell can contain at most one hyperlink. If it does, it contains no other value. | |
Otherwise, it returns undef; | |
The array contains: | |
=over | |
=item * 0: Description (what's displayed); undef if not present | |
=item * 1: Link, converted to an appropriate URL - Note: Relative links are based on the input file. %REL% is used if the input file is unknown (e.g. a file handle or scalar) | |
=item * 2: Target - target frame (or undef if none) | |
=back | |
=head1 Dates and Time in Excel | |
Dates and times in Excel are represented by real numbers, for example "Jan 1 2001 12:30 PM" is represented by the number 36892.521. | |
The integer part of the number stores the number of days since the epoch and the fractional part stores the percentage of the day. | |
A date or time in Excel is just like any other number. The way in which it is displayed is controlled by the number format: | |
Number format $cell->value() $cell->unformatted() | |
============= ============== ============== | |
'dd/mm/yy' '28/02/08' 39506.5 | |
'mm/dd/yy' '02/28/08' 39506.5 | |
'd-m-yyyy' '28-2-2008' 39506.5 | |
'dd/mm/yy hh:mm' '28/02/08 12:00' 39506.5 | |
'd mmm yyyy' '28 Feb 2008' 39506.5 | |
'mmm d yyyy hh:mm AM/PM' 'Feb 28 2008 12:00 PM' 39506.5 | |
The L<Spreadsheet::ParseExcel::Utility> module contains a function called C<ExcelLocaltime> which will convert between an unformatted Excel date/time number and a C<localtime()> like array. | |
For date conversions using the CPAN C<DateTime> framework see L<DateTime::Format::Excel> http://search.cpan.org/search?dist=DateTime-Format-Excel | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL_CELL | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/Dump.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_DUMP'; | |
package Spreadsheet::ParseExcel::Dump; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Dump - A class for dumping Excel records. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
our $VERSION = '0.65'; | |
my %NameTbl = ( | |
#P291 | |
0x0A => 'EOF', | |
0x0C => 'CALCCOUNT', | |
0x0D => 'CALCMODE', | |
0x0E => 'PRECISION', | |
0x0F => 'REFMODE', | |
0x10 => 'DELTA', | |
0x11 => 'ITERATION', | |
0x12 => 'PROTECT', | |
0x13 => 'PASSWORD', | |
0x14 => 'HEADER', | |
0x15 => 'FOOTER', | |
0x16 => 'EXTERNCOUNT', | |
0x17 => 'EXTERNSHEET', | |
0x19 => 'WINDOWPROTECT', | |
0x1A => 'VERTICALPAGEBREAKS', | |
0x1B => 'HORIZONTALPAGEBREAKS', | |
0x1C => 'NOTE', | |
0x1D => 'SELECTION', | |
0x22 => '1904', | |
0x26 => 'LEFTMARGIN', | |
0x27 => 'RIGHTMARGIN', | |
0x28 => 'TOPMARGIN', | |
0x29 => 'BOTTOMMARGIN', | |
0x2A => 'PRINTHEADERS', | |
0x2B => 'PRINTGRIDLINES', | |
0x2F => 'FILEPASS', | |
0x3C => 'COUNTINUE', | |
0x3D => 'WINDOW1', | |
0x40 => 'BACKUP', | |
0x41 => 'PANE', | |
0x42 => 'CODEPAGE', | |
0x4D => 'PLS', | |
0x50 => 'DCON', | |
0x51 => 'DCONREF', | |
#P292 | |
0x52 => 'DCONNAME', | |
0x55 => 'DEFCOLWIDTH', | |
0x59 => 'XCT', | |
0x5A => 'CRN', | |
0x5B => 'FILESHARING', | |
0x5C => 'WRITEACCES', | |
0x5D => 'OBJ', | |
0x5E => 'UNCALCED', | |
0x5F => 'SAVERECALC', | |
0x60 => 'TEMPLATE', | |
0x63 => 'OBJPROTECT', | |
0x7D => 'COLINFO', | |
0x7E => 'RK', | |
0x7F => 'IMDATA', | |
0x80 => 'GUTS', | |
0x81 => 'WSBOOL', | |
0x82 => 'GRIDSET', | |
0x83 => 'HCENTER', | |
0x84 => 'VCENTER', | |
0x85 => 'BOUNDSHEET', | |
0x86 => 'WRITEPROT', | |
0x87 => 'ADDIN', | |
0x88 => 'EDG', | |
0x89 => 'PUB', | |
0x8C => 'COUNTRY', | |
0x8D => 'HIDEOBJ', | |
0x90 => 'SORT', | |
0x91 => 'SUB', | |
0x92 => 'PALETTE', | |
0x94 => 'LHRECORD', | |
0x95 => 'LHNGRAPH', | |
0x96 => 'SOUND', | |
0x98 => 'LPR', | |
0x99 => 'STANDARDWIDTH', | |
0x9A => 'FNGROUPNAME', | |
0x9B => 'FILTERMODE', | |
0x9C => 'FNGROUPCOUNT', | |
#P293 | |
0x9D => 'AUTOFILTERINFO', | |
0x9E => 'AUTOFILTER', | |
0xA0 => 'SCL', | |
0xA1 => 'SETUP', | |
0xA9 => 'COORDLIST', | |
0xAB => 'GCW', | |
0xAE => 'SCENMAN', | |
0xAF => 'SCENARIO', | |
0xB0 => 'SXVIEW', | |
0xB1 => 'SXVD', | |
0xB2 => 'SXV', | |
0xB4 => 'SXIVD', | |
0xB5 => 'SXLI', | |
0xB6 => 'SXPI', | |
0xB8 => 'DOCROUTE', | |
0xB9 => 'RECIPNAME', | |
0xBC => 'SHRFMLA', | |
0xBD => 'MULRK', | |
0xBE => 'MULBLANK', | |
0xBF => 'TOOLBARHDR', | |
0xC0 => 'TOOLBAREND', | |
0xC1 => 'MMS', | |
0xC2 => 'ADDMENU', | |
0xC3 => 'DELMENU', | |
0xC5 => 'SXDI', | |
0xC6 => 'SXDB', | |
0xCD => 'SXSTRING', | |
0xD0 => 'SXTBL', | |
0xD1 => 'SXTBRGIITM', | |
0xD2 => 'SXTBPG', | |
0xD3 => 'OBPROJ', | |
0xD5 => 'SXISDTM', | |
0xD6 => 'RSTRING', | |
0xD7 => 'DBCELL', | |
0xDA => 'BOOKBOOL', | |
0xDC => 'PARAMQRY', | |
0xDC => 'SXEXT', | |
0xDD => 'SCENPROTECT', | |
0xDE => 'OLESIZE', | |
#P294 | |
0xDF => 'UDDESC', | |
0xE0 => 'XF', | |
0xE1 => 'INTERFACEHDR', | |
0xE2 => 'INTERFACEEND', | |
0xE3 => 'SXVS', | |
0xEA => 'TABIDCONF', | |
0xEB => 'MSODRAWINGGROUP', | |
0xEC => 'MSODRAWING', | |
0xED => 'MSODRAWINGSELECTION', | |
0xEF => 'PHONETICINFO', | |
0xF0 => 'SXRULE', | |
0xF1 => 'SXEXT', | |
0xF2 => 'SXFILT', | |
0xF6 => 'SXNAME', | |
0xF7 => 'SXSELECT', | |
0xF8 => 'SXPAIR', | |
0xF9 => 'SXFMLA', | |
0xFB => 'SXFORMAT', | |
0xFC => 'SST', | |
0xFD => 'LABELSST', | |
0xFF => 'EXTSST', | |
0x100 => 'SXVDEX', | |
0x103 => 'SXFORMULA', | |
0x122 => 'SXDBEX', | |
0x13D => 'TABID', | |
0x160 => 'USESELFS', | |
0x161 => 'DSF', | |
0x162 => 'XL5MODIFY', | |
0x1A5 => 'FILESHARING2', | |
0x1A9 => 'USERBVIEW', | |
0x1AA => 'USERVIEWBEGIN', | |
0x1AB => 'USERSVIEWEND', | |
0x1AD => 'QSI', | |
0x1AE => 'SUPBOOK', | |
0x1AF => 'PROT4REV', | |
0x1B0 => 'CONDFMT', | |
0x1B1 => 'CF', | |
0x1B2 => 'DVAL', | |
#P295 | |
0x1B5 => 'DCONBIN', | |
0x1B6 => 'TXO', | |
0x1B7 => 'REFRESHALL', | |
0x1B8 => 'HLINK', | |
0x1BA => 'CODENAME', | |
0x1BB => 'SXFDBTYPE', | |
0x1BC => 'PROT4REVPASS', | |
0x1BE => 'DV', | |
0x200 => 'DIMENSIONS', | |
0x201 => 'BLANK', | |
0x202 => 'Integer', #Not Documented | |
0x203 => 'NUMBER', | |
0x204 => 'LABEL', | |
0x205 => 'BOOLERR', | |
0x207 => 'STRING', | |
0x208 => 'ROW', | |
0x20B => 'INDEX', | |
0x218 => 'NAME', | |
0x221 => 'ARRAY', | |
0x223 => 'EXTERNNAME', | |
0x225 => 'DEFAULTROWHEIGHT', | |
0x231 => 'FONT', | |
0x236 => 'TABLE', | |
0x23E => 'WINDOW2', | |
0x293 => 'STYLE', | |
0x406 => 'FORMULA', | |
0x41E => 'FORMAT', | |
0x18 => 'NAME', | |
0x06 => 'FORMULA', | |
0x09 => 'BOF(BIFF2)', | |
0x209 => 'BOF(BIFF3)', | |
0x409 => 'BOF(BIFF4)', | |
0x809 => 'BOF(BIFF5-7)', | |
0x31 => 'FONT', 0x27E => 'RK', | |
#Chart/Graph | |
0x1001 => 'UNITS', | |
0x1002 => 'CHART', | |
0x1003 => 'SERISES', | |
0x1006 => 'DATAFORMAT', | |
0x1007 => 'LINEFORMAT', | |
0x1009 => 'MAKERFORMAT', | |
0x100A => 'AREAFORMAT', | |
0x100B => 'PIEFORMAT', | |
0x100C => 'ATTACHEDLABEL', | |
0x100D => 'SERIESTEXT', | |
0x1014 => 'CHARTFORMAT', | |
0x1015 => 'LEGEND', | |
0x1016 => 'SERIESLIST', | |
0x1017 => 'BAR', | |
0x1018 => 'LINE', | |
0x1019 => 'PIE', | |
0x101A => 'AREA', | |
0x101B => 'SCATTER', | |
0x101C => 'CHARTLINE', | |
0x101D => 'AXIS', | |
0x101E => 'TICK', | |
0x101F => 'VALUERANGE', | |
0x1020 => 'CATSERRANGE', | |
0x1021 => 'AXISLINEFORMAT', | |
0x1022 => 'CHARTFORMATLINK', | |
0x1024 => 'DEFAULTTEXT', | |
0x1025 => 'TEXT', | |
0x1026 => 'FONTX', | |
0x1027 => 'OBJECTLINK', | |
0x1032 => 'FRAME', | |
0x1033 => 'BEGIN', | |
0x1034 => 'END', | |
0x1035 => 'PLOTAREA', | |
0x103A => '3D', | |
0x103C => 'PICF', | |
0x103D => 'DROPBAR', | |
0x103E => 'RADAR', | |
0x103F => 'SURFACE', | |
0x1040 => 'RADARAREA', | |
0x1041 => 'AXISPARENT', | |
0x1043 => 'LEGENDXN', | |
0x1044 => 'SHTPROPS', | |
0x1045 => 'SERTOCRT', | |
0x1046 => 'AXESUSED', | |
0x1048 => 'SBASEREF', | |
0x104A => 'SERPARENT', | |
0x104B => 'SERAUXTREND', | |
0x104E => 'IFMT', | |
0x104F => 'POS', | |
0x1050 => 'ALRUNS', | |
0x1051 => 'AI', | |
0x105B => 'SERAUXERRBAR', | |
0x105D => 'SERFMT', | |
0x1060 => 'FBI', | |
0x1061 => 'BOPPOP', | |
0x1062 => 'AXCEXT', | |
0x1063 => 'DAT', | |
0x1064 => 'PLOTGROWTH', | |
0x1065 => 'SINDEX', | |
0x1066 => 'GELFRAME', | |
0x1067 => 'BPOPPOPCUSTOM', | |
); | |
#------------------------------------------------------------------------------ | |
# subDUMP (for Spreadsheet::ParseExcel) | |
#------------------------------------------------------------------------------ | |
sub subDUMP { | |
my ( $oBook, $bOp, $bLen, $sWk ) = @_; | |
printf "%04X:%-23s (Len:%3d) : %s\n", | |
$bOp, OpName($bOp), $bLen, unpack( "H40", $sWk ); | |
} | |
#------------------------------------------------------------------------------ | |
# Spreadsheet::ParseExcel->OpName | |
#------------------------------------------------------------------------------ | |
sub OpName { | |
my ($bOp) = @_; | |
return ( defined $NameTbl{$bOp} ) ? $NameTbl{$bOp} : 'undef'; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::Dump - A class for dumping Excel records. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL_DUMP | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/FmtDefault.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FMTDEFAULT'; | |
package Spreadsheet::ParseExcel::FmtDefault; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::FmtDefault - A class for Cell formats. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use Spreadsheet::ParseExcel::Utility qw(ExcelFmt); | |
our $VERSION = '0.65'; | |
my %hFmtDefault = ( | |
0x00 => 'General', | |
0x01 => '0', | |
0x02 => '0.00', | |
0x03 => '#,##0', | |
0x04 => '#,##0.00', | |
0x05 => '($#,##0_);($#,##0)', | |
0x06 => '($#,##0_);[Red]($#,##0)', | |
0x07 => '($#,##0.00_);($#,##0.00_)', | |
0x08 => '($#,##0.00_);[Red]($#,##0.00_)', | |
0x09 => '0%', | |
0x0A => '0.00%', | |
0x0B => '0.00E+00', | |
0x0C => '# ?/?', | |
0x0D => '# ??/??', | |
0x0E => 'yyyy-mm-dd', # Was 'm-d-yy', which is bad as system default | |
0x0F => 'd-mmm-yy', | |
0x10 => 'd-mmm', | |
0x11 => 'mmm-yy', | |
0x12 => 'h:mm AM/PM', | |
0x13 => 'h:mm:ss AM/PM', | |
0x14 => 'h:mm', | |
0x15 => 'h:mm:ss', | |
0x16 => 'm-d-yy h:mm', | |
#0x17-0x24 -- Differs in Natinal | |
0x25 => '(#,##0_);(#,##0)', | |
0x26 => '(#,##0_);[Red](#,##0)', | |
0x27 => '(#,##0.00);(#,##0.00)', | |
0x28 => '(#,##0.00);[Red](#,##0.00)', | |
0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)', | |
0x2A => '_($*#,##0_);_($*(#,##0);_(*"-"_);_(@_)', | |
0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)', | |
0x2C => '_($*#,##0.00_);_($*(#,##0.00);_(*"-"??_);_(@_)', | |
0x2D => 'mm:ss', | |
0x2E => '[h]:mm:ss', | |
0x2F => 'mm:ss.0', | |
0x30 => '##0.0E+0', | |
0x31 => '@', | |
); | |
#------------------------------------------------------------------------------ | |
# new (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub new { | |
my ( $sPkg, %hKey ) = @_; | |
my $oThis = {}; | |
bless $oThis; | |
return $oThis; | |
} | |
#------------------------------------------------------------------------------ | |
# TextFmt (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub TextFmt { | |
my ( $oThis, $sTxt, $sCode ) = @_; | |
return $sTxt if ( ( !defined($sCode) ) || ( $sCode eq '_native_' ) ); | |
return pack( 'U*', unpack( 'n*', $sTxt ) ); | |
} | |
#------------------------------------------------------------------------------ | |
# FmtStringDef (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub FmtStringDef { | |
my ( $oThis, $iFmtIdx, $oBook, $rhFmt ) = @_; | |
my $sFmtStr = $oBook->{FormatStr}->{$iFmtIdx}; | |
if ( !( defined($sFmtStr) ) && defined($rhFmt) ) { | |
$sFmtStr = $rhFmt->{$iFmtIdx}; | |
} | |
$sFmtStr = $hFmtDefault{$iFmtIdx} unless ($sFmtStr); | |
return $sFmtStr; | |
} | |
#------------------------------------------------------------------------------ | |
# FmtString (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub FmtString { | |
my ( $oThis, $oCell, $oBook ) = @_; | |
my $sFmtStr = | |
$oThis->FmtStringDef( $oBook->{Format}[ $oCell->{FormatNo} ]->{FmtIdx}, | |
$oBook ); | |
# Special case for cells that use Lotus123 style leading | |
# apostrophe to designate text formatting. | |
if ( $oBook->{Format}[ $oCell->{FormatNo} ]->{Key123} ) { | |
$sFmtStr = '@'; | |
} | |
unless ( defined($sFmtStr) ) { | |
if ( $oCell->{Type} eq 'Numeric' ) { | |
if ( int( $oCell->{Val} ) != $oCell->{Val} ) { | |
$sFmtStr = '0.00'; | |
} | |
else { | |
$sFmtStr = '0'; | |
} | |
} | |
elsif ( $oCell->{Type} eq 'Date' ) { | |
if ( int( $oCell->{Val} ) <= 0 ) { | |
$sFmtStr = 'h:mm:ss'; | |
} | |
else { | |
$sFmtStr = 'yyyy-mm-dd'; | |
} | |
} | |
else { | |
$sFmtStr = '@'; | |
} | |
} | |
return $sFmtStr; | |
} | |
#------------------------------------------------------------------------------ | |
# ValFmt (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub ValFmt { | |
my ( $oThis, $oCell, $oBook ) = @_; | |
my ( $Dt, $iFmtIdx, $iNumeric, $Flg1904 ); | |
if ( $oCell->{Type} eq 'Text' ) { | |
$Dt = | |
( ( defined $oCell->{Val} ) && ( $oCell->{Val} ne '' ) ) | |
? $oThis->TextFmt( $oCell->{Val}, $oCell->{Code} ) | |
: ''; | |
return $Dt; | |
} | |
else { | |
$Dt = $oCell->{Val}; | |
$Flg1904 = $oBook->{Flg1904}; | |
my $sFmtStr = $oThis->FmtString( $oCell, $oBook ); | |
return ExcelFmt( $sFmtStr, $Dt, $Flg1904, $oCell->{Type} ); | |
} | |
} | |
#------------------------------------------------------------------------------ | |
# ChkType (for Spreadsheet::ParseExcel::FmtDefault) | |
#------------------------------------------------------------------------------ | |
sub ChkType { | |
my ( $oPkg, $iNumeric, $iFmtIdx ) = @_; | |
if ($iNumeric) { | |
if ( ( ( $iFmtIdx >= 0x0E ) && ( $iFmtIdx <= 0x16 ) ) | |
|| ( ( $iFmtIdx >= 0x2D ) && ( $iFmtIdx <= 0x2F ) ) ) | |
{ | |
return "Date"; | |
} | |
else { | |
return "Numeric"; | |
} | |
} | |
else { | |
return "Text"; | |
} | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::FmtDefault - A class for Cell formats. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL_FMTDEFAULT | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/FmtJapan.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FMTJAPAN'; | |
package Spreadsheet::ParseExcel::FmtJapan; | |
use utf8; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::FmtJapan - A class for Cell formats. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use Encode qw(find_encoding decode); | |
use base 'Spreadsheet::ParseExcel::FmtDefault'; | |
our $VERSION = '0.65'; | |
my %FormatTable = ( | |
0x00 => 'General', | |
0x01 => '0', | |
0x02 => '0.00', | |
0x03 => '#,##0', | |
0x04 => '#,##0.00', | |
0x05 => '(\\#,##0_);(\\#,##0)', | |
0x06 => '(\\#,##0_);[Red](\\#,##0)', | |
0x07 => '(\\#,##0.00_);(\\#,##0.00_)', | |
0x08 => '(\\#,##0.00_);[Red](\\#,##0.00_)', | |
0x09 => '0%', | |
0x0A => '0.00%', | |
0x0B => '0.00E+00', | |
0x0C => '# ?/?', | |
0x0D => '# ??/??', | |
# 0x0E => 'm/d/yy', | |
0x0E => 'yyyy/m/d', | |
0x0F => 'd-mmm-yy', | |
0x10 => 'd-mmm', | |
0x11 => 'mmm-yy', | |
0x12 => 'h:mm AM/PM', | |
0x13 => 'h:mm:ss AM/PM', | |
0x14 => 'h:mm', | |
0x15 => 'h:mm:ss', | |
# 0x16 => 'm/d/yy h:mm', | |
0x16 => 'yyyy/m/d h:mm', | |
#0x17-0x24 -- Differs in Natinal | |
0x1E => 'm/d/yy', | |
0x1F => 'yyyy"年"m"月"d"日"', | |
0x20 => 'h"時"mm"分"', | |
0x21 => 'h"時"mm"分"ss"秒"', | |
#0x17-0x24 -- Differs in Natinal | |
0x25 => '(#,##0_);(#,##0)', | |
0x26 => '(#,##0_);[Red](#,##0)', | |
0x27 => '(#,##0.00);(#,##0.00)', | |
0x28 => '(#,##0.00);[Red](#,##0.00)', | |
0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)', | |
0x2A => '_(\\*#,##0_);_(\\*(#,##0);_(*"-"_);_(@_)', | |
0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)', | |
0x2C => '_(\\*#,##0.00_);_(\\*(#,##0.00);_(*"-"??_);_(@_)', | |
0x2D => 'mm:ss', | |
0x2E => '[h]:mm:ss', | |
0x2F => 'mm:ss.0', | |
0x30 => '##0.0E+0', | |
0x31 => '@', | |
0x37 => 'yyyy"年"m"月"', | |
0x38 => 'm"月"d"日"', | |
0x39 => 'ge.m.d', | |
0x3A => 'ggge"年"m"月"d"日"', | |
); | |
#------------------------------------------------------------------------------ | |
# new (for Spreadsheet::ParseExcel::FmtJapan) | |
#------------------------------------------------------------------------------ | |
sub new { | |
my ( $class, %args ) = @_; | |
my $encoding = $args{Code} || $args{encoding}; | |
my $self = { Code => $encoding }; | |
if($encoding){ | |
$self->{encoding} = find_encoding($encoding eq 'sjis' ? 'cp932' : $encoding) | |
or do{ | |
require Carp; | |
Carp::croak(qq{Unknown encoding '$encoding'}); | |
}; | |
} | |
return bless $self, $class; | |
} | |
#------------------------------------------------------------------------------ | |
# TextFmt (for Spreadsheet::ParseExcel::FmtJapan) | |
#------------------------------------------------------------------------------ | |
sub TextFmt { | |
my ( $self, $text, $input_encoding ) = @_; | |
if(!defined $input_encoding){ | |
$input_encoding = 'utf8'; | |
} | |
elsif($input_encoding eq '_native_'){ | |
$input_encoding = 'cp932'; # Shift_JIS in Microsoft products | |
} | |
$text = decode($input_encoding, $text); | |
return $self->{Code} ? $self->{encoding}->encode($text) : $text; | |
} | |
#------------------------------------------------------------------------------ | |
# FmtStringDef (for Spreadsheet::ParseExcel::FmtJapan) | |
#------------------------------------------------------------------------------ | |
sub FmtStringDef { | |
my ( $self, $format_index, $book ) = @_; | |
return $self->SUPER::FmtStringDef( $format_index, $book, \%FormatTable ); | |
} | |
#------------------------------------------------------------------------------ | |
# CnvNengo (for Spreadsheet::ParseExcel::FmtJapan) | |
#------------------------------------------------------------------------------ | |
# Convert A.D. into Japanese Nengo (aka Gengo) | |
my @Nengo = ( | |
{ | |
name => '平成', # Heisei | |
abbr_name => 'H', | |
base => 1988, | |
start => 19890108, | |
}, | |
{ | |
name => '昭和', # Showa | |
abbr_name => 'S', | |
base => 1925, | |
start => 19261225, | |
}, | |
{ | |
name => '大正', # Taisho | |
abbr_name => 'T', | |
base => 1911, | |
start => 19120730, | |
}, | |
{ | |
name => '明治', # Meiji | |
abbr_name => 'M', | |
base => 1867, | |
start => 18680908, | |
}, | |
); | |
# Usage: CnvNengo(name => @tm) or CnvNeng(abbr_name => @tm) | |
sub CnvNengo { | |
my ( $kind, @tm ) = @_; | |
my $year = $tm[5] + 1900; | |
my $wk = ($year * 10000) + ($tm[4] * 100) + ($tm[3] * 1); | |
#my $wk = sprintf( '%04d%02d%02d', $year, $tm[4], $tm[3] ); | |
foreach my $nengo(@Nengo){ | |
if( $wk >= $nengo->{start} ){ | |
return $nengo->{$kind} . ($year - $nengo->{base}); | |
} | |
} | |
return $year; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::FmtJapan - A class for Cell formats. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL_FMTJAPAN | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/FmtJapan2.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FMTJAPAN2'; | |
package Spreadsheet::ParseExcel::FmtJapan2; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::FmtJapan2 - A class for Cell formats. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use Jcode; | |
use Unicode::Map; | |
use base 'Spreadsheet::ParseExcel::FmtJapan'; | |
our $VERSION = '0.65'; | |
#------------------------------------------------------------------------------ | |
# new (for Spreadsheet::ParseExcel::FmtJapan2) | |
#------------------------------------------------------------------------------ | |
sub new { | |
my ( $sPkg, %hKey ) = @_; | |
my $oMap = Unicode::Map->new('CP932Excel'); | |
die "NO MAP FILE CP932Excel!!" | |
unless ( -r Unicode::Map->mapping("CP932Excel") ); | |
my $oThis = { | |
Code => $hKey{Code}, | |
_UniMap => $oMap, | |
}; | |
bless $oThis; | |
$oThis->SUPER::new(%hKey); | |
return $oThis; | |
} | |
#------------------------------------------------------------------------------ | |
# TextFmt (for Spreadsheet::ParseExcel::FmtJapan2) | |
#------------------------------------------------------------------------------ | |
sub TextFmt { | |
my ( $oThis, $sTxt, $sCode ) = @_; | |
# $sCode = 'sjis' if((! defined($sCode)) || ($sCode eq '_native_')); | |
if ( $oThis->{Code} ) { | |
if ( !defined($sCode) ) { | |
$sTxt =~ s/(.)/\x00$1/sg; | |
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt); | |
} | |
elsif ( $sCode eq 'ucs2' ) { | |
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt); | |
} | |
return Jcode::convert( $sTxt, $oThis->{Code}, 'sjis' ); | |
} | |
else { | |
return $sTxt; | |
} | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::FmtJapan2 - A class for Cell formats. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL_FMTJAPAN2 | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/FmtUnicode.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FMTUNICODE'; | |
package Spreadsheet::ParseExcel::FmtUnicode; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::FmtUnicode - A class for Cell formats. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use Unicode::Map; | |
use base 'Spreadsheet::ParseExcel::FmtDefault'; | |
our $VERSION = '0.65'; | |
#------------------------------------------------------------------------------ | |
# new (for Spreadsheet::ParseExcel::FmtUnicode) | |
#------------------------------------------------------------------------------ | |
sub new { | |
my ( $sPkg, %hKey ) = @_; | |
my $sMap = $hKey{Unicode_Map}; | |
my $oMap; | |
$oMap = Unicode::Map->new($sMap) if $sMap; | |
my $oThis = { | |
Unicode_Map => $sMap, | |
_UniMap => $oMap, | |
}; | |
bless $oThis; | |
return $oThis; | |
} | |
#------------------------------------------------------------------------------ | |
# TextFmt (for Spreadsheet::ParseExcel::FmtUnicode) | |
#------------------------------------------------------------------------------ | |
sub TextFmt { | |
my ( $oThis, $sTxt, $sCode ) = @_; | |
if ( $oThis->{_UniMap} ) { | |
if ( !defined($sCode) ) { | |
my $sSv = $sTxt; | |
$sTxt =~ s/(.)/\x00$1/sg; | |
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt); | |
$sTxt = $sSv unless ($sTxt); | |
} | |
elsif ( $sCode eq 'ucs2' ) { | |
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt); | |
} | |
# $sTxt = $oThis->{_UniMap}->from_unicode($sTxt) | |
# if(defined($sCode) && $sCode eq 'ucs2'); | |
return $sTxt; | |
} | |
else { | |
return $sTxt; | |
} | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::FmtUnicode - A class for Cell formats. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL_FMTUNICODE | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/Font.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FONT'; | |
package Spreadsheet::ParseExcel::Font; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Font - A class for Cell fonts. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
our $VERSION = '0.65'; | |
sub new { | |
my ( $class, %rhIni ) = @_; | |
my $self = \%rhIni; | |
bless $self, $class; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::Font - A class for Cell fonts. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL_FONT | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/Format.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FORMAT'; | |
package Spreadsheet::ParseExcel::Format; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Format - A class for Cell formats. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
our $VERSION = '0.65'; | |
sub new { | |
my ( $class, %rhIni ) = @_; | |
my $self = \%rhIni; | |
bless $self, $class; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::Format - A class for Cell formats. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL_FORMAT | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/SaveParser.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER'; | |
package Spreadsheet::ParseExcel::SaveParser; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::SaveParser - Rewrite an existing Excel file. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use Spreadsheet::ParseExcel; | |
use Spreadsheet::ParseExcel::SaveParser::Workbook; | |
use Spreadsheet::ParseExcel::SaveParser::Worksheet; | |
use Spreadsheet::WriteExcel; | |
use base 'Spreadsheet::ParseExcel'; | |
our $VERSION = '0.65'; | |
############################################################################### | |
# | |
# new() | |
# | |
sub new { | |
my ( $package, %params ) = @_; | |
$package->SUPER::new(%params); | |
} | |
############################################################################### | |
# | |
# Create() | |
# | |
sub Create { | |
my ( $self, $formatter ) = @_; | |
#0. New $workbook | |
my $workbook = Spreadsheet::ParseExcel::Workbook->new(); | |
$workbook->{SheetCount} = 0; | |
# User specified formatter class. | |
if ($formatter) { | |
$workbook->{FmtClass} = $formatter; | |
} | |
else { | |
$workbook->{FmtClass} = Spreadsheet::ParseExcel::FmtDefault->new(); | |
} | |
return Spreadsheet::ParseExcel::SaveParser::Workbook->new($workbook); | |
} | |
############################################################################### | |
# | |
# Parse() | |
# | |
sub Parse { | |
my ( $self, $sFile, $formatter ) = @_; | |
my $workbook = $self->SUPER::Parse( $sFile, $formatter ); | |
return undef unless defined $workbook; | |
return Spreadsheet::ParseExcel::SaveParser::Workbook->new($workbook); | |
} | |
############################################################################### | |
# | |
# SaveAs() | |
# | |
sub SaveAs { | |
my ( $self, $workbook, $filename ) = @_; | |
$workbook->SaveAs($filename); | |
} | |
1; | |
__END__ | |
=head1 NAME | |
Spreadsheet::ParseExcel::SaveParser - Rewrite an existing Excel file. | |
=head1 SYNOPSIS | |
Say we start with an Excel file that looks like this: | |
----------------------------------------------------- | |
| | A | B | C | | |
----------------------------------------------------- | |
| 1 | Hello | ... | ... | ... | |
| 2 | World | ... | ... | ... | |
| 3 | *Bold text* | ... | ... | ... | |
| 4 | ... | ... | ... | ... | |
| 5 | ... | ... | ... | ... | |
Then we process it with the following program: | |
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use Spreadsheet::ParseExcel; | |
use Spreadsheet::ParseExcel::SaveParser; | |
# Open an existing file with SaveParser | |
my $parser = Spreadsheet::ParseExcel::SaveParser->new(); | |
my $template = $parser->Parse('template.xls'); | |
# Get the first worksheet. | |
my $worksheet = $template->worksheet(0); | |
my $row = 0; | |
my $col = 0; | |
# Overwrite the string in cell A1 | |
$worksheet->AddCell( $row, $col, 'New string' ); | |
# Add a new string in cell B1 | |
$worksheet->AddCell( $row, $col + 1, 'Newer' ); | |
# Add a new string in cell C1 with the format from cell A3. | |
my $cell = $worksheet->get_cell( $row + 2, $col ); | |
my $format_number = $cell->{FormatNo}; | |
$worksheet->AddCell( $row, $col + 2, 'Newest', $format_number ); | |
# Write over the existing file or write a new file. | |
$template->SaveAs('newfile.xls'); | |
We should now have an Excel file that looks like this: | |
----------------------------------------------------- | |
| | A | B | C | | |
----------------------------------------------------- | |
| 1 | New string | Newer | *Newest* | ... | |
| 2 | World | ... | ... | ... | |
| 3 | *Bold text* | ... | ... | ... | |
| 4 | ... | ... | ... | ... | |
| 5 | ... | ... | ... | ... | |
=head1 DESCRIPTION | |
The C<Spreadsheet::ParseExcel::SaveParser> module rewrite an existing Excel file by reading it with C<Spreadsheet::ParseExcel> and rewriting it with C<Spreadsheet::WriteExcel>. | |
=head1 METHODS | |
=head1 Parser | |
=head2 new() | |
$parse = new Spreadsheet::ParseExcel::SaveParser(); | |
Constructor. | |
=head2 Parse() | |
$workbook = $parse->Parse($sFileName); | |
$workbook = $parse->Parse($sFileName , $formatter); | |
Returns a L</Workbook> object. If an error occurs, returns undef. | |
The optional C<$formatter> is a Formatter Class to format the value of cells. | |
=head1 Workbook | |
The C<Parse()> method returns a C<Spreadsheet::ParseExcel::SaveParser::Workbook> object. | |
This is a subclass of the L<Spreadsheet::ParseExcel::Workbook> and has the following methods: | |
=head2 worksheets() | |
Returns an array of L</Worksheet> objects. This was most commonly used to iterate over the worksheets in a workbook: | |
for my $worksheet ( $workbook->worksheets() ) { | |
... | |
} | |
=head2 worksheet() | |
The C<worksheet()> method returns a single C<Worksheet> object using either its name or index: | |
$worksheet = $workbook->worksheet('Sheet1'); | |
$worksheet = $workbook->worksheet(0); | |
Returns C<undef> if the sheet name or index doesn't exist. | |
=head2 AddWorksheet() | |
$workbook = $workbook->AddWorksheet($name, %properties); | |
Create a new Worksheet object of type C<Spreadsheet::ParseExcel::Worksheet>. | |
The C<%properties> hash contains the properties of new Worksheet. | |
=head2 AddFont | |
$workbook = $workbook->AddFont(%properties); | |
Create new Font object of type C<Spreadsheet::ParseExcel::Font>. | |
The C<%properties> hash contains the properties of new Font. | |
=head2 AddFormat | |
$workbook = $workbook->AddFormat(%properties); | |
The C<%properties> hash contains the properties of new Font. | |
=head1 Worksheet | |
Spreadsheet::ParseExcel::SaveParser::Worksheet | |
Worksheet is a subclass of Spreadsheet::ParseExcel::Worksheet. | |
And has these methods : | |
The C<Worksbook::worksheet()> method returns a C<Spreadsheet::ParseExcel::SaveParser::Worksheet> object. | |
This is a subclass of the L<Spreadsheet::ParseExcel::Worksheet> and has the following methods: | |
=head1 AddCell | |
$workbook = $worksheet->AddCell($row, $col, $value, $format [$encoding]); | |
Create new Cell object of type C<Spreadsheet::ParseExcel::Cell>. | |
The C<$format> parameter is the format number rather than a full format object. | |
To specify just same as another cell, | |
you can set it like below: | |
$row = 0; | |
$col = 0; | |
$worksheet = $template->worksheet(0); | |
$cell = $worksheet->get_cell( $row, $col ); | |
$format_number = $cell->{FormatNo}; | |
$worksheet->AddCell($row +1, $coll, 'New data', $format_number); | |
=head1 TODO | |
Please note that this module is currently (versions 0.50-0.60) undergoing a major | |
restructuring and rewriting. | |
=head1 Known Problems | |
You can only rewrite the features that Spreadsheet::WriteExcel supports so | |
macros, graphs and some other features in the original Excel file will be lost. | |
Also, formulas aren't rewritten, only the result of a formula is written. | |
Only last print area will remain. (Others will be removed) | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2002 Kawai Takanori and Nippon-RAD Co. OP Division | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/SaveParser/Workbook.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER_WORKBOOK'; | |
package Spreadsheet::ParseExcel::SaveParser::Workbook; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::SaveParser::Workbook - A class for SaveParser Workbooks. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
use base 'Spreadsheet::ParseExcel::Workbook'; | |
our $VERSION = '0.65'; | |
#============================================================================== | |
# Spreadsheet::ParseExcel::SaveParser::Workbook | |
#============================================================================== | |
sub new { | |
my ( $sPkg, $oBook ) = @_; | |
return undef unless ( defined $oBook ); | |
my %oThis = %$oBook; | |
bless \%oThis, $sPkg; | |
# re-bless worksheets (and set their _Book properties !!!) | |
my $sWkP = ref($sPkg) || "$sPkg"; | |
$sWkP =~ s/Workbook$/Worksheet/; | |
map { bless( $_, $sWkP ); } @{ $oThis{Worksheet} }; | |
map { $_->{_Book} = \%oThis; } @{ $oThis{Worksheet} }; | |
return \%oThis; | |
} | |
#------------------------------------------------------------------------------ | |
# Parse (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub Parse { | |
my ( $sClass, $sFile, $oWkFmt ) = @_; | |
my $oBook = Spreadsheet::ParseExcel::Workbook->Parse( $sFile, $oWkFmt ); | |
bless $oBook, $sClass; | |
# re-bless worksheets (and set their _Book properties !!!) | |
my $sWkP = ref($sClass) || "$sClass"; | |
$sWkP =~ s/Workbook$/Worksheet/; | |
map { bless( $_, $sWkP ); } @{ $oBook->{Worksheet} }; | |
map { $_->{_Book} = $oBook; } @{ $oBook->{Worksheet} }; | |
return $oBook; | |
} | |
#------------------------------------------------------------------------------ | |
# SaveAs (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub SaveAs { | |
my ( $oBook, $sName ) = @_; | |
# Create a new Excel workbook | |
my $oWrEx = Spreadsheet::WriteExcel->new($sName); | |
$oWrEx->compatibility_mode(); | |
my %hFmt; | |
my $iNo = 0; | |
my @aAlH = ( | |
'left', 'left', 'center', 'right', | |
'fill', 'justify', 'merge', 'equal_space' | |
); | |
my @aAlV = ( 'top', 'vcenter', 'bottom', 'vjustify', 'vequal_space' ); | |
foreach my $pFmt ( @{ $oBook->{Format} } ) { | |
my $oFmt = $oWrEx->addformat(); # Add Formats | |
unless ( $pFmt->{Style} ) { | |
$hFmt{$iNo} = $oFmt; | |
my $rFont = $pFmt->{Font}; | |
$oFmt->set_font( $rFont->{Name} ); | |
$oFmt->set_size( $rFont->{Height} ); | |
$oFmt->set_color( $rFont->{Color} ); | |
$oFmt->set_bold( $rFont->{Bold} ); | |
$oFmt->set_italic( $rFont->{Italic} ); | |
$oFmt->set_underline( $rFont->{Underline} ); | |
$oFmt->set_font_strikeout( $rFont->{Strikeout} ); | |
$oFmt->set_font_script( $rFont->{Super} ); | |
$oFmt->set_hidden( $rFont->{Hidden} ); #Add | |
$oFmt->set_locked( $pFmt->{Lock} ); | |
$oFmt->set_align( $aAlH[ $pFmt->{AlignH} ] ); | |
$oFmt->set_align( $aAlV[ $pFmt->{AlignV} ] ); | |
$oFmt->set_rotation( $pFmt->{Rotate} ); | |
$oFmt->set_num_format( | |
$oBook->{FmtClass}->FmtStringDef( $pFmt->{FmtIdx}, $oBook ) ); | |
$oFmt->set_text_wrap( $pFmt->{Wrap} ); | |
$oFmt->set_pattern( $pFmt->{Fill}->[0] ); | |
$oFmt->set_fg_color( $pFmt->{Fill}->[1] ) | |
if ( ( $pFmt->{Fill}->[1] >= 8 ) | |
&& ( $pFmt->{Fill}->[1] <= 63 ) ); | |
$oFmt->set_bg_color( $pFmt->{Fill}->[2] ) | |
if ( ( $pFmt->{Fill}->[2] >= 8 ) | |
&& ( $pFmt->{Fill}->[2] <= 63 ) ); | |
$oFmt->set_left( | |
( $pFmt->{BdrStyle}->[0] > 7 ) ? 3 : $pFmt->{BdrStyle}->[0] ); | |
$oFmt->set_right( | |
( $pFmt->{BdrStyle}->[1] > 7 ) ? 3 : $pFmt->{BdrStyle}->[1] ); | |
$oFmt->set_top( | |
( $pFmt->{BdrStyle}->[2] > 7 ) ? 3 : $pFmt->{BdrStyle}->[2] ); | |
$oFmt->set_bottom( | |
( $pFmt->{BdrStyle}->[3] > 7 ) ? 3 : $pFmt->{BdrStyle}->[3] ); | |
$oFmt->set_left_color( $pFmt->{BdrColor}->[0] ) | |
if ( ( $pFmt->{BdrColor}->[0] >= 8 ) | |
&& ( $pFmt->{BdrColor}->[0] <= 63 ) ); | |
$oFmt->set_right_color( $pFmt->{BdrColor}->[1] ) | |
if ( ( $pFmt->{BdrColor}->[1] >= 8 ) | |
&& ( $pFmt->{BdrColor}->[1] <= 63 ) ); | |
$oFmt->set_top_color( $pFmt->{BdrColor}->[2] ) | |
if ( ( $pFmt->{BdrColor}->[2] >= 8 ) | |
&& ( $pFmt->{BdrColor}->[2] <= 63 ) ); | |
$oFmt->set_bottom_color( $pFmt->{BdrColor}->[3] ) | |
if ( ( $pFmt->{BdrColor}->[3] >= 8 ) | |
&& ( $pFmt->{BdrColor}->[3] <= 63 ) ); | |
} | |
$iNo++; | |
} | |
for ( my $iSheet = 0 ; $iSheet < $oBook->{SheetCount} ; $iSheet++ ) { | |
my $oWkS = $oBook->{Worksheet}[$iSheet]; | |
my $oWrS = $oWrEx->addworksheet( $oWkS->{Name} ); | |
#Landscape | |
if ( !$oWkS->{Landscape} ) { # Landscape (0:Horizontal, 1:Vertical) | |
$oWrS->set_landscape(); | |
} | |
else { | |
$oWrS->set_portrait(); | |
} | |
#Protect | |
if ( defined $oWkS->{Protect} ) | |
{ # Protect ('':NoPassword, Password:Password) | |
if ( $oWkS->{Protect} ne '' ) { | |
$oWrS->protect( $oWkS->{Protect} ); | |
} | |
else { | |
$oWrS->protect(); | |
} | |
} | |
if ( $oWkS->{Scale} != 100 ) { | |
# Pages on fit with width and Heigt | |
$oWrS->fit_to_pages( $oWkS->{FitWidth}, $oWkS->{FitHeight} ); | |
#Print Scale and reset FitWidth/FitHeight | |
$oWrS->set_print_scale( $oWkS->{Scale} ); | |
} | |
else { | |
#Print Scale | |
$oWrS->set_print_scale( $oWkS->{Scale} ); | |
# Pages on fit with width and Heigt | |
$oWrS->fit_to_pages( $oWkS->{FitWidth}, $oWkS->{FitHeight} ); | |
} | |
# Paper Size | |
$oWrS->set_paper( $oWkS->{PaperSize} ); | |
# Margin | |
$oWrS->set_margin_left( $oWkS->{LeftMargin} ); | |
$oWrS->set_margin_right( $oWkS->{RightMargin} ); | |
$oWrS->set_margin_top( $oWkS->{TopMargin} ); | |
$oWrS->set_margin_bottom( $oWkS->{BottomMargin} ); | |
# HCenter | |
$oWrS->center_horizontally() if ( $oWkS->{HCenter} ); | |
# VCenter | |
$oWrS->center_vertically() if ( $oWkS->{VCenter} ); | |
# Header, Footer | |
$oWrS->set_header( $oWkS->{Header}, $oWkS->{HeaderMargin} ); | |
$oWrS->set_footer( $oWkS->{Footer}, $oWkS->{FooterMargin} ); | |
# Print Area | |
if ( ref( $oBook->{PrintArea}[$iSheet] ) eq 'ARRAY' ) { | |
my $raP; | |
for $raP ( @{ $oBook->{PrintArea}[$iSheet] } ) { | |
$oWrS->print_area(@$raP); | |
} | |
} | |
# Print Title | |
my $raW; | |
foreach $raW ( @{ $oBook->{PrintTitle}[$iSheet]->{Row} } ) { | |
$oWrS->repeat_rows(@$raW); | |
} | |
foreach $raW ( @{ $oBook->{PrintTitle}[$iSheet]->{Column} } ) { | |
$oWrS->repeat_columns(@$raW); | |
} | |
# Print Gridlines | |
if ( $oWkS->{PrintGrid} == 1 ) { | |
$oWrS->hide_gridlines(0); | |
} | |
else { | |
$oWrS->hide_gridlines(1); | |
} | |
# Print Headings | |
if ( $oWkS->{PrintHeaders} ) { | |
$oWrS->print_row_col_headers(); | |
} | |
# Horizontal Page Breaks | |
$oWrS->set_h_pagebreaks( @{ $oWkS->{HPageBreak} } ); | |
# Veritical Page Breaks | |
$oWrS->set_v_pagebreaks( @{ $oWkS->{VPageBreak} } ); | |
# PageStart => $oWkS->{PageStart}, # Page number for start | |
# UsePage => $oWkS->{UsePage}, # Use own start page number | |
# NoColor => $oWkS->{NoColor}, # Print in black-white | |
# Draft => $oWkS->{Draft}, # Print in draft mode | |
# Notes => $oWkS->{Notes}, # Print notes | |
# LeftToRight => $oWkS->{LeftToRight}, # Left to Right | |
for ( | |
my $iC = $oWkS->{MinCol} ; | |
defined $oWkS->{MaxCol} && $iC <= $oWkS->{MaxCol} ; | |
$iC++ | |
) | |
{ | |
if ( defined $oWkS->{ColWidth}[$iC] ) { | |
if ( $oWkS->{ColWidth}[$iC] > 0 ) { | |
$oWrS->set_column( $iC, $iC, $oWkS->{ColWidth}[$iC] ) | |
; #, undef, 1) ; | |
} | |
else { | |
$oWrS->set_column( $iC, $iC, 0, undef, 1 ); | |
} | |
} | |
} | |
my $merged_areas = $oWkS->get_merged_areas(); | |
my $merged_areas_h = {}; | |
if ($merged_areas) { | |
foreach my $range (@$merged_areas) { | |
$merged_areas_h->{$range->[0]}{$range->[1]} = $range; | |
} | |
} | |
for ( | |
my $iR = $oWkS->{MinRow} ; | |
defined $oWkS->{MaxRow} && $iR <= $oWkS->{MaxRow} ; | |
$iR++ | |
) | |
{ | |
$oWrS->set_row( $iR, $oWkS->{RowHeight}[$iR] ); | |
for ( | |
my $iC = $oWkS->{MinCol} ; | |
defined $oWkS->{MaxCol} && $iC <= $oWkS->{MaxCol} ; | |
$iC++ | |
) | |
{ | |
my $oWkC = $oWkS->{Cells}[$iR][$iC]; | |
if ($oWkC) { | |
if ( $oWkC->{Merged} and exists $merged_areas_h->{$iR}{$iC} ) { | |
my $oFmtN = $oWrEx->addformat(); | |
$oFmtN->copy( $hFmt{ $oWkC->{FormatNo} } ); | |
$oWrS->merge_range ( | |
@{$merged_areas_h->{$iR}{$iC}}, | |
$oBook->{FmtClass} | |
->TextFmt( $oWkC->{Val}, $oWkC->{Code} ), | |
$oFmtN | |
); | |
} | |
else { | |
$oWrS->write( | |
$iR, | |
$iC, | |
$oBook->{FmtClass} | |
->TextFmt( $oWkC->{Val}, $oWkC->{Code} ), | |
$hFmt{ $oWkC->{FormatNo} } | |
); | |
} | |
} | |
} | |
} | |
} | |
return $oWrEx; | |
} | |
#------------------------------------------------------------------------------ | |
# AddWorksheet (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub AddWorksheet { | |
my ( $oBook, $sName, %hAttr ) = @_; | |
$oBook->AddFormat if ( $#{ $oBook->{Format} } < 0 ); | |
$hAttr{Name} ||= $sName; | |
$hAttr{LeftMargin} ||= 0; | |
$hAttr{RightMargin} ||= 0; | |
$hAttr{TopMargin} ||= 0; | |
$hAttr{BottomMargin} ||= 0; | |
$hAttr{HeaderMargin} ||= 0; | |
$hAttr{FooterMargin} ||= 0; | |
$hAttr{FitWidth} ||= 0; | |
$hAttr{FitHeight} ||= 0; | |
$hAttr{PrintGrid} ||= 0; | |
my $oWkS = Spreadsheet::ParseExcel::SaveParser::Worksheet->new(%hAttr); | |
$oWkS->{_Book} = $oBook; | |
$oWkS->{_SheetNo} = $oBook->{SheetCount}; | |
$oBook->{Worksheet}[ $oBook->{SheetCount} ] = $oWkS; | |
$oBook->{SheetCount}++; | |
return $oWkS; #$oBook->{SheetCount} - 1; | |
} | |
#------------------------------------------------------------------------------ | |
# AddFont (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub AddFont { | |
my ( $oBook, %hAttr ) = @_; | |
$hAttr{Name} ||= 'Arial'; | |
$hAttr{Height} ||= 10; | |
$hAttr{Bold} ||= 0; | |
$hAttr{Italic} ||= 0; | |
$hAttr{Underline} ||= 0; | |
$hAttr{Strikeout} ||= 0; | |
$hAttr{Super} ||= 0; | |
push @{ $oBook->{Font} }, Spreadsheet::ParseExcel::Font->new(%hAttr); | |
return $#{ $oBook->{Font} }; | |
} | |
#------------------------------------------------------------------------------ | |
# AddFormat (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub AddFormat { | |
my ( $oBook, %hAttr ) = @_; | |
$hAttr{Fill} ||= [ 0, 0, 0 ]; | |
$hAttr{BdrStyle} ||= [ 0, 0, 0, 0 ]; | |
$hAttr{BdrColor} ||= [ 0, 0, 0, 0 ]; | |
$hAttr{AlignH} ||= 0; | |
$hAttr{AlignV} ||= 0; | |
$hAttr{Rotate} ||= 0; | |
$hAttr{Landscape} ||= 0; | |
$hAttr{FmtIdx} ||= 0; | |
if ( !defined( $hAttr{Font} ) ) { | |
my $oFont; | |
if ( defined $hAttr{FontNo} ) { | |
$oFont = $oBook->{Font}[ $hAttr{FontNo} ]; | |
} | |
elsif ( !defined $oFont ) { | |
if ( $#{ $oBook->{Font} } >= 0 ) { | |
$oFont = $oBook->{Font}[0]; | |
} | |
else { | |
my $iNo = $oBook->AddFont; | |
$oFont = $oBook->{Font}[$iNo]; | |
} | |
} | |
$hAttr{Font} = $oFont; | |
} | |
push @{ $oBook->{Format} }, Spreadsheet::ParseExcel::Format->new(%hAttr); | |
return $#{ $oBook->{Format} }; | |
} | |
#------------------------------------------------------------------------------ | |
# AddCell (for Spreadsheet::ParseExcel::SaveParser::Workbook) | |
#------------------------------------------------------------------------------ | |
sub AddCell { | |
my ( $oBook, $iSheet, $iR, $iC, $sVal, $oCell, $sCode ) = @_; | |
my %rhKey; | |
$oCell ||= $oBook->{Worksheet}[$iSheet] | |
->{Cells}[$iR][$iC]->{FormatNo} || 0; | |
my $iFmt = | |
( UNIVERSAL::isa( $oCell, 'Spreadsheet::ParseExcel::Cell' ) ) | |
? $oCell->{FormatNo} | |
: ( ref($oCell) ) ? 0 | |
: $oCell + 0; | |
$rhKey{FormatNo} = $iFmt; | |
$rhKey{Format} = $oBook->{Format}[$iFmt]; | |
$rhKey{Val} = $sVal; | |
$rhKey{Code} = $sCode || '_native_'; | |
$oBook->{_CurSheet} = $iSheet; | |
my $merged_areas = $oBook->{Worksheet}[$iSheet]->get_merged_areas(); | |
my $merged_areas_h = {}; | |
if ($merged_areas) { | |
foreach my $range (@$merged_areas) { | |
$merged_areas_h->{$range->[0]}{$range->[1]} = $range; | |
} | |
} | |
my $oNewCell = | |
Spreadsheet::ParseExcel::_NewCell( $oBook, $iR, $iC, %rhKey ); | |
Spreadsheet::ParseExcel::_SetDimension( $oBook, $iR, $iC, $iC ); | |
$oNewCell->{Merged} = 1 | |
if exists $merged_areas_h->{$iR}{$iC}; | |
return $oNewCell; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::SaveParser::Workbook - A class for SaveParser Workbooks. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER_WORKBOOK | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/SaveParser/Worksheet.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER_WORKSHEET'; | |
package Spreadsheet::ParseExcel::SaveParser::Worksheet; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::SaveParser::Worksheet - A class for SaveParser Worksheets. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
#============================================================================== | |
# Spreadsheet::ParseExcel::SaveParser::Worksheet | |
#============================================================================== | |
use base 'Spreadsheet::ParseExcel::Worksheet'; | |
our $VERSION = '0.65'; | |
sub new { | |
my ( $sClass, %rhIni ) = @_; | |
$sClass->SUPER::new(%rhIni); # returns object | |
} | |
#------------------------------------------------------------------------------ | |
# AddCell (for Spreadsheet::ParseExcel::SaveParser::Worksheet) | |
#------------------------------------------------------------------------------ | |
sub AddCell { | |
my ( $oSelf, $iR, $iC, $sVal, $oCell, $sCode ) = @_; | |
$oSelf->{_Book} | |
->AddCell( $oSelf->{_SheetNo}, $iR, $iC, $sVal, $oCell, $sCode ); | |
} | |
#------------------------------------------------------------------------------ | |
# Protect (for Spreadsheet::ParseExcel::SaveParser::Worksheet) | |
# - Password = undef -> No protect | |
# - Password = '' -> Protected. No password | |
# - Password = $pwd -> Protected. Password = $pwd | |
#------------------------------------------------------------------------------ | |
sub Protect { | |
my ( $oSelf, $sPassword ) = @_; | |
$oSelf->{Protect} = $sPassword; | |
} | |
1; | |
__END__ | |
=pod | |
=head1 NAME | |
Spreadsheet::ParseExcel::SaveParser::Worksheet - A class for SaveParser Worksheets. | |
=head1 SYNOPSIS | |
See the documentation for Spreadsheet::ParseExcel. | |
=head1 DESCRIPTION | |
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel. | |
=head1 AUTHOR | |
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org | |
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org | |
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org | |
Original author: Kawai Takanori kwitknr@cpan.org | |
=head1 COPYRIGHT | |
Copyright (c) 2014 Douglas Wilson | |
Copyright (c) 2009-2013 John McNamara | |
Copyright (c) 2006-2008 Gabor Szabo | |
Copyright (c) 2000-2006 Kawai Takanori | |
All rights reserved. | |
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file. | |
=cut | |
SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER_WORKSHEET | |
$fatpacked{"Spreadsheet/XLSX/ParseExcel/Utility.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_UTILITY'; | |
package Spreadsheet::ParseExcel::Utility; | |
############################################################################### | |
# | |
# Spreadsheet::ParseExcel::Utility - Utility functions for ParseExcel. | |
# | |
# Used in conjunction with Spreadsheet::ParseExcel. | |
# | |
# Copyright (c) 2014 Douglas Wilson | |
# Copyright (c) 2009-2013 John McNamara | |
# Copyright (c) 2006-2008 Gabor Szabo | |
# Copyright (c) 2000-2006 Kawai Takanori | |
# | |
# perltidy with standard settings. | |
# | |
# Documentation after __END__ | |
# | |
use strict; | |
use warnings; | |
require Exporter; | |
use vars qw(@ISA @EXPORT_OK); | |
@ISA = qw(Exporter); | |
@EXPORT_OK = qw(ExcelFmt LocaltimeExcel ExcelLocaltime | |
col2int int2col sheetRef xls2csv); | |
our $VERSION = '0.65'; | |
my $qrNUMBER = qr/(^[+-]?\d+(\.\d+)?$)|(^[+-]?\d+\.?(\d*)[eE][+-](\d+))$/; | |
############################################################################### | |
# | |
# ExcelFmt() | |
# | |
# This function takes an Excel style number format and converts a number into | |
# that format. for example: 'hh:mm:ss AM/PM' + 0.01023148 = '12:14:44 AM'. | |
# | |
# It does this with a type of templating mechanism. The format string is parsed | |
# to identify tokens that need to be replaced and their position within the | |
# string is recorded. These can be thought of as placeholders. The number is | |
# then converted to the required formats and substituted into the placeholders. | |
# | |
# Interested parties should refer to the Excel documentation on cell formats for | |
# more information: http://office.microsoft.com/en-us/excel/HP051995001033.aspx | |
# The Microsoft documentation for the Excel Binary File Format, [MS-XLS].pdf, | |
# also contains a ABNF grammar for number format strings. | |
# | |
# Maintainers notes: | |
# ================== | |
# | |
# Note on format subsections: | |
# A format string can contain 4 possible sub-sections separated by semi-colons: | |
# Positive numbers, negative numbers, zero values, and text. | |
# For example: _(* #,##0_);_(* (#,##0);_(* "-"_);_(@_) | |
# | |
# Note on conditional formats. | |
# A number format in Excel can have a conditional expression such as: | |
# [>9999999](000)000-0000;000-0000 | |
# This is equivalent to the following in Perl: | |
# $format = $number > 9999999 ? '(000)000-0000' : '000-0000'; | |
# Nested conditionals are also possible but we don't support them. | |
# | |
# Efficiency: The excessive use of substr() isn't very efficient. However, | |
# it probably doesn't merit rewriting this function with a parser or regular | |
# expressions and \G. | |
# | |
# TODO: I think the single quote handling may not be required. Check. | |
# | |
sub ExcelFmt { | |
my ( $format_str, $number, $is_1904, $number_type, $want_subformats ) = @_; | |
# Return text strings without further formatting. | |
return $number unless $number =~ $qrNUMBER; | |
# Handle OpenOffice.org GENERAL format. | |
$format_str = '@' if uc($format_str) eq "GENERAL"; | |
# Check for a conditional at the start of the format. See notes above. | |
my $conditional; | |
if ( $format_str =~ /^\[([<>=][^\]]+)\](.*)$/ ) { | |
$conditional = $1; | |
$format_str = $2; | |
} | |
# Ignore the underscore token which is used to indicate a padding space. | |
$format_str =~ s/_/ /g; | |
# Split the format string into 4 possible sub-sections: positive numbers, | |
# negative numbers, zero values, and text. See notes above. | |
my @formats; | |
my $section = 0; | |
my $double_quote = 0; | |
my $single_quote = 0; | |
# Initial parsing of the format string to remove escape characters. This | |
# also handles quoted strings. See note about single quotes above. | |
CHARACTER: | |
for my $char ( split //, $format_str ) { | |
if ( $double_quote or $single_quote ) { | |
$formats[$section] .= $char; | |
$double_quote = 0 if $char eq '"'; | |
$single_quote = 0; | |
next CHARACTER; | |
} | |
if ( $char eq ';' ) { | |
$section++; | |
next CHARACTER; | |
} | |
elsif ( $char eq '"' ) { | |
$double_quote = 1; | |
} | |
elsif ( $char eq '!' ) { | |
$single_quote = 1; | |
} | |
elsif ( $char eq '\\' ) { | |
$single_quote = 1; | |
} | |
elsif ( $char eq '(' ) { | |
next CHARACTER; # Ignore. | |
} | |
elsif ( $char eq ')' ) { | |
next CHARACTER; # Ignore. | |
} | |
# Convert upper case OpenOffice.org date/time formats to lowercase.. | |
$char = lc($char) if $char =~ /[DMYHS]/; | |
$formats[$section] .= $char; | |
} | |
# Select the appropriate format from the 4 possible sub-sections: | |
# positive numbers, negative numbers, zero values, and text. | |
# We ignore the Text section since non-numeric values are returned | |
# unformatted at the start of the function. | |
my $format; | |
$section = 0; | |
if ( @formats == 1 ) { | |
$section = 0; | |
} | |
elsif ( @formats == 2 ) { | |
if ( $number < 0 ) { | |
$section = 1; | |
} | |
else { | |
$section = 0; | |
} | |
} | |
elsif ( @formats == 3 ) { | |
if ( $number == 0 ) { | |
$section = 2; | |
} | |
elsif ( $number < 0 ) { | |
$section = 1; | |
} | |
else { | |
$section = 0; | |
} | |
} | |
else { | |
$section = 0; | |
} | |
# Override the previous choice if the format is conditional. | |
if ($conditional) { | |
# TODO. Replace string eval with a function. | |
$section = eval "$number $conditional" ? 0 : 1; | |
} | |
# We now have the required format. | |
$format = $formats[$section]; | |
# The format string can contain one of the following colours: | |
# [Black] [Blue] [Cyan] [Green] [Magenta] [Red] [White] [Yellow] | |
# or the string [ColorX] where x is a colour index from 1 to 56. | |
# We don't use the colour but we return it to the caller. | |
# | |
my $color = ''; | |
if ( $format =~ s/^(\[[A-Za-z]{3,}(\d{1,2})?\])// ) { | |
$color = $1; | |
} | |
# Remove the locale, such as [$-409], from the format string. | |
my $locale = ''; | |
if ( $format =~ s/^(\[\$?-F?\d+\])// ) { | |
$locale = $1; | |
} | |
# Replace currency locale, such as [$$-409], with $ in the format string. | |
# See the RT#60547 test cases in 21_number_format_user.t. | |
if ( $format =~ s/(\[\$([^-]+)(-\d+)?\])/$2/s ) { | |
$locale = $1; | |
} | |
# Remove leading # from '# ?/?', '# ??/??' fraction formats. | |
$format =~ s{# \?}{?}g; | |
# Parse the format string and create an AoA of placeholders that contain | |
# the parts of the string to be replaced. The format of the information | |
# stored is: [ $token, $start_pos, $end_pos, $option_info ]. | |
# | |
my $format_mode = ''; # Either: '', 'number', 'date' | |
my $pos = 0; # Character position within format string. | |
my @placeholders = (); # Arefs with parts of the format to be replaced. | |
my $token = ''; # The actual format extracted from the total str. | |
my $start_pos; # A position variable. Initial parser position. | |
my $token_start = -1; # A position variable. | |
my $decimal_pos = -1; # Position of the punctuation char "." or ",". | |
my $comma_count = 0; # Count of the commas in the format. | |
my $is_fraction = 0; # Number format is a fraction. | |
my $is_currency = 0; # Number format is a currency. | |
my $is_percent = 0; # Number format is a percentage. | |
my $is_12_hour = 0; # Time format is using 12 hour clock. | |
my $seen_dot = 0; # Treat only the first "." as the decimal point. | |
# Parse the format. | |
PARSER: | |
while ( $pos < length $format ) { | |
$start_pos = $pos; | |
my $char = substr( $format, $pos, 1 ); | |
# Ignore control format characters such as '#0+-.?eE,%'. However, | |
# only ignore '.' if it is the first one encountered. RT 45502. | |
if ( ( !$seen_dot && $char !~ /[#0\+\-\.\?eE\,\%]/ ) | |
|| $char !~ /[#0\+\-\?eE\,\%]/ ) | |
{ | |
if ( $token_start != -1 ) { | |
push @placeholders, | |
[ | |
substr( $format, $token_start, $pos - $token_start ), | |
$decimal_pos, $pos - $token_start | |
]; | |
$token_start = -1; | |
} | |
} | |
# Processing for quoted strings within the format. See notes above. | |
if ( $char eq '"' ) { | |
$double_quote = $double_quote ? 0 : 1; | |
$pos++; | |
next PARSER; | |
} | |
elsif ( $char eq '!' ) { | |
$single_quote = 1; | |
$pos++; | |
next PARSER; | |
} | |
elsif ( $char eq '\\' ) { | |
if ( $single_quote != 1 ) { | |
$single_quote = 1; | |
$pos++; | |
next PARSER; | |
} | |
} | |
if ( ( defined($double_quote) and ($double_quote) ) | |
or ( defined($single_quote) and ($single_quote) ) | |
or ( $seen_dot && $char eq '.' ) ) | |
{ | |
$single_quote = 0; | |
if ( | |
( $format_mode ne 'date' ) | |
and ( ( substr( $format, $pos, 2 ) eq "\x81\xA2" ) | |
|| ( substr( $format, $pos, 2 ) eq "\x81\xA3" ) | |
|| ( substr( $format, $pos, 2 ) eq "\xA2\xA4" ) | |
|| ( substr( $format, $pos, 2 ) eq "\xA2\xA5" ) ) | |
) | |
{ | |
# The above matches are currency symbols. | |
push @placeholders, | |
[ substr( $format, $pos, 2 ), length($token), 2 ]; | |
$is_currency = 1; | |
$pos += 2; | |
} | |
else { | |
$pos++; | |
} | |
} | |
elsif ( | |
( $char =~ /[#0\+\.\?eE\,\%]/ ) | |
|| ( ( $format_mode ne 'date' ) | |
and ( ( $char eq '-' ) || ( $char eq '(' ) || ( $char eq ')' ) ) | |
) | |
) | |
{ | |
$format_mode = 'number' unless $format_mode; | |
if ( substr( $format, $pos, 1 ) =~ /[#0]/ ) { | |
if ( | |
substr( $format, $pos ) =~ | |
/^([#0]+[\.]?[0#]*[eE][\+\-][0#]+)/ ) | |
{ | |
push @placeholders, [ $1, $pos, length($1) ]; | |
$pos += length($1); | |
} | |
else { | |
if ( $token_start == -1 ) { | |
$token_start = $pos; | |
$decimal_pos = length($token); | |
} | |
} | |
} | |
elsif ( substr( $format, $pos, 1 ) eq '?' ) { | |
# Look for a fraction format like ?/? or ??/?? | |
if ( $token_start != -1 ) { | |
push @placeholders, | |
[ | |
substr( | |
$format, $token_start, $pos - $token_start + 1 | |
), | |
$decimal_pos, | |
$pos - $token_start + 1 | |
]; | |
} | |
$token_start = $pos; | |
# Find the end of the fraction format. | |
FRACTION: | |
while ( $pos < length($format) ) { | |
if ( substr( $format, $pos, 1 ) eq '/' ) { | |
$is_fraction = 1; | |
} | |
elsif ( substr( $format, $pos, 1 ) eq '?' ) { | |
$pos++; | |
next FRACTION; | |
} | |
else { | |
if ( $is_fraction | |
&& ( substr( $format, $pos, 1 ) =~ /[0-9]/ ) ) | |
{ | |
# TODO: Could invert if() logic and remove this. | |
$pos++; | |
next FRACTION; | |
} | |
else { | |
last FRACTION; | |
} | |
} | |
$pos++; | |
} | |
$pos--; | |
push @placeholders, | |
[ | |
substr( $format, $token_start, $pos - $token_start + 1 ), | |
length($token), $pos - $token_start + 1 | |
]; | |
$token_start = -1; | |
} | |
elsif ( substr( $format, $pos, 3 ) =~ /^[eE][\+\-][0#]$/ ) { | |
if ( substr( $format, $pos ) =~ /([eE][\+\-][0#]+)/ ) { | |
push @placeholders, [ $1, $pos, length($1) ]; | |
$pos += length($1); | |
} | |
$token_start = -1; | |
} | |
else { | |
if ( $token_start != -1 ) { | |
push @placeholders, | |
[ | |
substr( $format, $token_start, $pos - $token_start ), | |
$decimal_pos, $pos - $token_start | |
]; | |
$token_start = -1; | |
} | |
if ( substr( $format, $pos, 1 ) =~ /[\+\-]/ ) { | |
push @placeholders, | |
[ substr( $format, $pos, 1 ), length($token), 1 ]; | |
$is_currency = 1; | |
} | |
elsif ( substr( $format, $pos, 1 ) eq '.' ) { | |
push @placeholders, | |
[ substr( $format, $pos, 1 ), length($token), 1 ]; | |
$seen_dot = 1; | |
} | |
elsif ( substr( $format, $pos, 1 ) eq ',' ) { | |
$comma_count++; | |
push @placeholders, | |
[ substr( $format, $pos, 1 ), length($token), 1 ]; | |
} | |
elsif ( substr( $format, $pos, 1 ) eq '%' ) { | |
$is_percent = 1; | |
} | |
elsif (( substr( $format, $pos, 1 ) eq '(' ) | |
|| ( substr( $format, $pos, 1 ) eq ')' ) ) | |
{ | |
push @placeholders, | |
[ substr( $format, $pos, 1 ), length($token), 1 ]; | |
$is_currency = 1; | |
} | |
} | |
$pos++; | |
} | |
elsif ( $char =~ /[ymdhsapg]/i ) { | |
$format_mode = 'date' unless $format_mode; | |
if ( substr( $format, $pos, 5 ) =~ /am\/pm/i ) { | |
push @placeholders, [ 'am/pm', length($token), 5 ]; | |
$is_12_hour = 1; | |
$pos += 5; | |
} | |
elsif ( substr( $format, $pos, 3 ) =~ /a\/p/i ) { | |
push @placeholders, [ 'a/p', length($token), 3 ]; | |
$is_12_hour = 1; | |
$pos += 3; | |
} | |
elsif ( substr( $format, $pos, 5 ) eq 'mmmmm' ) { | |
push @placeholders, [ 'mmmmm', length($token), 5 ]; | |
$pos += 5; | |
} | |
elsif (( substr( $format, $pos, 4 ) eq 'mmmm' ) | |
|| ( substr( $format, $pos, 4 ) eq 'dddd' ) | |
|| ( substr( $format, $pos, 4 ) eq 'yyyy' ) | |
|| ( substr( $format, $pos, 4 ) eq 'ggge' ) ) | |
{ | |
push @placeholders, | |
[ substr( $format, $pos, 4 ), length($token), 4 ]; | |
$pos += 4; | |
} | |
elsif (( substr( $format, $pos, 3 ) eq 'ddd' ) | |
|| ( substr( $format, $pos, 3 ) eq 'mmm' ) | |
|| ( substr( $format, $pos, 3 ) eq 'yyy' ) ) | |
{ | |
push @placeholders, | |
[ substr( $format, $pos, 3 ), length($token), 3 ]; | |
$pos += 3; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment