Skip to content

Instantly share code, notes, and snippets.

@Konfekt
Last active September 15, 2021 06:25
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save Konfekt/0f311b7abf4ddccd823954f7cf790b72 to your computer and use it in GitHub Desktop.
self-contained version of git-xlsx-textconv.pl for use in Git under Windows
This file has been truncated, but you can view the full file.
#!/usr/bin/env perl
# Self-contained version of git-xlsx-textconv.pl from
# https://github.com/yappo/p5-git-xlsx-textconv.pl
# for use in Git under Windows.
# created by [fatpacker](https://www.perladvent.org/2012/2012-12-14.html).
# In contrast xls2csv from https://github.com/xevo/xls2csv cannot be made
# self-contained by fatpacker because of binary dependencies (such as Iconv).
# This chunk of stuff was generated by App::FatPacker. To find the original
# file's code, look for the end of this BEGIN block or the string 'FATPACK'
BEGIN {
my %fatpacked;
$fatpacked{"Archive/Zip.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP';
package Archive::Zip;
use 5.006;
use strict;
use Carp ();
use Cwd ();
use IO::File ();
use IO::Seekable ();
use Compress::Raw::Zlib ();
use File::Spec ();
use File::Temp ();
use FileHandle ();
use vars qw( $VERSION @ISA );
BEGIN {
$VERSION = '1.68';
require Exporter;
@ISA = qw( Exporter );
}
use vars qw( $ChunkSize $ErrorHandler );
BEGIN {
# This is the size we'll try to read, write, and (de)compress.
# You could set it to something different if you had lots of memory
# and needed more speed.
$ChunkSize ||= 32768;
$ErrorHandler = \&Carp::carp;
}
# BEGIN block is necessary here so that other modules can use the constants.
use vars qw( @EXPORT_OK %EXPORT_TAGS );
BEGIN {
@EXPORT_OK = ('computeCRC32');
%EXPORT_TAGS = (
CONSTANTS => [
qw(
ZIP64_SUPPORTED
FA_MSDOS
FA_UNIX
GPBF_ENCRYPTED_MASK
GPBF_DEFLATING_COMPRESSION_MASK
GPBF_HAS_DATA_DESCRIPTOR_MASK
COMPRESSION_STORED
COMPRESSION_DEFLATED
COMPRESSION_LEVEL_NONE
COMPRESSION_LEVEL_DEFAULT
COMPRESSION_LEVEL_FASTEST
COMPRESSION_LEVEL_BEST_COMPRESSION
IFA_TEXT_FILE_MASK
IFA_TEXT_FILE
IFA_BINARY_FILE
ZIP64_AS_NEEDED
ZIP64_EOCD
ZIP64_HEADERS
)
],
MISC_CONSTANTS => [
qw(
FA_AMIGA
FA_VAX_VMS
FA_VM_CMS
FA_ATARI_ST
FA_OS2_HPFS
FA_MACINTOSH
FA_Z_SYSTEM
FA_CPM
FA_TOPS20
FA_WINDOWS_NTFS
FA_QDOS
FA_ACORN
FA_VFAT
FA_MVS
FA_BEOS
FA_TANDEM
FA_THEOS
GPBF_IMPLODING_8K_SLIDING_DICTIONARY_MASK
GPBF_IMPLODING_3_SHANNON_FANO_TREES_MASK
GPBF_IS_COMPRESSED_PATCHED_DATA_MASK
COMPRESSION_SHRUNK
DEFLATING_COMPRESSION_NORMAL
DEFLATING_COMPRESSION_MAXIMUM
DEFLATING_COMPRESSION_FAST
DEFLATING_COMPRESSION_SUPER_FAST
COMPRESSION_REDUCED_1
COMPRESSION_REDUCED_2
COMPRESSION_REDUCED_3
COMPRESSION_REDUCED_4
COMPRESSION_IMPLODED
COMPRESSION_TOKENIZED
COMPRESSION_DEFLATED_ENHANCED
COMPRESSION_PKWARE_DATA_COMPRESSION_LIBRARY_IMPLODED
)
],
ERROR_CODES => [
qw(
AZ_OK
AZ_STREAM_END
AZ_ERROR
AZ_FORMAT_ERROR
AZ_IO_ERROR
)
],
# For Internal Use Only
PKZIP_CONSTANTS => [
qw(
SIGNATURE_FORMAT
SIGNATURE_LENGTH
LOCAL_FILE_HEADER_SIGNATURE
LOCAL_FILE_HEADER_FORMAT
LOCAL_FILE_HEADER_LENGTH
DATA_DESCRIPTOR_SIGNATURE
DATA_DESCRIPTOR_FORMAT
DATA_DESCRIPTOR_LENGTH
DATA_DESCRIPTOR_ZIP64_FORMAT
DATA_DESCRIPTOR_ZIP64_LENGTH
DATA_DESCRIPTOR_FORMAT_NO_SIG
DATA_DESCRIPTOR_LENGTH_NO_SIG
DATA_DESCRIPTOR_ZIP64_FORMAT_NO_SIG
DATA_DESCRIPTOR_ZIP64_LENGTH_NO_SIG
CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE
CENTRAL_DIRECTORY_FILE_HEADER_FORMAT
CENTRAL_DIRECTORY_FILE_HEADER_LENGTH
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_FORMAT
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_FORMAT
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH
END_OF_CENTRAL_DIRECTORY_SIGNATURE
END_OF_CENTRAL_DIRECTORY_FORMAT
END_OF_CENTRAL_DIRECTORY_LENGTH
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE_STRING
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE_STRING
END_OF_CENTRAL_DIRECTORY_SIGNATURE_STRING
)
],
# For Internal Use Only
UTILITY_METHODS => [
qw(
_error
_printError
_ioError
_formatError
_zip64NotSupported
_subclassResponsibility
_binmode
_isSeekable
_newFileHandle
_readSignature
_asZipDirName
)
],
);
# Add all the constant names and error code names to @EXPORT_OK
Exporter::export_ok_tags(
qw(
CONSTANTS
ERROR_CODES
PKZIP_CONSTANTS
UTILITY_METHODS
MISC_CONSTANTS
));
}
# Zip64 format support status
use constant ZIP64_SUPPORTED => !! eval { pack("Q<", 1) };
# Error codes
use constant AZ_OK => 0;
use constant AZ_STREAM_END => 1;
use constant AZ_ERROR => 2;
use constant AZ_FORMAT_ERROR => 3;
use constant AZ_IO_ERROR => 4;
# File types
# Values of Archive::Zip::Member->fileAttributeFormat()
use constant FA_MSDOS => 0;
use constant FA_AMIGA => 1;
use constant FA_VAX_VMS => 2;
use constant FA_UNIX => 3;
use constant FA_VM_CMS => 4;
use constant FA_ATARI_ST => 5;
use constant FA_OS2_HPFS => 6;
use constant FA_MACINTOSH => 7;
use constant FA_Z_SYSTEM => 8;
use constant FA_CPM => 9;
use constant FA_TOPS20 => 10;
use constant FA_WINDOWS_NTFS => 11;
use constant FA_QDOS => 12;
use constant FA_ACORN => 13;
use constant FA_VFAT => 14;
use constant FA_MVS => 15;
use constant FA_BEOS => 16;
use constant FA_TANDEM => 17;
use constant FA_THEOS => 18;
# general-purpose bit flag masks
# Found in Archive::Zip::Member->bitFlag()
use constant GPBF_ENCRYPTED_MASK => 1 << 0;
use constant GPBF_DEFLATING_COMPRESSION_MASK => 3 << 1;
use constant GPBF_HAS_DATA_DESCRIPTOR_MASK => 1 << 3;
# deflating compression types, if compressionMethod == COMPRESSION_DEFLATED
# ( Archive::Zip::Member->bitFlag() & GPBF_DEFLATING_COMPRESSION_MASK )
use constant DEFLATING_COMPRESSION_NORMAL => 0 << 1;
use constant DEFLATING_COMPRESSION_MAXIMUM => 1 << 1;
use constant DEFLATING_COMPRESSION_FAST => 2 << 1;
use constant DEFLATING_COMPRESSION_SUPER_FAST => 3 << 1;
# compression method
# these two are the only ones supported in this module
use constant COMPRESSION_STORED => 0; # file is stored (no compression)
use constant COMPRESSION_DEFLATED => 8; # file is Deflated
use constant COMPRESSION_LEVEL_NONE => 0;
use constant COMPRESSION_LEVEL_DEFAULT => -1;
use constant COMPRESSION_LEVEL_FASTEST => 1;
use constant COMPRESSION_LEVEL_BEST_COMPRESSION => 9;
# internal file attribute bits
# Found in Archive::Zip::Member::internalFileAttributes()
use constant IFA_TEXT_FILE_MASK => 1;
use constant IFA_TEXT_FILE => 1;
use constant IFA_BINARY_FILE => 0;
# desired zip64 structures for archive creation
use constant ZIP64_AS_NEEDED => 0;
use constant ZIP64_EOCD => 1;
use constant ZIP64_HEADERS => 2;
# PKZIP file format miscellaneous constants (for internal use only)
use constant SIGNATURE_FORMAT => "V";
use constant SIGNATURE_LENGTH => 4;
# these lengths are without the signature.
use constant LOCAL_FILE_HEADER_SIGNATURE => 0x04034b50;
use constant LOCAL_FILE_HEADER_FORMAT => "v3 V4 v2";
use constant LOCAL_FILE_HEADER_LENGTH => 26;
# PKZIP docs don't mention the signature, but Info-Zip writes it.
use constant DATA_DESCRIPTOR_SIGNATURE => 0x08074b50;
use constant DATA_DESCRIPTOR_FORMAT => "V3";
use constant DATA_DESCRIPTOR_LENGTH => 12;
use constant DATA_DESCRIPTOR_ZIP64_FORMAT => "L< Q<2";
use constant DATA_DESCRIPTOR_ZIP64_LENGTH => 20;
# but the signature is apparently optional.
use constant DATA_DESCRIPTOR_FORMAT_NO_SIG => "V2";
use constant DATA_DESCRIPTOR_LENGTH_NO_SIG => 8;
use constant DATA_DESCRIPTOR_ZIP64_FORMAT_NO_SIG => "Q<2";
use constant DATA_DESCRIPTOR_ZIP64_LENGTH_NO_SIG => 16;
use constant CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE => 0x02014b50;
use constant CENTRAL_DIRECTORY_FILE_HEADER_FORMAT => "C2 v3 V4 v5 V2";
use constant CENTRAL_DIRECTORY_FILE_HEADER_LENGTH => 42;
# zip64 support
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE => 0x06064b50;
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE_STRING =>
pack("V", ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE);
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_FORMAT => "Q< S<2 L<2 Q<4";
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH => 52;
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE => 0x07064b50;
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE_STRING =>
pack("V", ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE);
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_FORMAT => "L< Q< L<";
use constant ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH => 16;
use constant END_OF_CENTRAL_DIRECTORY_SIGNATURE => 0x06054b50;
use constant END_OF_CENTRAL_DIRECTORY_SIGNATURE_STRING =>
pack("V", END_OF_CENTRAL_DIRECTORY_SIGNATURE);
use constant END_OF_CENTRAL_DIRECTORY_FORMAT => "v4 V2 v";
use constant END_OF_CENTRAL_DIRECTORY_LENGTH => 18;
use constant GPBF_IMPLODING_8K_SLIDING_DICTIONARY_MASK => 1 << 1;
use constant GPBF_IMPLODING_3_SHANNON_FANO_TREES_MASK => 1 << 2;
use constant GPBF_IS_COMPRESSED_PATCHED_DATA_MASK => 1 << 5;
# the rest of these are not supported in this module
use constant COMPRESSION_SHRUNK => 1; # file is Shrunk
use constant COMPRESSION_REDUCED_1 => 2; # file is Reduced CF=1
use constant COMPRESSION_REDUCED_2 => 3; # file is Reduced CF=2
use constant COMPRESSION_REDUCED_3 => 4; # file is Reduced CF=3
use constant COMPRESSION_REDUCED_4 => 5; # file is Reduced CF=4
use constant COMPRESSION_IMPLODED => 6; # file is Imploded
use constant COMPRESSION_TOKENIZED => 7; # reserved for Tokenizing compr.
use constant COMPRESSION_DEFLATED_ENHANCED => 9; # reserved for enh. Deflating
use constant COMPRESSION_PKWARE_DATA_COMPRESSION_LIBRARY_IMPLODED => 10;
# Load the various required classes
require Archive::Zip::Archive;
require Archive::Zip::Member;
require Archive::Zip::FileMember;
require Archive::Zip::DirectoryMember;
require Archive::Zip::ZipFileMember;
require Archive::Zip::NewFileMember;
require Archive::Zip::StringMember;
# Convenience functions
sub _ISA ($$) {
# Can't rely on Scalar::Util, so use the next best way
local $@;
!!eval { ref $_[0] and $_[0]->isa($_[1]) };
}
sub _CAN ($$) {
local $@;
!!eval { ref $_[0] and $_[0]->can($_[1]) };
}
#####################################################################
# Methods
sub new {
my $class = shift;
return Archive::Zip::Archive->new(@_);
}
sub computeCRC32 {
my ($data, $crc);
if (ref($_[0]) eq 'HASH') {
$data = $_[0]->{string};
$crc = $_[0]->{checksum};
} else {
$data = shift;
$data = shift if ref($data);
$crc = shift;
}
return Compress::Raw::Zlib::crc32($data, $crc);
}
# Report or change chunk size used for reading and writing.
# Also sets Zlib's default buffer size (eventually).
sub setChunkSize {
shift if ref($_[0]) eq 'Archive::Zip::Archive';
my $chunkSize = (ref($_[0]) eq 'HASH') ? shift->{chunkSize} : shift;
my $oldChunkSize = $Archive::Zip::ChunkSize;
$Archive::Zip::ChunkSize = $chunkSize if ($chunkSize);
return $oldChunkSize;
}
sub chunkSize {
return $Archive::Zip::ChunkSize;
}
sub setErrorHandler {
my $errorHandler = (ref($_[0]) eq 'HASH') ? shift->{subroutine} : shift;
$errorHandler = \&Carp::carp unless defined($errorHandler);
my $oldErrorHandler = $Archive::Zip::ErrorHandler;
$Archive::Zip::ErrorHandler = $errorHandler;
return $oldErrorHandler;
}
######################################################################
# Private utility functions (not methods).
sub _printError {
my $string = join(' ', @_, "\n");
my $oldCarpLevel = $Carp::CarpLevel;
$Carp::CarpLevel += 2;
&{$ErrorHandler}($string);
$Carp::CarpLevel = $oldCarpLevel;
}
# This is called on format errors.
sub _formatError {
shift if ref($_[0]);
_printError('format error:', @_);
return AZ_FORMAT_ERROR;
}
# This is called on IO errors.
sub _ioError {
shift if ref($_[0]);
_printError('IO error:', @_, ':', $!);
return AZ_IO_ERROR;
}
# This is called on generic errors.
sub _error {
shift if ref($_[0]);
_printError('error:', @_);
return AZ_ERROR;
}
# This is called if zip64 format is not supported but would be
# required.
sub _zip64NotSupported {
shift if ref($_[0]);
_printError('zip64 format not supported on this Perl interpreter');
return AZ_ERROR;
}
# Called when a subclass should have implemented
# something but didn't
sub _subclassResponsibility {
Carp::croak("subclass Responsibility\n");
}
# Try to set the given file handle or object into binary mode.
sub _binmode {
my $fh = shift;
return _CAN($fh, 'binmode') ? $fh->binmode() : binmode($fh);
}
# Attempt to guess whether file handle is seekable.
# Because of problems with Windows, this only returns true when
# the file handle is a real file.
sub _isSeekable {
my $fh = shift;
return 0 unless ref $fh;
_ISA($fh, "IO::Scalar") # IO::Scalar objects are brokenly-seekable
and return 0;
_ISA($fh, "IO::String")
and return 1;
if (_ISA($fh, "IO::Seekable")) {
# Unfortunately, some things like FileHandle objects
# return true for Seekable, but AREN'T!!!!!
_ISA($fh, "FileHandle")
and return 0;
return 1;
}
# open my $fh, "+<", \$data;
ref $fh eq "GLOB" && eval { seek $fh, 0, 1 } and return 1;
_CAN($fh, "stat")
and return -f $fh;
return (_CAN($fh, "seek") and _CAN($fh, "tell")) ? 1 : 0;
}
# Print to the filehandle, while making sure the pesky Perl special global
# variables don't interfere.
sub _print {
my ($self, $fh, @data) = @_;
local $\;
return $fh->print(@data);
}
# Return an opened IO::Handle
# my ( $status, fh ) = _newFileHandle( 'fileName', 'w' );
# Can take a filename, file handle, or ref to GLOB
# Or, if given something that is a ref but not an IO::Handle,
# passes back the same thing.
sub _newFileHandle {
my $fd = shift;
my $status = 1;
my $handle;
if (ref($fd)) {
if (_ISA($fd, 'IO::Scalar') or _ISA($fd, 'IO::String')) {
$handle = $fd;
} elsif (_ISA($fd, 'IO::Handle') or ref($fd) eq 'GLOB') {
$handle = IO::File->new;
$status = $handle->fdopen($fd, @_);
} else {
$handle = $fd;
}
} else {
$handle = IO::File->new;
$status = $handle->open($fd, @_);
}
return ($status, $handle);
}
# Returns next signature from given file handle, leaves
# file handle positioned afterwards.
#
# In list context, returns ($status, $signature)
# ( $status, $signature ) = _readSignature( $fh, $fileName );
#
# This function returns one of AZ_OK, AZ_IO_ERROR, or
# AZ_FORMAT_ERROR and calls the respective error handlers in the
# latter two cases. If optional $noFormatError is true, it does
# not call the error handler on format error, but only returns
# AZ_FORMAT_ERROR.
sub _readSignature {
my $fh = shift;
my $fileName = shift;
my $expectedSignature = shift; # optional
my $noFormatError = shift; # optional
my $signatureData;
my $bytesRead = $fh->read($signatureData, SIGNATURE_LENGTH);
if ($bytesRead != SIGNATURE_LENGTH) {
return _ioError("reading header signature");
}
my $signature = unpack(SIGNATURE_FORMAT, $signatureData);
my $status = AZ_OK;
# compare with expected signature, if any, or any known signature.
if (
(defined($expectedSignature) && $signature != $expectedSignature)
|| ( !defined($expectedSignature)
&& $signature != CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE
&& $signature != LOCAL_FILE_HEADER_SIGNATURE
&& $signature != END_OF_CENTRAL_DIRECTORY_SIGNATURE
&& $signature != DATA_DESCRIPTOR_SIGNATURE
&& $signature != ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE
&& $signature != ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE
)
) {
if (! $noFormatError ) {
my $errmsg = sprintf("bad signature: 0x%08x", $signature);
if (_isSeekable($fh)) {
$errmsg .= sprintf(" at offset %d", $fh->tell() - SIGNATURE_LENGTH);
}
$status = _formatError("$errmsg in file $fileName");
}
else {
$status = AZ_FORMAT_ERROR;
}
}
return ($status, $signature);
}
# Utility method to make and open a temp file.
# Will create $temp_dir if it does not exist.
# Returns file handle and name:
#
# my ($fh, $name) = Archive::Zip::tempFile();
# my ($fh, $name) = Archive::Zip::tempFile('mytempdir');
#
sub tempFile {
my $dir = (ref($_[0]) eq 'HASH') ? shift->{tempDir} : shift;
my ($fh, $filename) = File::Temp::tempfile(
SUFFIX => '.zip',
UNLINK => 1,
$dir ? (DIR => $dir) : ());
return (undef, undef) unless $fh;
my ($status, $newfh) = _newFileHandle($fh, 'w+');
$fh->close();
return ($newfh, $filename);
}
# Return the normalized directory name as used in a zip file (path
# separators become slashes, etc.).
# Will translate internal slashes in path components (i.e. on Macs) to
# underscores. Discards volume names.
# When $forceDir is set, returns paths with trailing slashes (or arrays
# with trailing blank members).
#
# If third argument is a reference, returns volume information there.
#
# input output
# . ('.') '.'
# ./a ('a') a
# ./a/b ('a','b') a/b
# ./a/b/ ('a','b') a/b
# a/b/ ('a','b') a/b
# /a/b/ ('','a','b') a/b
# c:\a\b\c.doc ('','a','b','c.doc') a/b/c.doc # on Windows
# "i/o maps:whatever" ('i_o maps', 'whatever') "i_o maps/whatever" # on Macs
sub _asZipDirName {
my $name = shift;
my $forceDir = shift;
my $volReturn = shift;
my ($volume, $directories, $file) =
File::Spec->splitpath(File::Spec->canonpath($name), $forceDir);
$$volReturn = $volume if (ref($volReturn));
my @dirs = map { $_ =~ y{/}{_}; $_ } File::Spec->splitdir($directories);
if (@dirs > 0) { pop(@dirs) unless $dirs[-1] } # remove empty component
push(@dirs, defined($file) ? $file : '');
#return wantarray ? @dirs : join ( '/', @dirs );
my $normalised_path = join '/', @dirs;
# Leading directory separators should not be stored in zip archives.
# Example:
# C:\a\b\c\ a/b/c
# C:\a\b\c.txt a/b/c.txt
# /a/b/c/ a/b/c
# /a/b/c.txt a/b/c.txt
$normalised_path =~ s{^/}{}; # remove leading separator
return $normalised_path;
}
# Return an absolute local name for a zip name.
# Assume a directory if zip name has trailing slash.
# Takes an optional volume name in FS format (like 'a:').
#
sub _asLocalName {
my $name = shift; # zip format
my $volume = shift;
$volume = '' unless defined($volume); # local FS format
my @paths = split(/\//, $name);
my $filename = pop(@paths);
$filename = '' unless defined($filename);
my $localDirs = @paths ? File::Spec->catdir(@paths) : '';
my $localName = File::Spec->catpath($volume, $localDirs, $filename);
unless ($volume) {
$localName = File::Spec->rel2abs($localName, Cwd::getcwd());
}
return $localName;
}
1;
__END__
=pod
=encoding utf8
=head1 NAME
Archive::Zip - Provide an interface to ZIP archive files.
=head1 SYNOPSIS
# Create a Zip file
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
my $zip = Archive::Zip->new();
# Add a directory
my $dir_member = $zip->addDirectory( 'dirname/' );
# Add a file from a string with compression
my $string_member = $zip->addString( 'This is a test', 'stringMember.txt' );
$string_member->desiredCompressionMethod( COMPRESSION_DEFLATED );
# Add a file from disk
my $file_member = $zip->addFile( 'xyz.pl', 'AnotherName.pl' );
# Save the Zip file
unless ( $zip->writeToFileNamed('someZip.zip') == AZ_OK ) {
die 'write error';
}
# Read a Zip file
my $somezip = Archive::Zip->new();
unless ( $somezip->read( 'someZip.zip' ) == AZ_OK ) {
die 'read error';
}
# Change the compression type for a file in the Zip
my $member = $somezip->memberNamed( 'stringMember.txt' );
$member->desiredCompressionMethod( COMPRESSION_STORED );
unless ( $zip->writeToFileNamed( 'someOtherZip.zip' ) == AZ_OK ) {
die 'write error';
}
=head1 DESCRIPTION
The Archive::Zip module allows a Perl program to create, manipulate, read,
and write Zip archive files.
Zip archives can be created, or you can read from existing zip files.
Once created, they can be written to files, streams, or strings. Members
can be added, removed, extracted, replaced, rearranged, and enumerated.
They can also be renamed or have their dates, comments, or other attributes
queried or modified. Their data can be compressed or uncompressed as needed.
Members can be created from members in existing Zip files, or from existing
directories, files, or strings.
This module uses the L<Compress::Raw::Zlib> library to read and write the
compressed streams inside the files.
One can use L<Archive::Zip::MemberRead> to read the zip file archive members
as if they were files.
=head2 File Naming
Regardless of what your local file system uses for file naming, names in a
Zip file are in Unix format (I<forward> slashes (/) separating directory
names, etc.).
C<Archive::Zip> tries to be consistent with file naming conventions, and will
translate back and forth between native and Zip file names.
However, it can't guess which format names are in. So two rules control what
kind of file name you must pass various routines:
=over 4
=item Names of files are in local format.
C<File::Spec> and C<File::Basename> are used for various file
operations. When you're referring to a file on your system, use its
file naming conventions.
=item Names of archive members are in Unix format.
This applies to every method that refers to an archive member, or
provides a name for new archive members. The C<extract()> methods
that can take one or two names will convert from local to zip names
if you call them with a single name.
=back
=head2 Archive::Zip Object Model
=head3 Overview
Archive::Zip::Archive objects are what you ordinarily deal with.
These maintain the structure of a zip file, without necessarily
holding data. When a zip is read from a disk file, the (possibly
compressed) data still lives in the file, not in memory. Archive
members hold information about the individual members, but not
(usually) the actual member data. When the zip is written to a
(different) file, the member data is compressed or copied as needed.
It is possible to make archive members whose data is held in a string
in memory, but this is not done when a zip file is read. Directory
members don't have any data.
=head2 Inheritance
Exporter
Archive::Zip Common base class, has defs.
Archive::Zip::Archive A Zip archive.
Archive::Zip::Member Abstract superclass for all members.
Archive::Zip::StringMember Member made from a string
Archive::Zip::FileMember Member made from an external file
Archive::Zip::ZipFileMember Member that lives in a zip file
Archive::Zip::NewFileMember Member whose data is in a file
Archive::Zip::DirectoryMember Member that is a directory
=head1 EXPORTS
=over 4
=item :CONSTANTS
Exports the following constants:
FA_MSDOS FA_UNIX GPBF_ENCRYPTED_MASK
GPBF_DEFLATING_COMPRESSION_MASK GPBF_HAS_DATA_DESCRIPTOR_MASK
COMPRESSION_STORED COMPRESSION_DEFLATED IFA_TEXT_FILE_MASK
IFA_TEXT_FILE IFA_BINARY_FILE COMPRESSION_LEVEL_NONE
COMPRESSION_LEVEL_DEFAULT COMPRESSION_LEVEL_FASTEST
COMPRESSION_LEVEL_BEST_COMPRESSION
ZIP64_SUPPORTED ZIP64_AS_NEEDED ZIP64_EOCD ZIP64_HEADERS
=item :MISC_CONSTANTS
Exports the following constants (only necessary for extending the
module):
FA_AMIGA FA_VAX_VMS FA_VM_CMS FA_ATARI_ST FA_OS2_HPFS
FA_MACINTOSH FA_Z_SYSTEM FA_CPM FA_WINDOWS_NTFS
GPBF_IMPLODING_8K_SLIDING_DICTIONARY_MASK
GPBF_IMPLODING_3_SHANNON_FANO_TREES_MASK
GPBF_IS_COMPRESSED_PATCHED_DATA_MASK COMPRESSION_SHRUNK
DEFLATING_COMPRESSION_NORMAL DEFLATING_COMPRESSION_MAXIMUM
DEFLATING_COMPRESSION_FAST DEFLATING_COMPRESSION_SUPER_FAST
COMPRESSION_REDUCED_1 COMPRESSION_REDUCED_2 COMPRESSION_REDUCED_3
COMPRESSION_REDUCED_4 COMPRESSION_IMPLODED COMPRESSION_TOKENIZED
COMPRESSION_DEFLATED_ENHANCED
COMPRESSION_PKWARE_DATA_COMPRESSION_LIBRARY_IMPLODED
=item :ERROR_CODES
Explained below. Returned from most methods.
AZ_OK AZ_STREAM_END AZ_ERROR AZ_FORMAT_ERROR AZ_IO_ERROR
=back
=head1 ERROR CODES
Many of the methods in Archive::Zip return error codes. These are implemented
as inline subroutines, using the C<use constant> pragma. They can be imported
into your namespace using the C<:ERROR_CODES> tag:
use Archive::Zip qw( :ERROR_CODES );
...
unless ( $zip->read( 'myfile.zip' ) == AZ_OK ) {
die "whoops!";
}
=over 4
=item AZ_OK (0)
Everything is fine.
=item AZ_STREAM_END (1)
The read stream (or central directory) ended normally.
=item AZ_ERROR (2)
There was some generic kind of error.
=item AZ_FORMAT_ERROR (3)
There is a format error in a ZIP file being read.
=item AZ_IO_ERROR (4)
There was an IO error.
=back
=head2 Compression
Archive::Zip allows each member of a ZIP file to be compressed (using the
Deflate algorithm) or uncompressed.
Other compression algorithms that some versions of ZIP have been able to
produce are not supported. Each member has two compression methods: the
one it's stored as (this is always COMPRESSION_STORED for string and external
file members), and the one you desire for the member in the zip file.
These can be different, of course, so you can make a zip member that is not
compressed out of one that is, and vice versa.
You can inquire about the current compression and set the desired
compression method:
my $member = $zip->memberNamed( 'xyz.txt' );
$member->compressionMethod(); # return current compression
# set to read uncompressed
$member->desiredCompressionMethod( COMPRESSION_STORED );
# set to read compressed
$member->desiredCompressionMethod( COMPRESSION_DEFLATED );
There are two different compression methods:
=over 4
=item COMPRESSION_STORED
File is stored (no compression)
=item COMPRESSION_DEFLATED
File is Deflated
=back
=head2 Compression Levels
If a member's desiredCompressionMethod is COMPRESSION_DEFLATED, you
can choose different compression levels. This choice may affect the
speed of compression and decompression, as well as the size of the
compressed member data.
$member->desiredCompressionLevel( 9 );
The levels given can be:
=over 4
=item * 0 or COMPRESSION_LEVEL_NONE
This is the same as saying
$member->desiredCompressionMethod( COMPRESSION_STORED );
=item * 1 .. 9
1 gives the best speed and worst compression, and 9 gives the
best compression and worst speed.
=item * COMPRESSION_LEVEL_FASTEST
This is a synonym for level 1.
=item * COMPRESSION_LEVEL_BEST_COMPRESSION
This is a synonym for level 9.
=item * COMPRESSION_LEVEL_DEFAULT
This gives a good compromise between speed and compression,
and is currently equivalent to 6 (this is in the zlib code).
This is the level that will be used if not specified.
=back
=head1 Archive::Zip Methods
The Archive::Zip class (and its invisible subclass Archive::Zip::Archive)
implement generic zip file functionality. Creating a new Archive::Zip object
actually makes an Archive::Zip::Archive object, but you don't have to worry
about this unless you're subclassing.
=head2 Constructor
=over 4
=item new( [$fileName] )
=item new( { filename => $fileName } )
Make a new, empty zip archive.
my $zip = Archive::Zip->new();
If an additional argument is passed, new() will call read()
to read the contents of an archive:
my $zip = Archive::Zip->new( 'xyz.zip' );
If a filename argument is passed and the read fails for any
reason, new will return undef. For this reason, it may be
better to call read separately.
=back
=head2 Zip Archive Utility Methods
These Archive::Zip methods may be called as functions or as object
methods. Do not call them as class methods:
$zip = Archive::Zip->new();
$crc = Archive::Zip::computeCRC32( 'ghijkl' ); # OK
$crc = $zip->computeCRC32( 'ghijkl' ); # also OK
$crc = Archive::Zip->computeCRC32( 'ghijkl' ); # NOT OK
=over 4
=item Archive::Zip::computeCRC32( $string [, $crc] )
=item Archive::Zip::computeCRC32( { string => $string [, checksum => $crc ] } )
This is a utility function that uses the Compress::Raw::Zlib CRC
routine to compute a CRC-32. You can get the CRC of a string:
$crc = Archive::Zip::computeCRC32( $string );
Or you can compute the running CRC:
$crc = 0;
$crc = Archive::Zip::computeCRC32( 'abcdef', $crc );
$crc = Archive::Zip::computeCRC32( 'ghijkl', $crc );
=item Archive::Zip::setChunkSize( $number )
=item Archive::Zip::setChunkSize( { chunkSize => $number } )
Report or change chunk size used for reading and writing.
This can make big differences in dealing with large files.
Currently, this defaults to 32K. This also changes the chunk
size used for Compress::Raw::Zlib. You must call setChunkSize()
before reading or writing. This is not exportable, so you
must call it like:
Archive::Zip::setChunkSize( 4096 );
or as a method on a zip (though this is a global setting).
Returns old chunk size.
=item Archive::Zip::chunkSize()
Returns the current chunk size:
my $chunkSize = Archive::Zip::chunkSize();
=item Archive::Zip::setErrorHandler( \&subroutine )
=item Archive::Zip::setErrorHandler( { subroutine => \&subroutine } )
Change the subroutine called with error strings. This
defaults to \&Carp::carp, but you may want to change it to
get the error strings. This is not exportable, so you must
call it like:
Archive::Zip::setErrorHandler( \&myErrorHandler );
If myErrorHandler is undef, resets handler to default.
Returns old error handler. Note that if you call Carp::carp
or a similar routine or if you're chaining to the default
error handler from your error handler, you may want to
increment the number of caller levels that are skipped (do
not just set it to a number):
$Carp::CarpLevel++;
=item Archive::Zip::tempFile( [ $tmpdir ] )
=item Archive::Zip::tempFile( { tempDir => $tmpdir } )
Create a uniquely named temp file. It will be returned open
for read/write. If C<$tmpdir> is given, it is used as the
name of a directory to create the file in. If not given,
creates the file using C<File::Spec::tmpdir()>. Generally, you can
override this choice using the
$ENV{TMPDIR}
environment variable. But see the L<File::Spec|File::Spec>
documentation for your system. Note that on many systems, if you're
running in taint mode, then you must make sure that C<$ENV{TMPDIR}> is
untainted for it to be used.
Will I<NOT> create C<$tmpdir> if it does not exist (this is a change
from prior versions!). Returns file handle and name:
my ($fh, $name) = Archive::Zip::tempFile();
my ($fh, $name) = Archive::Zip::tempFile('myTempDir');
my $fh = Archive::Zip::tempFile(); # if you don't need the name
=back
=head2 Zip Archive Accessors
=over 4
=item members()
Return a copy of the members array
my @members = $zip->members();
=item numberOfMembers()
Return the number of members I have
=item memberNames()
Return a list of the (internal) file names of the zip members
=item memberNamed( $string )
=item memberNamed( { zipName => $string } )
Return ref to member whose filename equals given filename or
undef. C<$string> must be in Zip (Unix) filename format.
=item membersMatching( $regex )
=item membersMatching( { regex => $regex } )
Return array of members whose filenames match given regular
expression in list context. Returns number of matching
members in scalar context.
my @textFileMembers = $zip->membersMatching( '.*\.txt' );
# or
my $numberOfTextFiles = $zip->membersMatching( '.*\.txt' );
=item zip64()
Returns whether the previous read or write of the archive has
been done in zip64 format.
=item desiredZip64Mode()
Gets or sets which parts of the archive should be written in
zip64 format: All parts as needed (ZIP64_AS_NEEDED), the default,
force writing the zip64 end of central directory record
(ZIP64_EOCD), force writing the zip64 EOCD record and all headers
in zip64 format (ZIP64_HEADERS).
=item versionMadeBy()
=item versionNeededToExtract()
Gets the fields from the zip64 end of central directory
record. These are always 0 if the archive is not in zip64 format.
=item diskNumber()
Return the disk that I start on. Not used for writing zips,
but might be interesting if you read a zip in. This should be
0, as Archive::Zip does not handle multi-volume archives.
=item diskNumberWithStartOfCentralDirectory()
Return the disk number that holds the beginning of the
central directory. Not used for writing zips, but might be
interesting if you read a zip in. This should be 0, as
Archive::Zip does not handle multi-volume archives.
=item numberOfCentralDirectoriesOnThisDisk()
Return the number of CD structures in the zipfile last read in.
Not used for writing zips, but might be interesting if you read a zip
in.
=item numberOfCentralDirectories()
Return the number of CD structures in the zipfile last read in.
Not used for writing zips, but might be interesting if you read a zip
in.
=item centralDirectorySize()
Returns central directory size, as read from an external zip
file. Not used for writing zips, but might be interesting if
you read a zip in.
=item centralDirectoryOffsetWRTStartingDiskNumber()
Returns the offset into the zip file where the CD begins. Not
used for writing zips, but might be interesting if you read a
zip in.
=item zipfileComment( [ $string ] )
=item zipfileComment( [ { comment => $string } ] )
Get or set the zipfile comment. Returns the old comment.
print $zip->zipfileComment();
$zip->zipfileComment( 'New Comment' );
=item eocdOffset()
Returns the (unexpected) number of bytes between where the
EOCD was found and where it expected to be. This is normally
0, but would be positive if something (a virus, perhaps) had
added bytes somewhere before the EOCD. Not used for writing
zips, but might be interesting if you read a zip in. Here is
an example of how you can diagnose this:
my $zip = Archive::Zip->new('somefile.zip');
if ($zip->eocdOffset())
{
warn "A virus has added ", $zip->eocdOffset, " bytes of garbage\n";
}
The C<eocdOffset()> is used to adjust the starting position of member
headers, if necessary.
=item fileName()
Returns the name of the file last read from. If nothing has
been read yet, returns an empty string; if read from a file
handle, returns the handle in string form.
=back
=head2 Zip Archive Member Operations
Various operations on a zip file modify members. When a member is
passed as an argument, you can either use a reference to the member
itself, or the name of a member. Of course, using the name requires
that names be unique within a zip (this is not enforced).
=over 4
=item removeMember( $memberOrName )
=item removeMember( { memberOrZipName => $memberOrName } )
Remove and return the given member, or match its name and
remove it. Returns undef if member or name does not exist in this
Zip. No-op if member does not belong to this zip.
=item replaceMember( $memberOrName, $newMember )
=item replaceMember( { memberOrZipName => $memberOrName,
newMember => $newMember } )
Remove and return the given member, or match its name and
remove it. Replace with new member. Returns undef if member or
name does not exist in this Zip, or if C<$newMember> is undefined.
It is an (undiagnosed) error to provide a C<$newMember> that is a
member of the zip being modified.
my $member1 = $zip->removeMember( 'xyz' );
my $member2 = $zip->replaceMember( 'abc', $member1 );
# now, $member2 (named 'abc') is not in $zip,
# and $member1 (named 'xyz') is, having taken $member2's place.
=item extractMember( $memberOrName [, $extractedName ] )
=item extractMember( { memberOrZipName => $memberOrName
[, name => $extractedName ] } )
Extract the given member, or match its name and extract it.
Returns undef if member does not exist in this Zip. If
optional second arg is given, use it as the name of the
extracted member. Otherwise, the internal filename of the
member is used as the name of the extracted file or
directory.
If you pass C<$extractedName>, it should be in the local file
system's format.
If you do not pass C<$extractedName> and the internal filename traverses
a parent directory or a symbolic link, the extraction will be aborted with
C<AC_ERROR> for security reason.
All necessary directories will be created. Returns C<AZ_OK>
on success.
=item extractMemberWithoutPaths( $memberOrName [, $extractedName ] )
=item extractMemberWithoutPaths( { memberOrZipName => $memberOrName
[, name => $extractedName ] } )
Extract the given member, or match its name and extract it.
Does not use path information (extracts into the current
directory). Returns undef if member does not exist in this
Zip.
If optional second arg is given, use it as the name of the
extracted member (its paths will be deleted too). Otherwise,
the internal filename of the member (minus paths) is used as
the name of the extracted file or directory. Returns C<AZ_OK>
on success.
If you do not pass C<$extractedName> and the internal filename is equalled
to a local symbolic link, the extraction will be aborted with C<AC_ERROR> for
security reason.
=item addMember( $member )
=item addMember( { member => $member } )
Append a member (possibly from another zip file) to the zip
file. Returns the new member. Generally, you will use
addFile(), addDirectory(), addFileOrDirectory(), addString(),
or read() to add members.
# Move member named 'abc' to end of zip:
my $member = $zip->removeMember( 'abc' );
$zip->addMember( $member );
=item updateMember( $memberOrName, $fileName )
=item updateMember( { memberOrZipName => $memberOrName, name => $fileName } )
Update a single member from the file or directory named C<$fileName>.
Returns the (possibly added or updated) member, if any; C<undef> on
errors.
The comparison is based on C<lastModTime()> and (in the case of a
non-directory) the size of the file.
=item addFile( $fileName [, $newName, $compressionLevel ] )
=item addFile( { filename => $fileName
[, zipName => $newName, compressionLevel => $compressionLevel } ] )
Append a member whose data comes from an external file,
returning the member or undef. The member will have its file
name set to the name of the external file, and its
desiredCompressionMethod set to COMPRESSION_DEFLATED. The
file attributes and last modification time will be set from
the file.
If the name given does not represent a readable plain file or
symbolic link, undef will be returned. C<$fileName> must be
in the format required for the local file system.
The optional C<$newName> argument sets the internal file name
to something different than the given $fileName. C<$newName>,
if given, must be in Zip name format (i.e. Unix).
The text mode bit will be set if the contents appears to be
text (as returned by the C<-T> perl operator).
I<NOTE> that you should not (generally) use absolute path names
in zip member names, as this will cause problems with some zip
tools as well as introduce a security hole and make the zip
harder to use.
=item addDirectory( $directoryName [, $fileName ] )
=item addDirectory( { directoryName => $directoryName
[, zipName => $fileName ] } )
Append a member created from the given directory name. The
directory name does not have to name an existing directory.
If the named directory exists, the file modification time and
permissions are set from the existing directory, otherwise
they are set to now and permissive default permissions.
C<$directoryName> must be in local file system format.
The optional second argument sets the name of the archive
member (which defaults to C<$directoryName>). If given, it
must be in Zip (Unix) format.
Returns the new member.
=item addFileOrDirectory( $name [, $newName, $compressionLevel ] )
=item addFileOrDirectory( { name => $name [, zipName => $newName,
compressionLevel => $compressionLevel ] } )
Append a member from the file or directory named $name. If
$newName is given, use it for the name of the new member.
Will add or remove trailing slashes from $newName as needed.
C<$name> must be in local file system format.
The optional second argument sets the name of the archive
member (which defaults to C<$name>). If given, it must be in
Zip (Unix) format.
=item addString( $stringOrStringRef, $name, [$compressionLevel] )
=item addString( { string => $stringOrStringRef [, zipName => $name,
compressionLevel => $compressionLevel ] } )
Append a member created from the given string or string
reference. The name is given by the second argument.
Returns the new member. The last modification time will be
set to now, and the file attributes will be set to permissive
defaults.
my $member = $zip->addString( 'This is a test', 'test.txt' );
=item contents( $memberOrMemberName [, $newContents ] )
=item contents( { memberOrZipName => $memberOrMemberName
[, contents => $newContents ] } )
Returns the uncompressed data for a particular member, or
undef.
print "xyz.txt contains " . $zip->contents( 'xyz.txt' );
Also can change the contents of a member:
$zip->contents( 'xyz.txt', 'This is the new contents' );
If called expecting an array as the return value, it will include
the status as the second value in the array.
($content, $status) = $zip->contents( 'xyz.txt');
=back
=head2 Zip Archive I/O operations
A Zip archive can be written to a file or file handle, or read from
one.
=over 4
=item writeToFileNamed( $fileName )
=item writeToFileNamed( { fileName => $fileName } )
Write a zip archive to named file. Returns C<AZ_OK> on
success.
my $status = $zip->writeToFileNamed( 'xx.zip' );
die "error somewhere" if $status != AZ_OK;
Note that if you use the same name as an existing zip file
that you read in, you will clobber ZipFileMembers. So
instead, write to a different file name, then delete the
original.
If you use the C<overwrite()> or C<overwriteAs()> methods, you can
re-write the original zip in this way.
C<$fileName> should be a valid file name on your system.
=item writeToFileHandle( $fileHandle [, $seekable] )
Write a zip archive to a file handle. Return AZ_OK on
success. The optional second arg tells whether or not to try
to seek backwards to re-write headers. If not provided, it is
set if the Perl C<-f> test returns true. This could fail on
some operating systems, though.
my $fh = IO::File->new( 'someFile.zip', 'w' );
unless ( $zip->writeToFileHandle( $fh ) == AZ_OK ) {
# error handling
}
If you pass a file handle that is not seekable (like if
you're writing to a pipe or a socket), pass a false second
argument:
my $fh = IO::File->new( '| cat > somefile.zip', 'w' );
$zip->writeToFileHandle( $fh, 0 ); # fh is not seekable
If this method fails during the write of a member, that
member and all following it will return false from
C<wasWritten()>. See writeCentralDirectory() for a way to
deal with this.
If you want, you can write data to the file handle before
passing it to writeToFileHandle(); this could be used (for
instance) for making self-extracting archives. However, this
only works reliably when writing to a real file (as opposed
to STDOUT or some other possible non-file).
See examples/selfex.pl for how to write a self-extracting
archive.
=item writeCentralDirectory( $fileHandle [, $offset ] )
=item writeCentralDirectory( { fileHandle => $fileHandle
[, offset => $offset ] } )
Writes the central directory structure to the given file
handle.
Returns AZ_OK on success. If given an $offset, will
seek to that point before writing. This can be used for
recovery in cases where writeToFileHandle or writeToFileNamed
returns an IO error because of running out of space on the
destination file.
You can truncate the zip by seeking backwards and then writing the
directory:
my $fh = IO::File->new( 'someFile.zip', 'w' );
my $retval = $zip->writeToFileHandle( $fh );
if ( $retval == AZ_IO_ERROR ) {
my @unwritten = grep { not $_->wasWritten() } $zip->members();
if (@unwritten) {
$zip->removeMember( $member ) foreach my $member ( @unwritten );
$zip->writeCentralDirectory( $fh,
$unwritten[0]->writeLocalHeaderRelativeOffset());
}
}
=item overwriteAs( $newName )
=item overwriteAs( { filename => $newName } )
Write the zip to the specified file, as safely as possible.
This is done by first writing to a temp file, then renaming
the original if it exists, then renaming the temp file, then
deleting the renamed original if it exists. Returns AZ_OK if
successful.
=item overwrite()
Write back to the original zip file. See overwriteAs() above.
If the zip was not ever read from a file, this generates an
error.
=item read( $fileName )
=item read( { filename => $fileName } )
Read zipfile headers from a zip file, appending new members.
Returns C<AZ_OK> or error code.
my $zipFile = Archive::Zip->new();
my $status = $zipFile->read( '/some/FileName.zip' );
=item readFromFileHandle( $fileHandle, $filename )
=item readFromFileHandle( { fileHandle => $fileHandle, filename => $filename } )
Read zipfile headers from an already-opened file handle,
appending new members. Does not close the file handle.
Returns C<AZ_OK> or error code. Note that this requires a
seekable file handle; reading from a stream is not yet
supported, but using in-memory data is.
my $fh = IO::File->new( '/some/FileName.zip', 'r' );
my $zip1 = Archive::Zip->new();
my $status = $zip1->readFromFileHandle( $fh );
my $zip2 = Archive::Zip->new();
$status = $zip2->readFromFileHandle( $fh );
Read zip using in-memory data (recursable):
open my $fh, "<", "archive.zip" or die $!;
my $zip_data = do { local $.; <$fh> };
my $zip = Archive::Zip->new;
open my $dh, "+<", \$zip_data;
$zip->readFromFileHandle ($dh);
=back
=head2 Zip Archive Tree operations
These used to be in Archive::Zip::Tree but got moved into
Archive::Zip. They enable operation on an entire tree of members or
files.
A usage example:
use Archive::Zip;
my $zip = Archive::Zip->new();
# add all readable files and directories below . as xyz/*
$zip->addTree( '.', 'xyz' );
# add all readable plain files below /abc as def/*
$zip->addTree( '/abc', 'def', sub { -f && -r } );
# add all .c files below /tmp as stuff/*
$zip->addTreeMatching( '/tmp', 'stuff', '\.c$' );
# add all .o files below /tmp as stuff/* if they aren't writable
$zip->addTreeMatching( '/tmp', 'stuff', '\.o$', sub { ! -w } );
# add all .so files below /tmp that are smaller than 200 bytes as stuff/*
$zip->addTreeMatching( '/tmp', 'stuff', '\.o$', sub { -s < 200 } );
# and write them into a file
$zip->writeToFileNamed('xxx.zip');
# now extract the same files into /tmpx
$zip->extractTree( 'stuff', '/tmpx' );
=over 4
=item $zip->addTree( $root, $dest [, $pred, $compressionLevel ] ) -- Add tree of files to a zip
=item $zip->addTree( { root => $root, zipName => $dest [, select => $pred,
compressionLevel => $compressionLevel ] )
C<$root> is the root of the tree of files and directories to be
added. It is a valid directory name on your system. C<$dest> is
the name for the root in the zip file (undef or blank means
to use relative pathnames). It is a valid ZIP directory name
(that is, it uses forward slashes (/) for separating
directory components). C<$pred> is an optional subroutine
reference to select files: it is passed the name of the
prospective file or directory using C<$_>, and if it returns
true, the file or directory will be included. The default is
to add all readable files and directories. For instance,
using
my $pred = sub { /\.txt/ };
$zip->addTree( '.', '', $pred );
will add all the .txt files in and below the current
directory, using relative names, and making the names
identical in the zipfile:
original name zip member name
./xyz xyz
./a/ a/
./a/b a/b
To translate absolute to relative pathnames, just pass them
in: $zip->addTree( '/c/d', 'a' );
original name zip member name
/c/d/xyz a/xyz
/c/d/a/ a/a/
/c/d/a/b a/a/b
Returns AZ_OK on success. Note that this will not follow
symbolic links to directories. Note also that this does not
check for the validity of filenames.
Note that you generally I<don't> want to make zip archive member names
absolute.
=item $zip->addTreeMatching( $root, $dest, $pattern [, $pred, $compressionLevel ] )
=item $zip->addTreeMatching( { root => $root, zipName => $dest, pattern =>
$pattern [, select => $pred, compressionLevel => $compressionLevel ] } )
$root is the root of the tree of files and directories to be
added $dest is the name for the root in the zip file (undef
means to use relative pathnames) $pattern is a (non-anchored)
regular expression for filenames to match $pred is an
optional subroutine reference to select files: it is passed
the name of the prospective file or directory in C<$_>, and
if it returns true, the file or directory will be included.
The default is to add all readable files and directories. To
add all files in and below the current directory whose names
end in C<.pl>, and make them extract into a subdirectory
named C<xyz>, do this:
$zip->addTreeMatching( '.', 'xyz', '\.pl$' )
To add all I<writable> files in and below the directory named
C</abc> whose names end in C<.pl>, and make them extract into
a subdirectory named C<xyz>, do this:
$zip->addTreeMatching( '/abc', 'xyz', '\.pl$', sub { -w } )
Returns AZ_OK on success. Note that this will not follow
symbolic links to directories.
=item $zip->updateTree( $root [, $dest , $pred , $mirror, $compressionLevel ] );
=item $zip->updateTree( { root => $root [, zipName => $dest, select => $pred,
mirror => $mirror, compressionLevel => $compressionLevel ] } );
Update a zip file from a directory tree.
C<updateTree()> takes the same arguments as C<addTree()>, but first
checks to see whether the file or directory already exists in the zip
file, and whether it has been changed.
If the fourth argument C<$mirror> is true, then delete all my members
if corresponding files were not found.
Returns an error code or AZ_OK if all is well.
=item $zip->extractTree( [ $root, $dest, $volume } ] )
=item $zip->extractTree( [ { root => $root, zipName => $dest, volume => $volume } ] )
If you don't give any arguments at all, will extract all the
files in the zip with their original names.
If you supply one argument for C<$root>, C<extractTree> will extract
all the members whose names start with C<$root> into the current
directory, stripping off C<$root> first.
C<$root> is in Zip (Unix) format.
For instance,
$zip->extractTree( 'a' );
when applied to a zip containing the files:
a/x a/b/c ax/d/e d/e will extract:
a/x as ./x
a/b/c as ./b/c
If you give two arguments, C<extractTree> extracts all the members
whose names start with C<$root>. It will translate C<$root> into
C<$dest> to construct the destination file name.
C<$root> and C<$dest> are in Zip (Unix) format.
For instance,
$zip->extractTree( 'a', 'd/e' );
when applied to a zip containing the files:
a/x a/b/c ax/d/e d/e will extract:
a/x to d/e/x
a/b/c to d/e/b/c and ignore ax/d/e and d/e
If you give three arguments, C<extractTree> extracts all the members
whose names start with C<$root>. It will translate C<$root> into
C<$dest> to construct the destination file name, and then it will
convert to local file system format, using C<$volume> as the name of
the destination volume.
C<$root> and C<$dest> are in Zip (Unix) format.
C<$volume> is in local file system format.
For instance, under Windows,
$zip->extractTree( 'a', 'd/e', 'f:' );
when applied to a zip containing the files:
a/x a/b/c ax/d/e d/e will extract:
a/x to f:d/e/x
a/b/c to f:d/e/b/c and ignore ax/d/e and d/e
If you want absolute paths (the prior example used paths relative to
the current directory on the destination volume, you can specify these
in C<$dest>:
$zip->extractTree( 'a', '/d/e', 'f:' );
when applied to a zip containing the files:
a/x a/b/c ax/d/e d/e will extract:
a/x to f:\d\e\x
a/b/c to f:\d\e\b\c and ignore ax/d/e and d/e
If the path to the extracted file traverses a parent directory or a symbolic
link, the extraction will be aborted with C<AC_ERROR> for security reason.
Returns an error code or AZ_OK if everything worked OK.
=back
=head1 Archive::Zip Global Variables
=over 4
=item $Archive::Zip::UNICODE
This variable governs how Unicode file and directory names are added
to or extracted from an archive. If set, file and directory names are considered
to be UTF-8 encoded. This is I<EXPERIMENTAL AND BUGGY (there are some edge cases
on Win32)>. Please report problems.
{
local $Archive::Zip::UNICODE = 1;
$zip->addFile('Déjà vu.txt');
}
=back
=head1 MEMBER OPERATIONS
=head2 Member Class Methods
Several constructors allow you to construct members without adding
them to a zip archive. These work the same as the addFile(),
addDirectory(), and addString() zip instance methods described above,
but they don't add the new members to a zip.
=over 4
=item Archive::Zip::Member->newFromString( $stringOrStringRef [, $fileName ] )
=item Archive::Zip::Member->newFromString( { string => $stringOrStringRef
[, zipName => $fileName ] )
Construct a new member from the given string. Returns undef
on error.
my $member = Archive::Zip::Member->newFromString( 'This is a test' );
my $member = Archive::Zip::Member->newFromString( 'This is a test', 'test.txt' );
my $member = Archive::Zip::Member->newFromString( { string => 'This is a test', zipName => 'test.txt' } );
=item newFromFile( $fileName [, $zipName ] )
=item newFromFile( { filename => $fileName [, zipName => $zipName ] } )
Construct a new member from the given file. Returns undef on
error.
my $member = Archive::Zip::Member->newFromFile( 'xyz.txt' );
=item newDirectoryNamed( $directoryName [, $zipname ] )
=item newDirectoryNamed( { directoryName => $directoryName
[, zipName => $zipname ] } )
Construct a new member from the given directory.
C<$directoryName> must be a valid name on your file system; it does not
have to exist.
If given, C<$zipname> will be the name of the zip member; it must be a
valid Zip (Unix) name. If not given, it will be converted from
C<$directoryName>.
Returns undef on error.
my $member = Archive::Zip::Member->newDirectoryNamed( 'CVS/' );
=back
=head2 Member Simple Accessors
These methods get (and/or set) member attribute values.
The zip64 format requires parts of the member data to be stored
in the so-called extra fields. You cannot get nor set this zip64
data through the extra field accessors described in this section.
In fact, the low-level member methods ensure that the zip64 data
in the extra fields is handled completely transparently and
invisibly to the user when members are read or written.
=over 4
=item zip64()
Returns whether the previous read or write of the member has been
done in zip64 format.
=item desiredZip64Mode()
Gets or sets whether the member's headers should be written in
zip64 format: As needed (ZIP64_AS_NEEDED), the default, or always
(ZIP64_HEADERS).
=item versionMadeBy()
Gets the field from the member header.
=item fileAttributeFormat( [ $format ] )
=item fileAttributeFormat( [ { format => $format ] } )
Gets or sets the field from the member header. These are
C<FA_*> values.
=item versionNeededToExtract()
Gets the field from the member header.
=item bitFlag()
Gets the general purpose bit field from the member header.
This is where the C<GPBF_*> bits live.
=item compressionMethod()
Returns the member compression method. This is the method
that is currently being used to compress the member data.
This will be COMPRESSION_STORED for added string or file
members, or any of the C<COMPRESSION_*> values for members
from a zip file. However, this module can only handle members
whose data is in COMPRESSION_STORED or COMPRESSION_DEFLATED
format.
=item desiredCompressionMethod( [ $method ] )
=item desiredCompressionMethod( [ { compressionMethod => $method } ] )
Get or set the member's C<desiredCompressionMethod>. This is
the compression method that will be used when the member is
written. Returns prior desiredCompressionMethod. Only
COMPRESSION_DEFLATED or COMPRESSION_STORED are valid
arguments. Changing to COMPRESSION_STORED will change the
member desiredCompressionLevel to 0; changing to
COMPRESSION_DEFLATED will change the member
desiredCompressionLevel to COMPRESSION_LEVEL_DEFAULT.
=item desiredCompressionLevel( [ $level ] )
=item desiredCompressionLevel( [ { compressionLevel => $level } ] )
Get or set the member's desiredCompressionLevel This is the
method that will be used to write. Returns prior
desiredCompressionLevel. Valid arguments are 0 through 9,
COMPRESSION_LEVEL_NONE, COMPRESSION_LEVEL_DEFAULT,
COMPRESSION_LEVEL_BEST_COMPRESSION, and
COMPRESSION_LEVEL_FASTEST. 0 or COMPRESSION_LEVEL_NONE will
change the desiredCompressionMethod to COMPRESSION_STORED.
All other arguments will change the desiredCompressionMethod
to COMPRESSION_DEFLATED.
=item externalFileName()
Return the member's external file name, if any, or undef.
=item fileName()
Get or set the member's internal filename. Returns the
(possibly new) filename. Names will have backslashes
converted to forward slashes, and will have multiple
consecutive slashes converted to single ones.
=item lastModFileDateTime()
Return the member's last modification date/time stamp in
MS-DOS format.
=item lastModTime()
Return the member's last modification date/time stamp,
converted to unix localtime format.
print "Mod Time: " . scalar( localtime( $member->lastModTime() ) );
=item setLastModFileDateTimeFromUnix()
Set the member's lastModFileDateTime from the given unix
time.
$member->setLastModFileDateTimeFromUnix( time() );
=item internalFileAttributes()
Return the internal file attributes field from the zip
header. This is only set for members read from a zip file.
=item externalFileAttributes()
Return member attributes as read from the ZIP file. Note that
these are NOT UNIX!
=item unixFileAttributes( [ $newAttributes ] )
=item unixFileAttributes( [ { attributes => $newAttributes } ] )
Get or set the member's file attributes using UNIX file
attributes. Returns old attributes.
my $oldAttribs = $member->unixFileAttributes( 0666 );
Note that the return value has more than just the file
permissions, so you will have to mask off the lowest bits for
comparisons.
=item localExtraField( [ $newField ] )
=item localExtraField( [ { field => $newField } ] )
Gets or sets the extra field that was read from the local
header. The extra field must be in the proper format. If it is
not or if the new field contains data related to the zip64
format, this method does not modify the extra field and returns
AZ_FORMAT_ERROR, otherwise it returns AZ_OK.
=item cdExtraField( [ $newField ] )
=item cdExtraField( [ { field => $newField } ] )
Gets or sets the extra field that was read from the central
directory header. The extra field must be in the proper format.
If it is not or if the new field contains data related to the
zip64 format, this method does not modify the extra field and
returns AZ_FORMAT_ERROR, otherwise it returns AZ_OK.
=item extraFields()
Return both local and CD extra fields, concatenated.
=item fileComment( [ $newComment ] )
=item fileComment( [ { comment => $newComment } ] )
Get or set the member's file comment.
=item hasDataDescriptor()
Get or set the data descriptor flag. If this is set, the
local header will not necessarily have the correct data
sizes. Instead, a small structure will be stored at the end
of the member data with these values. This should be
transparent in normal operation.
=item crc32()
Return the CRC-32 value for this member. This will not be set
for members that were constructed from strings or external
files until after the member has been written.
=item crc32String()
Return the CRC-32 value for this member as an 8 character
printable hex string. This will not be set for members that
were constructed from strings or external files until after
the member has been written.
=item compressedSize()
Return the compressed size for this member. This will not be
set for members that were constructed from strings or
external files until after the member has been written.
=item uncompressedSize()
Return the uncompressed size for this member.
=item password( [ $password ] )
Returns the password for this member to be used on decryption.
If $password is given, it will set the password for the decryption.
=item isEncrypted()
Return true if this member is encrypted. The Archive::Zip
module does not currently support creation of encrypted
members. Decryption works more or less like this:
my $zip = Archive::Zip->new;
$zip->read ("encrypted.zip");
for my $m (map { $zip->memberNamed ($_) } $zip->memberNames) {
$m->password ("secret");
$m->contents; # is "" when password was wrong
That shows that the password has to be set per member, and not per
archive. This might change in the future.
=item isTextFile( [ $flag ] )
=item isTextFile( [ { flag => $flag } ] )
Returns true if I am a text file. Also can set the status if
given an argument (then returns old state). Note that this
module does not currently do anything with this flag upon
extraction or storage. That is, bytes are stored in native
format whether or not they came from a text file.
=item isBinaryFile()
Returns true if I am a binary file. Also can set the status
if given an argument (then returns old state). Note that this
module does not currently do anything with this flag upon
extraction or storage. That is, bytes are stored in native
format whether or not they came from a text file.
=item extractToFileNamed( $fileName )
=item extractToFileNamed( { name => $fileName } )
Extract me to a file with the given name. The file will be
created with default modes. Directories will be created as
needed.
The C<$fileName> argument should be a valid file name on your
file system.
Returns AZ_OK on success.
=item isDirectory()
Returns true if I am a directory.
=item isSymbolicLink()
Returns true if I am a symbolic link.
=item writeLocalHeaderRelativeOffset()
Returns the file offset in bytes the last time I was written.
=item wasWritten()
Returns true if I was successfully written. Reset at the
beginning of a write attempt.
=back
=head2 Low-level member data reading
It is possible to use lower-level routines to access member data
streams, rather than the extract* methods and contents(). For
instance, here is how to print the uncompressed contents of a member
in chunks using these methods:
my ( $member, $status, $bufferRef );
$member = $zip->memberNamed( 'xyz.txt' );
$member->desiredCompressionMethod( COMPRESSION_STORED );
$status = $member->rewindData();
die "error $status" unless $status == AZ_OK;
while ( ! $member->readIsDone() )
{
( $bufferRef, $status ) = $member->readChunk();
die "error $status"
if $status != AZ_OK && $status != AZ_STREAM_END;
# do something with $bufferRef:
print $$bufferRef;
}
$member->endRead();
=over 4
=item readChunk( [ $chunkSize ] )
=item readChunk( [ { chunkSize => $chunkSize } ] )
This reads the next chunk of given size from the member's
data stream and compresses or uncompresses it as necessary,
returning a reference to the bytes read and a status. If size
argument is not given, defaults to global set by
Archive::Zip::setChunkSize. Status is AZ_OK on success until
the last chunk, where it returns AZ_STREAM_END. Returns C<(
\$bytes, $status)>.
my ( $outRef, $status ) = $self->readChunk();
print $$outRef if $status != AZ_OK && $status != AZ_STREAM_END;
=item rewindData()
Rewind data and set up for reading data streams or writing
zip files. Can take options for C<inflateInit()> or
C<deflateInit()>, but this is not likely to be necessary.
Subclass overrides should call this method. Returns C<AZ_OK>
on success.
=item endRead()
Reset the read variables and free the inflater or deflater.
Must be called to close files, etc. Returns AZ_OK on success.
=item readIsDone()
Return true if the read has run out of data or encountered an error.
=item contents()
Return the entire uncompressed member data or undef in scalar
context. When called in array context, returns C<( $string,
$status )>; status will be AZ_OK on success:
my $string = $member->contents();
# or
my ( $string, $status ) = $member->contents();
die "error $status" unless $status == AZ_OK;
Can also be used to set the contents of a member (this may
change the class of the member):
$member->contents( "this is my new contents" );
=item extractToFileHandle( $fh )
=item extractToFileHandle( { fileHandle => $fh } )
Extract (and uncompress, if necessary) the member's contents
to the given file handle. Return AZ_OK on success.
For members representing symbolic links, pass the name of the
symbolic link as file handle. Ensure that all directories in the
path to the symbolic link already exist.
=back
=head1 Archive::Zip::FileMember methods
The Archive::Zip::FileMember class extends Archive::Zip::Member. It is the
base class for both ZipFileMember and NewFileMember classes. This class adds
an C<externalFileName> and an C<fh> member to keep track of the external
file.
=over 4
=item externalFileName()
Return the member's external filename.
=item fh()
Return the member's read file handle. Automatically opens file if
necessary.
=back
=head1 Archive::Zip::ZipFileMember methods
The Archive::Zip::ZipFileMember class represents members that have been read
from external zip files.
=over 4
=item diskNumberStart()
Returns the disk number that the member's local header resides in.
Should be 0.
=item localHeaderRelativeOffset()
Returns the offset into the zip file where the member's local header
is.
=item dataOffset()
Returns the offset from the beginning of the zip file to the member's
data.
=back
=head1 REQUIRED MODULES
L<Archive::Zip> requires several other modules:
L<Carp>
L<Compress::Raw::Zlib>
L<Cwd>
L<File::Basename>
L<File::Copy>
L<File::Find>
L<File::Path>
L<File::Spec>
L<IO::File>
L<IO::Seekable>
L<Time::Local>
=head1 BUGS AND CAVEATS
=head2 When not to use Archive::Zip
If you are just going to be extracting zips (and/or other archives) you
are recommended to look at using L<Archive::Extract> instead, as it is much
easier to use and factors out archive-specific functionality.
=head2 Zip64 Format Support
Since version 1.66 Archive::Zip supports the so-called zip64
format, which overcomes various limitations in the original zip
file format. On some Perl interpreters, however, even version
1.66 and newer of Archive::Zip cannot support the zip64 format.
Among these are all Perl interpreters that lack 64-bit support
and those older than version 5.10.0.
Constant C<ZIP64_SUPPORTED>, exported with tag L<:CONSTANTS>,
equals true if Archive::Zip on the current Perl interpreter
supports the zip64 format. If it does not and you try to read or
write an archive in zip64 format, anyway, Archive::Zip returns an
error C<AZ_ERROR> and reports an error message along the lines of
"zip64 format not supported on this Perl interpreter".
=head2 C<versionMadeBy> and C<versionNeededToExtract>
The zip64 format and the zip file format in general specify what
values to use for the C<versionMadeBy> and
C<versionNeededToExtract> fields in the local file header,
central directory file header, and zip64 EOCD record. In
practice however, these fields seem to be more or less randomly
used by various archiver implementations.
To achieve a compromise between backward compatibility and
(whatever) standard compliance, Archive::Zip handles them as
follows:
=over 4
=item
For field C<versionMadeBy>, Archive::Zip uses default value 20
(45 for the zip64 EOCD record) or any previously read value. It
never changes that value when writing a header, even if it is
written in zip64 format, or when writing the zip64 EOCD record.
=item
Likewise for field C<versionNeededToExtract>, but here
Archive::Zip forces a minimum value of 45 when writing a header
in zip64 format or the zip64 EOCD record.
=item
Finally, Archive::Zip never depends on the values of these fields
in any way when reading an archive from a file or file handle.
=back
=head2 Try to avoid IO::Scalar
One of the most common ways to use Archive::Zip is to generate Zip files
in-memory. Most people use L<IO::Scalar> for this purpose.
Unfortunately, as of 1.11 this module no longer works with L<IO::Scalar>
as it incorrectly implements seeking.
Anybody using L<IO::Scalar> should consider porting to L<IO::String>,
which is smaller, lighter, and is implemented to be perfectly compatible
with regular seekable filehandles.
Support for L<IO::Scalar> most likely will B<not> be restored in the
future, as L<IO::Scalar> itself cannot change the way it is implemented
due to back-compatibility issues.
=head2 Wrong password for encrypted members
When an encrypted member is read using the wrong password, you currently
have to re-read the entire archive to try again with the correct password.
=head1 TO DO
* auto-choosing storing vs compression
* extra field hooks (see notes.txt)
* check for duplicates on addition/renaming?
* Text file extraction (line end translation)
* Reading zip files from non-seekable inputs
(Perhaps by proxying through IO::String?)
* separate unused constants into separate module
* cookbook style docs
* Handle tainted paths correctly
* Work on better compatibility with other IO:: modules
* Support encryption
* More user-friendly decryption
=head1 SUPPORT
Bugs should be reported on GitHub
L<https://github.com/redhotpenguin/perl-Archive-Zip/issues>
For other issues contact the maintainer.
=head1 AUTHOR
Currently maintained by Fred Moyer <fred@redhotpenguin.com>
Previously maintained by Adam Kennedy <adamk@cpan.org>
Previously maintained by Steve Peters E<lt>steve@fisharerojo.orgE<gt>.
File attributes code by Maurice Aubrey E<lt>maurice@lovelyfilth.comE<gt>.
Originally by Ned Konz E<lt>nedkonz@cpan.orgE<gt>.
=head1 COPYRIGHT
Some parts copyright 2006 - 2012 Adam Kennedy.
Some parts copyright 2005 Steve Peters.
Original work copyright 2000 - 2004 Ned Konz.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
Look at L<Archive::Zip::MemberRead> which is a wrapper that allows one to
read Zip archive members as if they were files.
L<Compress::Raw::Zlib>, L<Archive::Tar>, L<Archive::Extract>
=cut
ARCHIVE_ZIP
$fatpacked{"Archive/Zip/Archive.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_ARCHIVE';
package Archive::Zip::Archive;
# Represents a generic ZIP archive
use strict;
use File::Path;
use File::Find ();
use File::Spec ();
use File::Copy ();
use File::Basename;
use Cwd;
use Encode qw(encode_utf8 decode_utf8);
use vars qw( $VERSION @ISA );
BEGIN {
$VERSION = '1.68';
@ISA = qw( Archive::Zip );
}
use Archive::Zip qw(
:CONSTANTS
:ERROR_CODES
:PKZIP_CONSTANTS
:UTILITY_METHODS
);
our $UNICODE;
our $UNTAINT = qr/\A(.+)\z/;
# Note that this returns undef on read errors, else new zip object.
sub new {
my $class = shift;
# Info-Zip 3.0 (I guess) seems to use the following values
# for the version fields in the zip64 EOCD record:
#
# version made by:
# 30 (plus upper byte indicating host system)
#
# version needed to extract:
# 45
my $self = bless(
{
'zip64' => 0,
'desiredZip64Mode' => ZIP64_AS_NEEDED,
'versionMadeBy' => 0,
'versionNeededToExtract' => 0,
'diskNumber' => 0,
'diskNumberWithStartOfCentralDirectory' =>
0,
'numberOfCentralDirectoriesOnThisDisk' =>
0, # should be # of members
'numberOfCentralDirectories' => 0, # should be # of members
'centralDirectorySize' => 0, # must re-compute on write
'centralDirectoryOffsetWRTStartingDiskNumber' =>
0, # must re-compute
'writeEOCDOffset' => 0,
'writeCentralDirectoryOffset' => 0,
'zipfileComment' => '',
'eocdOffset' => 0,
'fileName' => ''
},
$class
);
$self->{'members'} = [];
my $fileName = (ref($_[0]) eq 'HASH') ? shift->{filename} : shift;
if ($fileName) {
my $status = $self->read($fileName);
return $status == AZ_OK ? $self : undef;
}
return $self;
}
sub storeSymbolicLink {
my $self = shift;
$self->{'storeSymbolicLink'} = shift;
}
sub members {
@{shift->{'members'}};
}
sub numberOfMembers {
scalar(shift->members());
}
sub memberNames {
my $self = shift;
return map { $_->fileName() } $self->members();
}
# return ref to member with given name or undef
sub memberNamed {
my $self = shift;
my $fileName = (ref($_[0]) eq 'HASH') ? shift->{zipName} : shift;
foreach my $member ($self->members()) {
return $member if $member->fileName() eq $fileName;
}
return undef;
}
sub membersMatching {
my $self = shift;
my $pattern = (ref($_[0]) eq 'HASH') ? shift->{regex} : shift;
return grep { $_->fileName() =~ /$pattern/ } $self->members();
}
sub zip64 {
shift->{'zip64'};
}
sub desiredZip64Mode {
my $self = shift;
my $desiredZip64Mode = $self->{'desiredZip64Mode'};
if (@_) {
$self->{'desiredZip64Mode'} =
ref($_[0]) eq 'HASH' ? shift->{desiredZip64Mode} : shift;
}
return $desiredZip64Mode;
}
sub versionMadeBy {
shift->{'versionMadeBy'};
}
sub versionNeededToExtract {
shift->{'versionNeededToExtract'};
}
sub diskNumber {
shift->{'diskNumber'};
}
sub diskNumberWithStartOfCentralDirectory {
shift->{'diskNumberWithStartOfCentralDirectory'};
}
sub numberOfCentralDirectoriesOnThisDisk {
shift->{'numberOfCentralDirectoriesOnThisDisk'};
}
sub numberOfCentralDirectories {
shift->{'numberOfCentralDirectories'};
}
sub centralDirectorySize {
shift->{'centralDirectorySize'};
}
sub centralDirectoryOffsetWRTStartingDiskNumber {
shift->{'centralDirectoryOffsetWRTStartingDiskNumber'};
}
sub zipfileComment {
my $self = shift;
my $comment = $self->{'zipfileComment'};
if (@_) {
my $new_comment = (ref($_[0]) eq 'HASH') ? shift->{comment} : shift;
$self->{'zipfileComment'} = pack('C0a*', $new_comment); # avoid Unicode
}
return $comment;
}
sub eocdOffset {
shift->{'eocdOffset'};
}
# Return the name of the file last read.
sub fileName {
shift->{'fileName'};
}
sub removeMember {
my $self = shift;
my $member = (ref($_[0]) eq 'HASH') ? shift->{memberOrZipName} : shift;
$member = $self->memberNamed($member) unless ref($member);
return undef unless $member;
my @newMembers = grep { $_ != $member } $self->members();
$self->{'members'} = \@newMembers;
return $member;
}
sub replaceMember {
my $self = shift;
my ($oldMember, $newMember);
if (ref($_[0]) eq 'HASH') {
$oldMember = $_[0]->{memberOrZipName};
$newMember = $_[0]->{newMember};
} else {
($oldMember, $newMember) = @_;
}
$oldMember = $self->memberNamed($oldMember) unless ref($oldMember);
return undef unless $oldMember;
return undef unless $newMember;
my @newMembers =
map { ($_ == $oldMember) ? $newMember : $_ } $self->members();
$self->{'members'} = \@newMembers;
return $oldMember;
}
sub extractMember {
my $self = shift;
my ($member, $name);
if (ref($_[0]) eq 'HASH') {
$member = $_[0]->{memberOrZipName};
$name = $_[0]->{name};
} else {
($member, $name) = @_;
}
$member = $self->memberNamed($member) unless ref($member);
return _error('member not found') unless $member;
my $originalSize = $member->compressedSize();
my ($volumeName, $dirName, $fileName);
if (defined($name)) {
($volumeName, $dirName, $fileName) = File::Spec->splitpath($name);
$dirName = File::Spec->catpath($volumeName, $dirName, '');
} else {
$name = $member->fileName();
if ((my $ret = _extractionNameIsSafe($name))
!= AZ_OK) { return $ret; }
($dirName = $name) =~ s{[^/]*$}{};
$dirName = Archive::Zip::_asLocalName($dirName);
$name = Archive::Zip::_asLocalName($name);
}
if ($dirName && !-d $dirName) {
mkpath($dirName);
return _ioError("can't create dir $dirName") if (!-d $dirName);
}
my $rc = $member->extractToFileNamed($name, @_);
# TODO refactor this fix into extractToFileNamed()
$member->{'compressedSize'} = $originalSize;
return $rc;
}
sub extractMemberWithoutPaths {
my $self = shift;
my ($member, $name);
if (ref($_[0]) eq 'HASH') {
$member = $_[0]->{memberOrZipName};
$name = $_[0]->{name};
} else {
($member, $name) = @_;
}
$member = $self->memberNamed($member) unless ref($member);
return _error('member not found') unless $member;
my $originalSize = $member->compressedSize();
return AZ_OK if $member->isDirectory();
unless ($name) {
$name = $member->fileName();
$name =~ s{.*/}{}; # strip off directories, if any
if ((my $ret = _extractionNameIsSafe($name))
!= AZ_OK) { return $ret; }
$name = Archive::Zip::_asLocalName($name);
}
my $rc = $member->extractToFileNamed($name, @_);
$member->{'compressedSize'} = $originalSize;
return $rc;
}
sub addMember {
my $self = shift;
my $newMember = (ref($_[0]) eq 'HASH') ? shift->{member} : shift;
push(@{$self->{'members'}}, $newMember) if $newMember;
if($newMember && ($newMember->{bitFlag} & 0x800)
&& !utf8::is_utf8($newMember->{fileName})){
$newMember->{fileName} = Encode::decode_utf8($newMember->{fileName});
}
return $newMember;
}
sub addFile {
my $self = shift;
my ($fileName, $newName, $compressionLevel);
if (ref($_[0]) eq 'HASH') {
$fileName = $_[0]->{filename};
$newName = $_[0]->{zipName};
$compressionLevel = $_[0]->{compressionLevel};
} else {
($fileName, $newName, $compressionLevel) = @_;
}
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) {
$fileName = Win32::GetANSIPathName($fileName);
}
my $newMember = Archive::Zip::Member->newFromFile($fileName, $newName);
$newMember->desiredCompressionLevel($compressionLevel);
if ($self->{'storeSymbolicLink'} && -l $fileName) {
my $newMember =
Archive::Zip::Member->newFromString(readlink $fileName, $newName);
# For symbolic links, External File Attribute is set to 0xA1FF0000 by Info-ZIP
$newMember->{'externalFileAttributes'} = 0xA1FF0000;
$self->addMember($newMember);
} else {
$self->addMember($newMember);
}
return $newMember;
}
sub addString {
my $self = shift;
my ($stringOrStringRef, $name, $compressionLevel);
if (ref($_[0]) eq 'HASH') {
$stringOrStringRef = $_[0]->{string};
$name = $_[0]->{zipName};
$compressionLevel = $_[0]->{compressionLevel};
} else {
($stringOrStringRef, $name, $compressionLevel) = @_;
}
my $newMember =
Archive::Zip::Member->newFromString($stringOrStringRef, $name);
$newMember->desiredCompressionLevel($compressionLevel);
return $self->addMember($newMember);
}
sub addDirectory {
my $self = shift;
my ($name, $newName);
if (ref($_[0]) eq 'HASH') {
$name = $_[0]->{directoryName};
$newName = $_[0]->{zipName};
} else {
($name, $newName) = @_;
}
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) {
$name = Win32::GetANSIPathName($name);
}
my $newMember = Archive::Zip::Member->newDirectoryNamed($name, $newName);
if ($self->{'storeSymbolicLink'} && -l $name) {
my $link = readlink $name;
($newName =~ s{/$}{}) if $newName; # Strip trailing /
my $newMember = Archive::Zip::Member->newFromString($link, $newName);
# For symbolic links, External File Attribute is set to 0xA1FF0000 by Info-ZIP
$newMember->{'externalFileAttributes'} = 0xA1FF0000;
$self->addMember($newMember);
} else {
$self->addMember($newMember);
}
return $newMember;
}
# add either a file or a directory.
sub addFileOrDirectory {
my $self = shift;
my ($name, $newName, $compressionLevel);
if (ref($_[0]) eq 'HASH') {
$name = $_[0]->{name};
$newName = $_[0]->{zipName};
$compressionLevel = $_[0]->{compressionLevel};
} else {
($name, $newName, $compressionLevel) = @_;
}
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) {
$name = Win32::GetANSIPathName($name);
}
$name =~ s{/$}{};
if ($newName) {
$newName =~ s{/$}{};
} else {
$newName = $name;
}
if (-f $name) {
return $self->addFile($name, $newName, $compressionLevel);
} elsif (-d $name) {
return $self->addDirectory($name, $newName);
} else {
return _error("$name is neither a file nor a directory");
}
}
sub contents {
my $self = shift;
my ($member, $newContents);
if (ref($_[0]) eq 'HASH') {
$member = $_[0]->{memberOrZipName};
$newContents = $_[0]->{contents};
} else {
($member, $newContents) = @_;
}
my ($contents, $status) = (undef, AZ_OK);
if ($status == AZ_OK) {
$status = _error('No member name given') unless defined($member);
}
if ($status == AZ_OK && ! ref($member)) {
my $memberName = $member;
$member = $self->memberNamed($memberName);
$status = _error('No member named $memberName') unless defined($member);
}
if ($status == AZ_OK) {
($contents, $status) = $member->contents($newContents);
}
return
wantarray
? ($contents, $status)
: $contents;
}
sub writeToFileNamed {
my $self = shift;
my $fileName =
(ref($_[0]) eq 'HASH') ? shift->{filename} : shift; # local FS format
foreach my $member ($self->members()) {
if ($member->_usesFileNamed($fileName)) {
return _error("$fileName is needed by member "
. $member->fileName()
. "; consider using overwrite() or overwriteAs() instead.");
}
}
my ($status, $fh) = _newFileHandle($fileName, 'w');
return _ioError("Can't open $fileName for write") unless $status;
$status = $self->writeToFileHandle($fh, 1);
$fh->close();
$fh = undef;
return $status;
}
# It is possible to write data to the FH before calling this,
# perhaps to make a self-extracting archive.
sub writeToFileHandle {
my $self = shift;
my ($fh, $fhIsSeekable);
if (ref($_[0]) eq 'HASH') {
$fh = $_[0]->{fileHandle};
$fhIsSeekable =
exists($_[0]->{seek}) ? $_[0]->{seek} : _isSeekable($fh);
} else {
$fh = shift;
$fhIsSeekable = @_ ? shift : _isSeekable($fh);
}
return _error('No filehandle given') unless $fh;
return _ioError('filehandle not open') unless $fh->opened();
_binmode($fh);
# Find out where the current position is.
my $offset = $fhIsSeekable ? $fh->tell() : 0;
$offset = 0 if $offset < 0;
# (Re-)set the "was-successfully-written" flag so that the
# contract advertised in the documentation ("that member and
# *all following it* will return false from wasWritten()")
# also holds for members written more than once.
#
# Not sure whether that mechanism works, anyway. If method
# $member->_writeToFileHandle fails with an error below and
# user continues with calling $zip->writeCentralDirectory
# manually, we should end up with the following picture
# unless the user seeks back to writeCentralDirectoryOffset:
#
# ...
# [last successfully written member]
# <- writeCentralDirectoryOffset points here
# [half-written member junk with unknown size]
# [central directory entry 0]
# ...
foreach my $member ($self->members()) {
$member->{'wasWritten'} = 0;
}
foreach my $member ($self->members()) {
# (Re-)set object member zip64 flag. Here is what
# happens next to that flag:
#
# $member->_writeToFileHandle
# Determines a local flag value depending on
# necessity and user desire and ors it to
# the object member
# $member->_writeLocalFileHeader
# Queries the object member to write appropriate
# local header
# $member->_writeDataDescriptor
# Queries the object member to write appropriate
# data descriptor
# $member->_writeCentralDirectoryFileHeader
# Determines a local flag value depending on
# necessity and user desire. Writes a central
# directory header appropriate to the local flag.
# Ors the local flag to the object member.
$member->{'zip64'} = 0;
my ($status, $memberSize) =
$member->_writeToFileHandle($fh, $fhIsSeekable, $offset,
$self->desiredZip64Mode());
$member->endRead();
return $status if $status != AZ_OK;
$offset += $memberSize;
# Change this so it reflects write status and last
# successful position
$member->{'wasWritten'} = 1;
$self->{'writeCentralDirectoryOffset'} = $offset;
}
return $self->writeCentralDirectory($fh);
}
# Write zip back to the original file,
# as safely as possible.
# Returns AZ_OK if successful.
sub overwrite {
my $self = shift;
return $self->overwriteAs($self->{'fileName'});
}
# Write zip to the specified file,
# as safely as possible.
# Returns AZ_OK if successful.
sub overwriteAs {
my $self = shift;
my $zipName = (ref($_[0]) eq 'HASH') ? $_[0]->{filename} : shift;
return _error("no filename in overwriteAs()") unless defined($zipName);
my ($fh, $tempName) = Archive::Zip::tempFile();
return _error("Can't open temp file", $!) unless $fh;
(my $backupName = $zipName) =~ s{(\.[^.]*)?$}{.zbk};
my $status = $self->writeToFileHandle($fh);
$fh->close();
$fh = undef;
if ($status != AZ_OK) {
unlink($tempName);
_printError("Can't write to $tempName");
return $status;
}
my $err;
# rename the zip
if (-f $zipName && !rename($zipName, $backupName)) {
$err = $!;
unlink($tempName);
return _error("Can't rename $zipName as $backupName", $err);
}
# move the temp to the original name (possibly copying)
unless (File::Copy::move($tempName, $zipName)
|| File::Copy::copy($tempName, $zipName)) {
$err = $!;
rename($backupName, $zipName);
unlink($tempName);
return _error("Can't move $tempName to $zipName", $err);
}
# unlink the backup
if (-f $backupName && !unlink($backupName)) {
$err = $!;
return _error("Can't unlink $backupName", $err);
}
return AZ_OK;
}
# Used only during writing
sub _writeCentralDirectoryOffset {
shift->{'writeCentralDirectoryOffset'};
}
sub _writeEOCDOffset {
shift->{'writeEOCDOffset'};
}
# Expects to have _writeEOCDOffset() set
sub _writeEndOfCentralDirectory {
my ($self, $fh, $membersZip64) = @_;
my $zip64 = 0;
my $versionMadeBy = $self->versionMadeBy();
my $versionNeededToExtract = $self->versionNeededToExtract();
my $diskNumber = 0;
my $diskNumberWithStartOfCentralDirectory = 0;
my $numberOfCentralDirectoriesOnThisDisk = $self->numberOfMembers();
my $numberOfCentralDirectories = $self->numberOfMembers();
my $centralDirectorySize =
$self->_writeEOCDOffset() - $self->_writeCentralDirectoryOffset();
my $centralDirectoryOffsetWRTStartingDiskNumber =
$self->_writeCentralDirectoryOffset();
my $zipfileCommentLength = length($self->zipfileComment());
my $eocdDataZip64 = 0;
$eocdDataZip64 ||= $numberOfCentralDirectoriesOnThisDisk > 0xffff;
$eocdDataZip64 ||= $numberOfCentralDirectories > 0xffff;
$eocdDataZip64 ||= $centralDirectorySize > 0xffffffff;
$eocdDataZip64 ||= $centralDirectoryOffsetWRTStartingDiskNumber > 0xffffffff;
if ( $membersZip64
|| $eocdDataZip64
|| $self->desiredZip64Mode() == ZIP64_EOCD) {
return _zip64NotSupported() unless ZIP64_SUPPORTED;
$zip64 = 1;
$versionMadeBy = 45 if ($versionMadeBy == 0);
$versionNeededToExtract = 45 if ($versionNeededToExtract < 45);
$self->_print($fh, ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE_STRING)
or return _ioError('writing zip64 EOCD record signature');
my $record = pack(
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_FORMAT,
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH +
SIGNATURE_LENGTH - 12,
$versionMadeBy,
$versionNeededToExtract,
$diskNumber,
$diskNumberWithStartOfCentralDirectory,
$numberOfCentralDirectoriesOnThisDisk,
$numberOfCentralDirectories,
$centralDirectorySize,
$centralDirectoryOffsetWRTStartingDiskNumber
);
$self->_print($fh, $record)
or return _ioError('writing zip64 EOCD record');
$self->_print($fh, ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE_STRING)
or return _ioError('writing zip64 EOCD locator signature');
my $locator = pack(
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_FORMAT,
0,
$self->_writeEOCDOffset(),
1
);
$self->_print($fh, $locator)
or return _ioError('writing zip64 EOCD locator');
}
$self->_print($fh, END_OF_CENTRAL_DIRECTORY_SIGNATURE_STRING)
or return _ioError('writing EOCD Signature');
my $header = pack(
END_OF_CENTRAL_DIRECTORY_FORMAT,
$diskNumber,
$diskNumberWithStartOfCentralDirectory,
$numberOfCentralDirectoriesOnThisDisk > 0xffff
? 0xffff : $numberOfCentralDirectoriesOnThisDisk,
$numberOfCentralDirectories > 0xffff
? 0xffff : $numberOfCentralDirectories,
$centralDirectorySize > 0xffffffff
? 0xffffffff : $centralDirectorySize,
$centralDirectoryOffsetWRTStartingDiskNumber > 0xffffffff
? 0xffffffff : $centralDirectoryOffsetWRTStartingDiskNumber,
$zipfileCommentLength
);
$self->_print($fh, $header)
or return _ioError('writing EOCD header');
if ($zipfileCommentLength) {
$self->_print($fh, $self->zipfileComment())
or return _ioError('writing zipfile comment');
}
# Adjust object members related to zip64 format
$self->{'zip64'} = $zip64;
$self->{'versionMadeBy'} = $versionMadeBy;
$self->{'versionNeededToExtract'} = $versionNeededToExtract;
return AZ_OK;
}
# $offset can be specified to truncate a zip file.
sub writeCentralDirectory {
my $self = shift;
my ($fh, $offset);
if (ref($_[0]) eq 'HASH') {
$fh = $_[0]->{fileHandle};
$offset = $_[0]->{offset};
} else {
($fh, $offset) = @_;
}
if (defined($offset)) {
$self->{'writeCentralDirectoryOffset'} = $offset;
$fh->seek($offset, IO::Seekable::SEEK_SET)
or return _ioError('seeking to write central directory');
} else {
$offset = $self->_writeCentralDirectoryOffset();
}
my $membersZip64 = 0;
foreach my $member ($self->members()) {
my ($status, $headerSize) =
$member->_writeCentralDirectoryFileHeader($fh, $self->desiredZip64Mode());
return $status if $status != AZ_OK;
$membersZip64 ||= $member->zip64();
$offset += $headerSize;
$self->{'writeEOCDOffset'} = $offset;
}
return $self->_writeEndOfCentralDirectory($fh, $membersZip64);
}
sub read {
my $self = shift;
my $fileName = (ref($_[0]) eq 'HASH') ? shift->{filename} : shift;
return _error('No filename given') unless $fileName;
my ($status, $fh) = _newFileHandle($fileName, 'r');
return _ioError("opening $fileName for read") unless $status;
$status = $self->readFromFileHandle($fh, $fileName);
return $status if $status != AZ_OK;
$fh->close();
$self->{'fileName'} = $fileName;
return AZ_OK;
}
sub readFromFileHandle {
my $self = shift;
my ($fh, $fileName);
if (ref($_[0]) eq 'HASH') {
$fh = $_[0]->{fileHandle};
$fileName = $_[0]->{filename};
} else {
($fh, $fileName) = @_;
}
$fileName = $fh unless defined($fileName);
return _error('No filehandle given') unless $fh;
return _ioError('filehandle not open') unless $fh->opened();
_binmode($fh);
$self->{'fileName'} = "$fh";
# TODO: how to support non-seekable zips?
return _error('file not seekable')
unless _isSeekable($fh);
$fh->seek(0, 0); # rewind the file
my $status = $self->_findEndOfCentralDirectory($fh);
return $status if $status != AZ_OK;
my $eocdPosition;
($status, $eocdPosition) = $self->_readEndOfCentralDirectory($fh, $fileName);
return $status if $status != AZ_OK;
my $zip64 = $self->zip64();
$fh->seek($eocdPosition - $self->centralDirectorySize(),
IO::Seekable::SEEK_SET)
or return _ioError("Can't seek $fileName");
# Try to detect garbage at beginning of archives
# This should be 0
$self->{'eocdOffset'} = $eocdPosition - $self->centralDirectorySize() # here
- $self->centralDirectoryOffsetWRTStartingDiskNumber();
for (; ;) {
my $newMember =
Archive::Zip::Member->_newFromZipFile($fh, $fileName, $zip64,
$self->eocdOffset());
my $signature;
($status, $signature) = _readSignature($fh, $fileName);
return $status if $status != AZ_OK;
if (! $zip64) {
last if $signature == END_OF_CENTRAL_DIRECTORY_SIGNATURE;
}
else {
last if $signature == ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE;
}
$status = $newMember->_readCentralDirectoryFileHeader();
return $status if $status != AZ_OK;
$status = $newMember->endRead();
return $status if $status != AZ_OK;
if ($newMember->isDirectory()) {
$newMember->_become('Archive::Zip::DirectoryMember');
# Ensure above call suceeded to avoid future trouble
$newMember->_ISA('Archive::Zip::DirectoryMember') or
return $self->_error('becoming Archive::Zip::DirectoryMember');
}
if(($newMember->{bitFlag} & 0x800) && !utf8::is_utf8($newMember->{fileName})){
$newMember->{fileName} = Encode::decode_utf8($newMember->{fileName});
}
push(@{$self->{'members'}}, $newMember);
}
return AZ_OK;
}
# Read EOCD, starting from position before signature.
# Checks for a zip64 EOCD record and uses that if present.
#
# Return AZ_OK (in scalar context) or a pair (AZ_OK,
# $eocdPosition) (in list context) on success:
# ( $status, $eocdPosition ) = $zip->_readEndOfCentralDirectory( $fh, $fileName );
# where the returned EOCD position either points to the beginning
# of the EOCD or to the beginning of the zip64 EOCD record.
#
# APPNOTE.TXT as of version 6.3.6 is a bit vague on the
# "ZIP64(tm) format". It has a lot of conditions like "if an
# archive is in ZIP64 format", but never explicitly mentions
# *when* an archive is in that format. (Or at least I haven't
# found it.)
#
# So I decided that an archive is in ZIP64 format if zip64 EOCD
# locator and zip64 EOCD record are present before the EOCD with
# the format given in the specification.
sub _readEndOfCentralDirectory {
my $self = shift;
my $fh = shift;
my $fileName = shift;
# Remember current position, which is just before the EOCD
# signature
my $eocdPosition = $fh->tell();
# Reset the zip64 format flag
$self->{'zip64'} = 0;
my $zip64EOCDPosition;
# Check for zip64 EOCD locator and zip64 EOCD record. Be
# extra careful here to not interpret any random data as
# zip64 data structures. If in doubt, silently continue
# reading the regular EOCD.
NOZIP64:
{
# Do not even start looking for any zip64 structures if
# that would not be supported.
if (! ZIP64_SUPPORTED) {
last NOZIP64;
}
if ($eocdPosition < ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH + SIGNATURE_LENGTH) {
last NOZIP64;
}
# Skip to before potential zip64 EOCD locator
$fh->seek(-(ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH) - SIGNATURE_LENGTH,
IO::Seekable::SEEK_CUR)
or return _ioError("seeking to before zip 64 EOCD locator");
my $zip64EOCDLocatorPosition =
$eocdPosition - ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH - SIGNATURE_LENGTH;
my $status;
my $bytesRead;
# Read potential zip64 EOCD locator signature
$status =
_readSignature($fh, $fileName,
ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE, 1);
return $status if $status == AZ_IO_ERROR;
if ($status == AZ_FORMAT_ERROR) {
$fh->seek($eocdPosition, IO::Seekable::SEEK_SET)
or return _ioError("seeking to EOCD");
last NOZIP64;
}
# Read potential zip64 EOCD locator and verify it
my $locator = '';
$bytesRead = $fh->read($locator, ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH);
if ($bytesRead != ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_LENGTH) {
return _ioError("reading zip64 EOCD locator");
}
(undef, $zip64EOCDPosition, undef) =
unpack(ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_FORMAT, $locator);
if ($zip64EOCDPosition >
($zip64EOCDLocatorPosition - ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH - SIGNATURE_LENGTH)) {
# No need to seek to EOCD since we're already there
last NOZIP64;
}
# Skip to potential zip64 EOCD record
$fh->seek($zip64EOCDPosition, IO::Seekable::SEEK_SET)
or return _ioError("seeking to zip64 EOCD record");
# Read potential zip64 EOCD record signature
$status =
_readSignature($fh, $fileName,
ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_SIGNATURE, 1);
return $status if $status == AZ_IO_ERROR;
if ($status == AZ_FORMAT_ERROR) {
$fh->seek($eocdPosition, IO::Seekable::SEEK_SET)
or return _ioError("seeking to EOCD");
last NOZIP64;
}
# Read potential zip64 EOCD record. Ignore the zip64
# extensible data sector.
my $record = '';
$bytesRead = $fh->read($record, ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH);
if ($bytesRead != ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_LENGTH) {
return _ioError("reading zip64 EOCD record");
}
# Perform one final check, hoping that all implementors
# follow the recommendation of the specification
# regarding the size of the zip64 EOCD record
my ($zip64EODCRecordSize) = unpack("Q<", $record);
if ($zip64EOCDPosition + 12 + $zip64EODCRecordSize != $zip64EOCDLocatorPosition) {
$fh->seek($eocdPosition, IO::Seekable::SEEK_SET)
or return _ioError("seeking to EOCD");
last NOZIP64;
}
$self->{'zip64'} = 1;
(
undef,
$self->{'versionMadeBy'},
$self->{'versionNeededToExtract'},
$self->{'diskNumber'},
$self->{'diskNumberWithStartOfCentralDirectory'},
$self->{'numberOfCentralDirectoriesOnThisDisk'},
$self->{'numberOfCentralDirectories'},
$self->{'centralDirectorySize'},
$self->{'centralDirectoryOffsetWRTStartingDiskNumber'}
) = unpack(ZIP64_END_OF_CENTRAL_DIRECTORY_RECORD_FORMAT, $record);
# Don't just happily bail out, we still need to read the
# zip file comment!
$fh->seek($eocdPosition, IO::Seekable::SEEK_SET)
or return _ioError("seeking to EOCD");
}
# Skip past signature
$fh->seek(SIGNATURE_LENGTH, IO::Seekable::SEEK_CUR)
or return _ioError("seeking past EOCD signature");
my $header = '';
my $bytesRead = $fh->read($header, END_OF_CENTRAL_DIRECTORY_LENGTH);
if ($bytesRead != END_OF_CENTRAL_DIRECTORY_LENGTH) {
return _ioError("reading end of central directory");
}
my $zipfileCommentLength;
if (! $self->{'zip64'}) {
(
$self->{'diskNumber'},
$self->{'diskNumberWithStartOfCentralDirectory'},
$self->{'numberOfCentralDirectoriesOnThisDisk'},
$self->{'numberOfCentralDirectories'},
$self->{'centralDirectorySize'},
$self->{'centralDirectoryOffsetWRTStartingDiskNumber'},
$zipfileCommentLength
) = unpack(END_OF_CENTRAL_DIRECTORY_FORMAT, $header);
if ( $self->{'diskNumber'} == 0xffff
|| $self->{'diskNumberWithStartOfCentralDirectory'} == 0xffff
|| $self->{'numberOfCentralDirectoriesOnThisDisk'} == 0xffff
|| $self->{'numberOfCentralDirectories'} == 0xffff
|| $self->{'centralDirectorySize'} == 0xffffffff
|| $self->{'centralDirectoryOffsetWRTStartingDiskNumber'} == 0xffffffff) {
if (ZIP64_SUPPORTED) {
return _formatError("unexpected zip64 marker values in EOCD");
}
else {
return _zip64NotSupported();
}
}
}
else {
(
undef,
undef,
undef,
undef,
undef,
undef,
$zipfileCommentLength
) = unpack(END_OF_CENTRAL_DIRECTORY_FORMAT, $header);
}
if ($zipfileCommentLength) {
my $zipfileComment = '';
$bytesRead = $fh->read($zipfileComment, $zipfileCommentLength);
if ($bytesRead != $zipfileCommentLength) {
return _ioError("reading zipfile comment");
}
$self->{'zipfileComment'} = $zipfileComment;
}
if (! $self->{'zip64'}) {
return
wantarray
? (AZ_OK, $eocdPosition)
: AZ_OK;
}
else {
return
wantarray
? (AZ_OK, $zip64EOCDPosition)
: AZ_OK;
}
}
# Seek in my file to the end, then read backwards until we find the
# signature of the central directory record. Leave the file positioned right
# before the signature. Returns AZ_OK if success.
sub _findEndOfCentralDirectory {
my $self = shift;
my $fh = shift;
my $data = '';
$fh->seek(0, IO::Seekable::SEEK_END)
or return _ioError("seeking to end");
my $fileLength = $fh->tell();
if ($fileLength < END_OF_CENTRAL_DIRECTORY_LENGTH + 4) {
return _formatError("file is too short");
}
my $seekOffset = 0;
my $pos = -1;
for (; ;) {
$seekOffset += 512;
$seekOffset = $fileLength if ($seekOffset > $fileLength);
$fh->seek(-$seekOffset, IO::Seekable::SEEK_END)
or return _ioError("seek failed");
my $bytesRead = $fh->read($data, $seekOffset);
if ($bytesRead != $seekOffset) {
return _ioError("read failed");
}
$pos = rindex($data, END_OF_CENTRAL_DIRECTORY_SIGNATURE_STRING);
last
if ( $pos >= 0
or $seekOffset == $fileLength
or $seekOffset >= $Archive::Zip::ChunkSize);
}
if ($pos >= 0) {
$fh->seek($pos - $seekOffset, IO::Seekable::SEEK_CUR)
or return _ioError("seeking to EOCD");
return AZ_OK;
} else {
return _formatError("can't find EOCD signature");
}
}
# Used to avoid taint problems when chdir'ing.
# Not intended to increase security in any way; just intended to shut up the -T
# complaints. If your Cwd module is giving you unreliable returns from cwd()
# you have bigger problems than this.
sub _untaintDir {
my $dir = shift;
$dir =~ m/$UNTAINT/s;
return $1;
}
sub addTree {
my $self = shift;
my ($root, $dest, $pred, $compressionLevel);
if (ref($_[0]) eq 'HASH') {
$root = $_[0]->{root};
$dest = $_[0]->{zipName};
$pred = $_[0]->{select};
$compressionLevel = $_[0]->{compressionLevel};
} else {
($root, $dest, $pred, $compressionLevel) = @_;
}
return _error("root arg missing in call to addTree()")
unless defined($root);
$dest = '' unless defined($dest);
$pred = sub { -r }
unless defined($pred);
my @files;
my $startDir = _untaintDir(cwd());
return _error('undef returned by _untaintDir on cwd ', cwd())
unless $startDir;
# This avoids chdir'ing in Find, in a way compatible with older
# versions of File::Find.
my $wanted = sub {
local $main::_ = $File::Find::name;
my $dir = _untaintDir($File::Find::dir);
chdir($startDir);
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) {
push(@files, Win32::GetANSIPathName($File::Find::name)) if (&$pred);
$dir = Win32::GetANSIPathName($dir);
} else {
push(@files, $File::Find::name) if (&$pred);
}
chdir($dir);
};
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) {
$root = Win32::GetANSIPathName($root);
}
# File::Find will not untaint unless you explicitly pass the flag and regex pattern.
File::Find::find({ wanted => $wanted, untaint => 1, untaint_pattern => $UNTAINT }, $root);
my $rootZipName = _asZipDirName($root, 1); # with trailing slash
my $pattern = $rootZipName eq './' ? '^' : "^\Q$rootZipName\E";
$dest = _asZipDirName($dest, 1); # with trailing slash
foreach my $fileName (@files) {
my $isDir;
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) {
$isDir = -d Win32::GetANSIPathName($fileName);
} else {
$isDir = -d $fileName;
}
# normalize, remove leading ./
my $archiveName = _asZipDirName($fileName, $isDir);
if ($archiveName eq $rootZipName) { $archiveName = $dest }
else { $archiveName =~ s{$pattern}{$dest} }
next if $archiveName =~ m{^\.?/?$}; # skip current dir
my $member =
$isDir
? $self->addDirectory($fileName, $archiveName)
: $self->addFile($fileName, $archiveName);
$member->desiredCompressionLevel($compressionLevel);
return _error("add $fileName failed in addTree()") if !$member;
}
return AZ_OK;
}
sub addTreeMatching {
my $self = shift;
my ($root, $dest, $pattern, $pred, $compressionLevel);
if (ref($_[0]) eq 'HASH') {
$root = $_[0]->{root};
$dest = $_[0]->{zipName};
$pattern = $_[0]->{pattern};
$pred = $_[0]->{select};
$compressionLevel = $_[0]->{compressionLevel};
} else {
($root, $dest, $pattern, $pred, $compressionLevel) = @_;
}
return _error("root arg missing in call to addTreeMatching()")
unless defined($root);
$dest = '' unless defined($dest);
return _error("pattern missing in call to addTreeMatching()")
unless defined($pattern);
my $matcher =
$pred ? sub { m{$pattern} && &$pred } : sub { m{$pattern} && -r };
return $self->addTree($root, $dest, $matcher, $compressionLevel);
}
# Check if one of the components of a path to the file or the file name
# itself is an already existing symbolic link. If yes then return an
# error. Continuing and writing to a file traversing a link posseses
# a security threat, especially if the link was extracted from an
# attacker-supplied archive. This would allow writing to an arbitrary
# file. The same applies when using ".." to escape from a working
# directory. <https://bugzilla.redhat.com/show_bug.cgi?id=1591449>
sub _extractionNameIsSafe {
my $name = shift;
my ($volume, $directories) = File::Spec->splitpath($name, 1);
my @directories = File::Spec->splitdir($directories);
if (grep '..' eq $_, @directories) {
return _error(
"Could not extract $name safely: a parent directory is used");
}
my @path;
my $path;
for my $directory (@directories) {
push @path, $directory;
$path = File::Spec->catpath($volume, File::Spec->catdir(@path), '');
if (-l $path) {
return _error(
"Could not extract $name safely: $path is an existing symbolic link");
}
if (!-e $path) {
last;
}
}
return AZ_OK;
}
# $zip->extractTree( $root, $dest [, $volume] );
#
# $root and $dest are Unix-style.
# $volume is in local FS format.
#
sub extractTree {
my $self = shift;
my ($root, $dest, $volume);
if (ref($_[0]) eq 'HASH') {
$root = $_[0]->{root};
$dest = $_[0]->{zipName};
$volume = $_[0]->{volume};
} else {
($root, $dest, $volume) = @_;
}
$root = '' unless defined($root);
if (defined $dest) {
if ($dest !~ m{/$}) {
$dest .= '/';
}
} else {
$dest = './';
}
my $pattern = "^\Q$root";
my @members = $self->membersMatching($pattern);
foreach my $member (@members) {
my $fileName = $member->fileName(); # in Unix format
$fileName =~ s{$pattern}{$dest}; # in Unix format
# convert to platform format:
$fileName = Archive::Zip::_asLocalName($fileName, $volume);
if ((my $ret = _extractionNameIsSafe($fileName))
!= AZ_OK) { return $ret; }
my $status = $member->extractToFileNamed($fileName);
return $status if $status != AZ_OK;
}
return AZ_OK;
}
# $zip->updateMember( $memberOrName, $fileName );
# Returns (possibly updated) member, if any; undef on errors.
sub updateMember {
my $self = shift;
my ($oldMember, $fileName);
if (ref($_[0]) eq 'HASH') {
$oldMember = $_[0]->{memberOrZipName};
$fileName = $_[0]->{name};
} else {
($oldMember, $fileName) = @_;
}
if (!defined($fileName)) {
_error("updateMember(): missing fileName argument");
return undef;
}
my @newStat = stat($fileName);
if (!@newStat) {
_ioError("Can't stat $fileName");
return undef;
}
my $isDir = -d _;
my $memberName;
if (ref($oldMember)) {
$memberName = $oldMember->fileName();
} else {
$oldMember = $self->memberNamed($memberName = $oldMember)
|| $self->memberNamed($memberName =
_asZipDirName($oldMember, $isDir));
}
unless (defined($oldMember)
&& $oldMember->lastModTime() == $newStat[9]
&& $oldMember->isDirectory() == $isDir
&& ($isDir || ($oldMember->uncompressedSize() == $newStat[7]))) {
# create the new member
my $newMember =
$isDir
? Archive::Zip::Member->newDirectoryNamed($fileName, $memberName)
: Archive::Zip::Member->newFromFile($fileName, $memberName);
unless (defined($newMember)) {
_error("creation of member $fileName failed in updateMember()");
return undef;
}
# replace old member or append new one
if (defined($oldMember)) {
$self->replaceMember($oldMember, $newMember);
} else {
$self->addMember($newMember);
}
return $newMember;
}
return $oldMember;
}
# $zip->updateTree( $root, [ $dest, [ $pred [, $mirror]]] );
#
# This takes the same arguments as addTree, but first checks to see
# whether the file or directory already exists in the zip file.
#
# If the fourth argument $mirror is true, then delete all my members
# if corresponding files were not found.
sub updateTree {
my $self = shift;
my ($root, $dest, $pred, $mirror, $compressionLevel);
if (ref($_[0]) eq 'HASH') {
$root = $_[0]->{root};
$dest = $_[0]->{zipName};
$pred = $_[0]->{select};
$mirror = $_[0]->{mirror};
$compressionLevel = $_[0]->{compressionLevel};
} else {
($root, $dest, $pred, $mirror, $compressionLevel) = @_;
}
return _error("root arg missing in call to updateTree()")
unless defined($root);
$dest = '' unless defined($dest);
$pred = sub { -r }
unless defined($pred);
$dest = _asZipDirName($dest, 1);
my $rootZipName = _asZipDirName($root, 1); # with trailing slash
my $pattern = $rootZipName eq './' ? '^' : "^\Q$rootZipName\E";
my @files;
my $startDir = _untaintDir(cwd());
return _error('undef returned by _untaintDir on cwd ', cwd())
unless $startDir;
# This avoids chdir'ing in Find, in a way compatible with older
# versions of File::Find.
my $wanted = sub {
local $main::_ = $File::Find::name;
my $dir = _untaintDir($File::Find::dir);
chdir($startDir);
push(@files, $File::Find::name) if (&$pred);
chdir($dir);
};
File::Find::find($wanted, $root);
# Now @files has all the files that I could potentially be adding to
# the zip. Only add the ones that are necessary.
# For each file (updated or not), add its member name to @done.
my %done;
foreach my $fileName (@files) {
my @newStat = stat($fileName);
my $isDir = -d _;
# normalize, remove leading ./
my $memberName = _asZipDirName($fileName, $isDir);
if ($memberName eq $rootZipName) { $memberName = $dest }
else { $memberName =~ s{$pattern}{$dest} }
next if $memberName =~ m{^\.?/?$}; # skip current dir
$done{$memberName} = 1;
my $changedMember = $self->updateMember($memberName, $fileName);
$changedMember->desiredCompressionLevel($compressionLevel);
return _error("updateTree failed to update $fileName")
unless ref($changedMember);
}
# @done now has the archive names corresponding to all the found files.
# If we're mirroring, delete all those members that aren't in @done.
if ($mirror) {
foreach my $member ($self->members()) {
$self->removeMember($member)
unless $done{$member->fileName()};
}
}
return AZ_OK;
}
1;
ARCHIVE_ZIP_ARCHIVE
$fatpacked{"Archive/Zip/BufferedFileHandle.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_BUFFEREDFILEHANDLE';
package Archive::Zip::BufferedFileHandle;
# File handle that uses a string internally and can seek
# This is given as a demo for getting a zip file written
# to a string.
# I probably should just use IO::Scalar instead.
# Ned Konz, March 2000
use strict;
use IO::File;
use Carp;
use vars qw{$VERSION};
BEGIN {
$VERSION = '1.68';
$VERSION = eval $VERSION;
}
sub new {
my $class = shift || __PACKAGE__;
$class = ref($class) || $class;
my $self = bless(
{
content => '',
position => 0,
size => 0
},
$class
);
return $self;
}
# Utility method to read entire file
sub readFromFile {
my $self = shift;
my $fileName = shift;
my $fh = IO::File->new($fileName, "r");
CORE::binmode($fh);
if (!$fh) {
Carp::carp("Can't open $fileName: $!\n");
return undef;
}
local $/ = undef;
$self->{content} = <$fh>;
$self->{size} = length($self->{content});
return $self;
}
sub contents {
my $self = shift;
if (@_) {
$self->{content} = shift;
$self->{size} = length($self->{content});
}
return $self->{content};
}
sub binmode { 1 }
sub close { 1 }
sub opened { 1 }
sub eof {
my $self = shift;
return $self->{position} >= $self->{size};
}
sub seek {
my $self = shift;
my $pos = shift;
my $whence = shift;
# SEEK_SET
if ($whence == 0) { $self->{position} = $pos; }
# SEEK_CUR
elsif ($whence == 1) { $self->{position} += $pos; }
# SEEK_END
elsif ($whence == 2) { $self->{position} = $self->{size} + $pos; }
else { return 0; }
return 1;
}
sub tell { return shift->{position}; }
# Copy my data to given buffer
sub read {
my $self = shift;
my $buf = \($_[0]);
shift;
my $len = shift;
my $offset = shift || 0;
$$buf = '' if not defined($$buf);
my $bytesRead =
($self->{position} + $len > $self->{size})
? ($self->{size} - $self->{position})
: $len;
substr($$buf, $offset, $bytesRead) =
substr($self->{content}, $self->{position}, $bytesRead);
$self->{position} += $bytesRead;
return $bytesRead;
}
# Copy given buffer to me
sub write {
my $self = shift;
my $buf = \($_[0]);
shift;
my $len = shift;
my $offset = shift || 0;
$$buf = '' if not defined($$buf);
my $bufLen = length($$buf);
my $bytesWritten =
($offset + $len > $bufLen)
? $bufLen - $offset
: $len;
substr($self->{content}, $self->{position}, $bytesWritten) =
substr($$buf, $offset, $bytesWritten);
$self->{size} = length($self->{content});
return $bytesWritten;
}
sub clearerr() { 1 }
1;
ARCHIVE_ZIP_BUFFEREDFILEHANDLE
$fatpacked{"Archive/Zip/DirectoryMember.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_DIRECTORYMEMBER';
package Archive::Zip::DirectoryMember;
use strict;
use File::Path;
use vars qw( $VERSION @ISA );
BEGIN {
$VERSION = '1.68';
@ISA = qw( Archive::Zip::Member );
}
use Archive::Zip qw(
:ERROR_CODES
:UTILITY_METHODS
);
sub _newNamed {
my $class = shift;
my $fileName = shift; # FS name
my $newName = shift; # Zip name
$newName = _asZipDirName($fileName) unless $newName;
my $self = $class->new(@_);
$self->{'externalFileName'} = $fileName;
$self->fileName($newName);
if (-e $fileName) {
# -e does NOT do a full stat, so we need to do one now
if (-d _ ) {
my @stat = stat(_);
$self->unixFileAttributes($stat[2]);
my $mod_t = $stat[9];
if ($^O eq 'MSWin32' and !$mod_t) {
$mod_t = time();
}
$self->setLastModFileDateTimeFromUnix($mod_t);
} else { # hmm.. trying to add a non-directory?
_error($fileName, ' exists but is not a directory');
return undef;
}
} else {
$self->unixFileAttributes($self->DEFAULT_DIRECTORY_PERMISSIONS);
$self->setLastModFileDateTimeFromUnix(time());
}
return $self;
}
sub externalFileName {
shift->{'externalFileName'};
}
sub isDirectory {
return 1;
}
sub extractToFileNamed {
my $self = shift;
my $name = shift; # local FS name
my $attribs = $self->unixFileAttributes() & 07777;
mkpath($name, 0, $attribs); # croaks on error
utime($self->lastModTime(), $self->lastModTime(), $name);
return AZ_OK;
}
sub fileName {
my $self = shift;
my $newName = shift;
$newName =~ s{/?$}{/} if defined($newName);
return $self->SUPER::fileName($newName);
}
# So people don't get too confused. This way it looks like the problem
# is in their code...
sub contents {
return wantarray ? (undef, AZ_OK) : undef;
}
1;
ARCHIVE_ZIP_DIRECTORYMEMBER
$fatpacked{"Archive/Zip/FileMember.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_FILEMEMBER';
package Archive::Zip::FileMember;
use strict;
use vars qw( $VERSION @ISA );
BEGIN {
$VERSION = '1.68';
@ISA = qw ( Archive::Zip::Member );
}
use Archive::Zip qw(
:UTILITY_METHODS
);
sub externalFileName {
shift->{'externalFileName'};
}
# Return true if I depend on the named file
sub _usesFileNamed {
my $self = shift;
my $fileName = shift;
my $xfn = $self->externalFileName();
return undef if ref($xfn);
return $xfn eq $fileName;
}
sub fh {
my $self = shift;
$self->_openFile()
if !defined($self->{'fh'}) || !$self->{'fh'}->opened();
return $self->{'fh'};
}
# opens my file handle from my file name
sub _openFile {
my $self = shift;
my ($status, $fh) = _newFileHandle($self->externalFileName(), 'r');
if (!$status) {
_ioError("Can't open", $self->externalFileName());
return undef;
}
$self->{'fh'} = $fh;
_binmode($fh);
return $fh;
}
# Make sure I close my file handle
sub endRead {
my $self = shift;
undef $self->{'fh'}; # _closeFile();
return $self->SUPER::endRead(@_);
}
sub _become {
my $self = shift;
my $newClass = shift;
return $self if ref($self) eq $newClass;
delete($self->{'externalFileName'});
delete($self->{'fh'});
return $self->SUPER::_become($newClass);
}
1;
ARCHIVE_ZIP_FILEMEMBER
$fatpacked{"Archive/Zip/Member.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_MEMBER';
package Archive::Zip::Member;
# A generic member of an archive
use strict;
use vars qw( $VERSION @ISA );
BEGIN {
$VERSION = '1.68';
@ISA = qw( Archive::Zip );
if ($^O eq 'MSWin32') {
require Win32;
require Encode;
Encode->import(qw{ decode_utf8 });
}
}
use Archive::Zip qw(
:CONSTANTS
:MISC_CONSTANTS
:ERROR_CODES
:PKZIP_CONSTANTS
:UTILITY_METHODS
);
use Time::Local ();
use Compress::Raw::Zlib qw( Z_OK Z_STREAM_END MAX_WBITS );
use File::Path;
use File::Basename;
# Unix perms for default creation of files/dirs.
use constant DEFAULT_DIRECTORY_PERMISSIONS => 040755;
use constant DEFAULT_FILE_PERMISSIONS => 0100666;
use constant DIRECTORY_ATTRIB => 040000;
use constant FILE_ATTRIB => 0100000;
use constant OS_SUPPORTS_SYMLINK => do {
local $@;
!!eval { symlink("",""); 1 };
};
# Returns self if successful, else undef
# Assumes that fh is positioned at beginning of central directory file header.
# Leaves fh positioned immediately after file header or EOCD signature.
sub _newFromZipFile {
my $class = shift;
my $self = Archive::Zip::ZipFileMember->_newFromZipFile(@_);
return $self;
}
sub newFromString {
my $class = shift;
my ($stringOrStringRef, $fileName);
if (ref($_[0]) eq 'HASH') {
$stringOrStringRef = $_[0]->{string};
$fileName = $_[0]->{zipName};
} else {
($stringOrStringRef, $fileName) = @_;
}
my $self =
Archive::Zip::StringMember->_newFromString($stringOrStringRef, $fileName);
return $self;
}
sub newFromFile {
my $class = shift;
my ($fileName, $zipName);
if (ref($_[0]) eq 'HASH') {
$fileName = $_[0]->{fileName};
$zipName = $_[0]->{zipName};
} else {
($fileName, $zipName) = @_;
}
my $self =
Archive::Zip::NewFileMember->_newFromFileNamed($fileName, $zipName);
return $self;
}
sub newDirectoryNamed {
my $class = shift;
my ($directoryName, $newName);
if (ref($_[0]) eq 'HASH') {
$directoryName = $_[0]->{directoryName};
$newName = $_[0]->{zipName};
} else {
($directoryName, $newName) = @_;
}
my $self =
Archive::Zip::DirectoryMember->_newNamed($directoryName, $newName);
return $self;
}
sub new {
my $class = shift;
# Info-Zip 3.0 (I guess) seems to use the following values
# for the version fields in local and central directory
# headers, regardless of whether the member has an zip64
# extended information extra field or not:
#
# version made by:
# 30
#
# version needed to extract:
# 10 for directory and stored entries
# 20 for anything else
my $self = {
'lastModFileDateTime' => 0,
'fileAttributeFormat' => FA_UNIX,
'zip64' => 0,
'desiredZip64Mode' => ZIP64_AS_NEEDED,
'versionMadeBy' => 20,
'versionNeededToExtract' => 20,
'bitFlag' => ($Archive::Zip::UNICODE ? 0x0800 : 0),
'compressionMethod' => COMPRESSION_STORED,
'desiredCompressionMethod' => COMPRESSION_STORED,
'desiredCompressionLevel' => COMPRESSION_LEVEL_NONE,
'internalFileAttributes' => 0,
'externalFileAttributes' => 0, # set later
'fileName' => '',
'cdExtraField' => '',
'localExtraField' => '',
'fileComment' => '',
'crc32' => 0,
'compressedSize' => 0,
'uncompressedSize' => 0,
'password' => undef, # password for encrypted data
'crc32c' => -1, # crc for decrypted data
@_
};
bless($self, $class);
$self->unixFileAttributes($self->DEFAULT_FILE_PERMISSIONS);
return $self;
}
# Morph into given class (do whatever cleanup I need to do)
sub _become {
return bless($_[0], $_[1]);
}
sub fileAttributeFormat {
my $self = shift;
if (@_) {
$self->{fileAttributeFormat} =
(ref($_[0]) eq 'HASH') ? $_[0]->{format} : $_[0];
} else {
return $self->{fileAttributeFormat};
}
}
sub zip64 {
shift->{'zip64'};
}
sub desiredZip64Mode {
my $self = shift;
my $desiredZip64Mode = $self->{'desiredZip64Mode'};
if (@_) {
$self->{'desiredZip64Mode'} =
ref($_[0]) eq 'HASH' ? shift->{desiredZip64Mode} : shift;
}
return $desiredZip64Mode;
}
sub versionMadeBy {
shift->{'versionMadeBy'};
}
sub versionNeededToExtract {
shift->{'versionNeededToExtract'};
}
sub bitFlag {
my $self = shift;
# Set General Purpose Bit Flags according to the desiredCompressionLevel setting
if ( $self->desiredCompressionLevel == 1
|| $self->desiredCompressionLevel == 2) {
$self->{'bitFlag'} |= DEFLATING_COMPRESSION_FAST;
} elsif ($self->desiredCompressionLevel == 3
|| $self->desiredCompressionLevel == 4
|| $self->desiredCompressionLevel == 5
|| $self->desiredCompressionLevel == 6
|| $self->desiredCompressionLevel == 7) {
$self->{'bitFlag'} |= DEFLATING_COMPRESSION_NORMAL;
} elsif ($self->desiredCompressionLevel == 8
|| $self->desiredCompressionLevel == 9) {
$self->{'bitFlag'} |= DEFLATING_COMPRESSION_MAXIMUM;
}
if ($Archive::Zip::UNICODE) {
$self->{'bitFlag'} |= 0x0800;
}
$self->{'bitFlag'};
}
sub password {
my $self = shift;
$self->{'password'} = shift if @_;
$self->{'password'};
}
sub compressionMethod {
shift->{'compressionMethod'};
}
sub desiredCompressionMethod {
my $self = shift;
my $newDesiredCompressionMethod =
(ref($_[0]) eq 'HASH') ? shift->{compressionMethod} : shift;
my $oldDesiredCompressionMethod = $self->{'desiredCompressionMethod'};
if (defined($newDesiredCompressionMethod)) {
$self->{'desiredCompressionMethod'} = $newDesiredCompressionMethod;
if ($newDesiredCompressionMethod == COMPRESSION_STORED) {
$self->{'desiredCompressionLevel'} = 0;
$self->{'bitFlag'} &= ~GPBF_HAS_DATA_DESCRIPTOR_MASK
if $self->uncompressedSize() == 0;
} elsif ($oldDesiredCompressionMethod == COMPRESSION_STORED) {
$self->{'desiredCompressionLevel'} = COMPRESSION_LEVEL_DEFAULT;
}
}
return $oldDesiredCompressionMethod;
}
sub desiredCompressionLevel {
my $self = shift;
my $newDesiredCompressionLevel =
(ref($_[0]) eq 'HASH') ? shift->{compressionLevel} : shift;
my $oldDesiredCompressionLevel = $self->{'desiredCompressionLevel'};
if (defined($newDesiredCompressionLevel)) {
$self->{'desiredCompressionLevel'} = $newDesiredCompressionLevel;
$self->{'desiredCompressionMethod'} = (
$newDesiredCompressionLevel
? COMPRESSION_DEFLATED
: COMPRESSION_STORED
);
}
return $oldDesiredCompressionLevel;
}
sub fileName {
my $self = shift;
my $newName = shift;
if (defined $newName) {
$newName =~ y{\\/}{/}s; # deal with dos/windoze problems
$self->{'fileName'} = $newName;
}
return $self->{'fileName'};
}
sub fileNameAsBytes {
my $self = shift;
my $bytes = $self->{'fileName'};
if($self->{'bitFlag'} & 0x800){
$bytes = Encode::encode_utf8($bytes);
}
return $bytes;
}
sub lastModFileDateTime {
my $modTime = shift->{'lastModFileDateTime'};
$modTime =~ m/^(\d+)$/; # untaint
return $1;
}
sub lastModTime {
my $self = shift;
return _dosToUnixTime($self->lastModFileDateTime());
}
sub setLastModFileDateTimeFromUnix {
my $self = shift;
my $time_t = shift;
$self->{'lastModFileDateTime'} = _unixToDosTime($time_t);
}
sub internalFileAttributes {
shift->{'internalFileAttributes'};
}
sub externalFileAttributes {
shift->{'externalFileAttributes'};
}
# Convert UNIX permissions into proper value for zip file
# Usable as a function or a method
sub _mapPermissionsFromUnix {
my $self = shift;
my $mode = shift;
my $attribs = $mode << 16;
# Microsoft Windows Explorer needs this bit set for directories
if ($mode & DIRECTORY_ATTRIB) {
$attribs |= 16;
}
return $attribs;
# TODO: map more MS-DOS perms
}
# Convert ZIP permissions into Unix ones
#
# This was taken from Info-ZIP group's portable UnZip
# zipfile-extraction program, version 5.50.
# http://www.info-zip.org/pub/infozip/
#
# See the mapattr() function in unix/unix.c
# See the attribute format constants in unzpriv.h
#
# XXX Note that there's one situation that is not implemented
# yet that depends on the "extra field."
sub _mapPermissionsToUnix {
my $self = shift;
my $format = $self->{'fileAttributeFormat'};
my $attribs = $self->{'externalFileAttributes'};
my $mode = 0;
if ($format == FA_AMIGA) {
$attribs = $attribs >> 17 & 7; # Amiga RWE bits
$mode = $attribs << 6 | $attribs << 3 | $attribs;
return $mode;
}
if ($format == FA_THEOS) {
$attribs &= 0xF1FFFFFF;
if (($attribs & 0xF0000000) != 0x40000000) {
$attribs &= 0x01FFFFFF; # not a dir, mask all ftype bits
} else {
$attribs &= 0x41FFFFFF; # leave directory bit as set
}
}
if ( $format == FA_UNIX
|| $format == FA_VAX_VMS
|| $format == FA_ACORN
|| $format == FA_ATARI_ST
|| $format == FA_BEOS
|| $format == FA_QDOS
|| $format == FA_TANDEM) {
$mode = $attribs >> 16;
return $mode if $mode != 0 or not $self->localExtraField;
# warn("local extra field is: ", $self->localExtraField, "\n");
# XXX This condition is not implemented
# I'm just including the comments from the info-zip section for now.
# Some (non-Info-ZIP) implementations of Zip for Unix and
# VMS (and probably others ??) leave 0 in the upper 16-bit
# part of the external_file_attributes field. Instead, they
# store file permission attributes in some extra field.
# As a work-around, we search for the presence of one of
# these extra fields and fall back to the MSDOS compatible
# part of external_file_attributes if one of the known
# e.f. types has been detected.
# Later, we might implement extraction of the permission
# bits from the VMS extra field. But for now, the work-around
# should be sufficient to provide "readable" extracted files.
# (For ASI Unix e.f., an experimental remap from the e.f.
# mode value IS already provided!)
}
# PKWARE's PKZip for Unix marks entries as FA_MSDOS, but stores the
# Unix attributes in the upper 16 bits of the external attributes
# field, just like Info-ZIP's Zip for Unix. We try to use that
# value, after a check for consistency with the MSDOS attribute
# bits (see below).
if ($format == FA_MSDOS) {
$mode = $attribs >> 16;
}
# FA_MSDOS, FA_OS2_HPFS, FA_WINDOWS_NTFS, FA_MACINTOSH, FA_TOPS20
$attribs = !($attribs & 1) << 1 | ($attribs & 0x10) >> 4;
# keep previous $mode setting when its "owner"
# part appears to be consistent with DOS attribute flags!
return $mode if ($mode & 0700) == (0400 | $attribs << 6);
$mode = 0444 | $attribs << 6 | $attribs << 3 | $attribs;
return $mode;
}
sub unixFileAttributes {
my $self = shift;
my $oldPerms = $self->_mapPermissionsToUnix;
my $perms;
if (@_) {
$perms = (ref($_[0]) eq 'HASH') ? $_[0]->{attributes} : $_[0];
if ($self->isDirectory) {
$perms &= ~FILE_ATTRIB;
$perms |= DIRECTORY_ATTRIB;
} else {
$perms &= ~DIRECTORY_ATTRIB;
$perms |= FILE_ATTRIB;
}
$self->{externalFileAttributes} =
$self->_mapPermissionsFromUnix($perms);
}
return $oldPerms;
}
sub localExtraField {
my $self = shift;
if (@_) {
my $localExtraField =
(ref($_[0]) eq 'HASH') ? $_[0]->{field} : $_[0];
my ($status, $zip64) =
$self->_extractZip64ExtraField($localExtraField, undef, undef);
if ($status != AZ_OK) {
return $status;
}
elsif ($zip64) {
return _formatError('invalid extra field (contains zip64 information)');
}
else {
$self->{localExtraField} = $localExtraField;
return AZ_OK;
}
} else {
return $self->{localExtraField};
}
}
sub cdExtraField {
my $self = shift;
if (@_) {
my $cdExtraField =
(ref($_[0]) eq 'HASH') ? $_[0]->{field} : $_[0];
my ($status, $zip64) =
$self->_extractZip64ExtraField($cdExtraField, undef, undef);
if ($status != AZ_OK) {
return $status;
}
elsif ($zip64) {
return _formatError('invalid extra field (contains zip64 information)');
}
else {
$self->{cdExtraField} = $cdExtraField;
return AZ_OK;
}
} else {
return $self->{cdExtraField};
}
}
sub extraFields {
my $self = shift;
return $self->localExtraField() . $self->cdExtraField();
}
sub fileComment {
my $self = shift;
if (@_) {
$self->{fileComment} =
(ref($_[0]) eq 'HASH')
? pack('C0a*', $_[0]->{comment})
: pack('C0a*', $_[0]);
} else {
return $self->{fileComment};
}
}
sub hasDataDescriptor {
my $self = shift;
if (@_) {
my $shouldHave = shift;
if ($shouldHave) {
$self->{'bitFlag'} |= GPBF_HAS_DATA_DESCRIPTOR_MASK;
} else {
$self->{'bitFlag'} &= ~GPBF_HAS_DATA_DESCRIPTOR_MASK;
}
}
return $self->{'bitFlag'} & GPBF_HAS_DATA_DESCRIPTOR_MASK;
}
sub crc32 {
shift->{'crc32'};
}
sub crc32String {
sprintf("%08x", shift->{'crc32'});
}
sub compressedSize {
shift->{'compressedSize'};
}
sub uncompressedSize {
shift->{'uncompressedSize'};
}
sub isEncrypted {
shift->{'bitFlag'} & GPBF_ENCRYPTED_MASK;
}
sub isTextFile {
my $self = shift;
my $bit = $self->internalFileAttributes() & IFA_TEXT_FILE_MASK;
if (@_) {
my $flag = (ref($_[0]) eq 'HASH') ? shift->{flag} : shift;
$self->{'internalFileAttributes'} &= ~IFA_TEXT_FILE_MASK;
$self->{'internalFileAttributes'} |=
($flag ? IFA_TEXT_FILE : IFA_BINARY_FILE);
}
return $bit == IFA_TEXT_FILE;
}
sub isBinaryFile {
my $self = shift;
my $bit = $self->internalFileAttributes() & IFA_TEXT_FILE_MASK;
if (@_) {
my $flag = shift;
$self->{'internalFileAttributes'} &= ~IFA_TEXT_FILE_MASK;
$self->{'internalFileAttributes'} |=
($flag ? IFA_BINARY_FILE : IFA_TEXT_FILE);
}
return $bit == IFA_BINARY_FILE;
}
sub extractToFileNamed {
my $self = shift;
# local FS name
my $name = (ref($_[0]) eq 'HASH') ? $_[0]->{name} : $_[0];
# Create directory for regular files as well as for symbolic
# links
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) {
$name = decode_utf8(Win32::GetFullPathName($name));
mkpath_win32($name);
} else {
mkpath(dirname($name)); # croaks on error
}
# Check if the file / directory is a symbolic link *and* if
# the operating system supports these. Only in that case
# call method extractToFileHandle with the name of the
# symbolic link. If the operating system does not support
# symbolic links, process the member using the usual
# extraction routines, which creates a file containing the
# link target.
if ($self->isSymbolicLink() && OS_SUPPORTS_SYMLINK) {
return $self->extractToFileHandle($name);
} else {
my ($status, $fh);
if ($^O eq 'MSWin32' && $Archive::Zip::UNICODE) {
Win32::CreateFile($name);
($status, $fh) = _newFileHandle(Win32::GetANSIPathName($name), 'w');
} else {
($status, $fh) = _newFileHandle($name, 'w');
}
return _ioError("Can't open file $name for write") unless $status;
$status = $self->extractToFileHandle($fh);
$fh->close();
chmod($self->unixFileAttributes(), $name)
or return _error("Can't chmod() ${name}: $!");
utime($self->lastModTime(), $self->lastModTime(), $name);
return $status;
}
}
sub mkpath_win32 {
my $path = shift;
use File::Spec;
my ($volume, @path) = File::Spec->splitdir($path);
$path = File::Spec->catfile($volume, shift @path);
pop @path;
while (@path) {
$path = File::Spec->catfile($path, shift @path);
Win32::CreateDirectory($path);
}
}
sub isSymbolicLink {
return shift->{'externalFileAttributes'} == 0xA1FF0000;
}
sub isDirectory {
return 0;
}
sub externalFileName {
return undef;
}
# Search the given extra field string for a zip64 extended
# information extra field and "correct" the header fields given
# in the remaining parameters with the information from that
# extra field, if required. Writes back the extra field string
# sans the zip64 information. The extra field string and all
# header fields must be passed as lvalues or the undefined value.
#
# This method returns a pair ($status, $zip64) in list context,
# where the latter flag specifies whether a zip64 extended
# information extra field was found.
#
# This method must be called with two header fields for local
# file headers and with four header fields for Central Directory
# headers.
sub _extractZip64ExtraField
{
my $classOrSelf = shift;
my $extraField = $_[0];
my ($zip64Data, $newExtraField) = (undef, '');
while (length($extraField) >= 4) {
my ($headerId, $dataSize) = unpack('v v', $extraField);
if (length($extraField) < 4 + $dataSize) {
return _formatError('invalid extra field (bad data)');
}
elsif ($headerId != 0x0001) {
$newExtraField .= substr($extraField, 0, 4 + $dataSize);
$extraField = substr($extraField, 4 + $dataSize);
}
else {
$zip64Data = substr($extraField, 4, $dataSize);
$extraField = substr($extraField, 4 + $dataSize);
}
}
if (length($extraField) != 0) {
return _formatError('invalid extra field (bad header ID or data size)');
}
my $zip64 = 0;
if (defined($zip64Data)) {
return _zip64NotSupported() unless ZIP64_SUPPORTED;
my $dataLength = length($zip64Data);
# Try to be tolerant with respect to the fields to be
# extracted from the zip64 extended information extra
# field and derive that information from the data itself,
# if possible. This works around, for example, incorrect
# extra fields written by certain versions of package
# IO::Compress::Zip. That package provides the disk
# number start in the extra field without setting the
# corresponding regular field to 0xffff. Plus it
# provides the full set of fields even for the local file
# header.
#
# Field zero is the extra field string which we must keep
# in @_ for future modification, so account for that.
my @fields;
if (@_ == 3 && $dataLength == 16) {
@fields = (undef, 0xffffffff, 0xffffffff);
}
elsif (@_ == 3 && $dataLength == 24) {
push(@_, undef);
@fields = (undef, 0xffffffff, 0xffffffff, 0xffffffff);
}
elsif (@_ == 3 && $dataLength == 28) {
push(@_, undef, undef);
@fields = (undef, 0xffffffff, 0xffffffff, 0xffffffff, 0xffff);
}
elsif (@_ == 5 && $dataLength == 24) {
@fields = (undef, 0xffffffff, 0xffffffff, 0xffffffff);
}
elsif (@_ == 5 && $dataLength == 28) {
@fields = (undef, 0xffffffff, 0xffffffff, 0xffffffff, 0xffff);
}
else {
@fields = map { defined $_ ? $_ : 0 } @_;
}
my @fieldIndexes = (0);
my $fieldFormat = '';
my $expDataLength = 0;
if ($fields[1] == 0xffffffff) {
push(@fieldIndexes, 1);
$fieldFormat .= 'Q< ';
$expDataLength += 8;
}
if ($fields[2] == 0xffffffff) {
push(@fieldIndexes, 2);
$fieldFormat .= 'Q< ';
$expDataLength += 8;
}
if (@fields > 3 && $fields[3] == 0xffffffff) {
push(@fieldIndexes, 3);
$fieldFormat .= 'Q< ';
$expDataLength += 8;
}
if (@fields > 3 && $fields[4] == 0xffff) {
push(@fieldIndexes, 4);
$fieldFormat .= 'L< ';
$expDataLength += 4;
}
if ($dataLength == $expDataLength) {
@_[@fieldIndexes] = ($newExtraField, unpack($fieldFormat, $zip64Data));
$zip64 = 1;
}
else {
return _formatError('invalid zip64 extended information extra field');
}
}
return (AZ_OK, $zip64);
}
# The following are used when copying data
sub _writeOffset {
shift->{'writeOffset'};
}
sub _readOffset {
shift->{'readOffset'};
}
sub writeLocalHeaderRelativeOffset {
shift->{'writeLocalHeaderRelativeOffset'};
}
# Maintained in method Archive::Zip::Archive::writeToFileHandle
sub wasWritten {
shift->{'wasWritten'}
}
sub _dataEnded {
shift->{'dataEnded'};
}
sub _readDataRemaining {
shift->{'readDataRemaining'};
}
sub _inflater {
shift->{'inflater'};
}
sub _deflater {
shift->{'deflater'};
}
# DOS date/time format
# 0-4 (5) Second divided by 2
# 5-10 (6) Minute (0-59)
# 11-15 (5) Hour (0-23 on a 24-hour clock)
# 16-20 (5) Day of the month (1-31)
# 21-24 (4) Month (1 = January, 2 = February, etc.)
# 25-31 (7) Year offset from 1980 (add 1980 to get actual year)
# Convert DOS date/time format to unix time_t format
# NOT AN OBJECT METHOD!
sub _dosToUnixTime {
my $dt = shift;
return time() unless defined($dt);
my $year = (($dt >> 25) & 0x7f) + 1980;
my $mon = (($dt >> 21) & 0x0f) - 1;
my $mday = (($dt >> 16) & 0x1f);
my $hour = (($dt >> 11) & 0x1f);
my $min = (($dt >> 5) & 0x3f);
my $sec = (($dt << 1) & 0x3e);
# catch errors
my $time_t =
eval { Time::Local::timelocal($sec, $min, $hour, $mday, $mon, $year); };
return time() if ($@);
return $time_t;
}
# Note, this is not exactly UTC 1980, it's 1980 + 12 hours and 1
# minute so that nothing timezoney can muck us up.
my $safe_epoch = 31.686060;
# convert a unix time to DOS date/time
# NOT AN OBJECT METHOD!
sub _unixToDosTime {
my $time_t = shift;
unless ($time_t) {
_error("Tried to add member with zero or undef value for time");
$time_t = $safe_epoch;
}
if ($time_t < $safe_epoch) {
_ioError("Unsupported date before 1980 encountered, moving to 1980");
$time_t = $safe_epoch;
}
my ($sec, $min, $hour, $mday, $mon, $year) = localtime($time_t);
my $dt = 0;
$dt += ($sec >> 1);
$dt += ($min << 5);
$dt += ($hour << 11);
$dt += ($mday << 16);
$dt += (($mon + 1) << 21);
$dt += (($year - 80) << 25);
return $dt;
}
# Write my local header to a file handle.
# Returns a pair (AZ_OK, $headerSize) on success.
sub _writeLocalFileHeader {
my $self = shift;
my $fh = shift;
my $refresh = @_ ? shift : 0;
my $zip64 = $self->zip64();
my $hasDataDescriptor = $self->hasDataDescriptor();
my $versionNeededToExtract = $self->versionNeededToExtract();
my $crc32;
my $compressedSize;
my $uncompressedSize;
my $localExtraField = $self->localExtraField();
if (! $zip64) {
if ($refresh) {
$crc32 = $self->crc32();
$compressedSize = $self->_writeOffset();
$uncompressedSize = $self->uncompressedSize();
# Handle a brain-dead corner case gracefully.
# Otherwise we a) would always need to write zip64
# format or b) re-write the complete member data on
# refresh (which might not always be possible).
if ($compressedSize > 0xffffffff) {
return _formatError('compressed size too large for refresh');
}
}
elsif ($hasDataDescriptor) {
$crc32 = 0;
$compressedSize = 0;
$uncompressedSize = 0;
}
else {
$crc32 = $self->crc32();
$compressedSize = $self->_writeOffset();
$uncompressedSize = $self->uncompressedSize();
}
}
else {
return _zip64NotSupported() unless ZIP64_SUPPORTED;
$versionNeededToExtract = 45 if ($versionNeededToExtract < 45);
my $zip64CompressedSize;
my $zip64UncompressedSize;
if ($refresh) {
$crc32 = $self->crc32();
$compressedSize = 0xffffffff;
$uncompressedSize = 0xffffffff;
$zip64CompressedSize = $self->_writeOffset();
$zip64UncompressedSize = $self->uncompressedSize();
}
elsif ($hasDataDescriptor) {
$crc32 = 0;
$compressedSize = 0xffffffff;
$uncompressedSize = 0xffffffff;
$zip64CompressedSize = 0;
$zip64UncompressedSize = 0;
}
else {
$crc32 = $self->crc32();
$compressedSize = 0xffffffff;
$uncompressedSize = 0xffffffff;
$zip64CompressedSize = $self->_writeOffset();
$zip64UncompressedSize = $self->uncompressedSize();
}
$localExtraField .= pack('S< S< Q< Q<',
0x0001, 16,
$zip64UncompressedSize,
$zip64CompressedSize);
}
my $fileNameLength = length($self->fileNameAsBytes());
my $localFieldLength = length($localExtraField);
my $signatureData = pack(SIGNATURE_FORMAT, LOCAL_FILE_HEADER_SIGNATURE);
$self->_print($fh, $signatureData)
or return _ioError("writing local header signature");
my $header =
pack(LOCAL_FILE_HEADER_FORMAT,
$versionNeededToExtract,
$self->{'bitFlag'},
$self->desiredCompressionMethod(),
$self->lastModFileDateTime(),
$crc32,
$compressedSize,
$uncompressedSize,
$fileNameLength,
$localFieldLength);
$self->_print($fh, $header)
or return _ioError("writing local header");
# Write these only if required
if (! $refresh || $zip64) {
if ($fileNameLength) {
$self->_print($fh, $self->fileNameAsBytes())
or return _ioError("writing local header filename");
}
if ($localFieldLength) {
$self->_print($fh, $localExtraField)
or return _ioError("writing local extra field");
}
}
return
(AZ_OK,
LOCAL_FILE_HEADER_LENGTH +
SIGNATURE_LENGTH +
$fileNameLength +
$localFieldLength);
}
# Re-writes the local file header with new crc32 and compressedSize fields.
# To be called after writing the data stream.
# Assumes that filename and extraField sizes didn't change since last written.
sub _refreshLocalFileHeader {
my $self = shift;
my $fh = shift;
my $here = $fh->tell();
$fh->seek($self->writeLocalHeaderRelativeOffset(), IO::Seekable::SEEK_SET)
or return _ioError("seeking to rewrite local header");
my ($status, undef) = $self->_writeLocalFileHeader($fh, 1);
return $status if $status != AZ_OK;
$fh->seek($here, IO::Seekable::SEEK_SET)
or return _ioError("seeking after rewrite of local header");
return AZ_OK;
}
# Write central directory file header.
# Returns a pair (AZ_OK, $headerSize) on success.
sub _writeCentralDirectoryFileHeader {
my $self = shift;
my $fh = shift;
my $adz64m = shift; # $archiveDesiredZip64Mode
# (Re-)Determine whether to write zip64 format. Assume
# {'diskNumberStart'} is always zero.
my $zip64 = $adz64m == ZIP64_HEADERS
|| $self->desiredZip64Mode() == ZIP64_HEADERS
|| $self->_writeOffset() > 0xffffffff
|| $self->uncompressedSize() > 0xffffffff
|| $self->writeLocalHeaderRelativeOffset() > 0xffffffff;
$self->{'zip64'} ||= $zip64;
my $versionMadeBy = $self->versionMadeBy();
my $versionNeededToExtract = $self->versionNeededToExtract();
my $compressedSize = $self->_writeOffset();
my $uncompressedSize = $self->uncompressedSize();
my $localHeaderRelativeOffset = $self->writeLocalHeaderRelativeOffset();
my $cdExtraField = $self->cdExtraField();
if (!$zip64) {
# no-op
}
else {
return _zip64NotSupported() unless ZIP64_SUPPORTED;
$versionNeededToExtract = 45 if ($versionNeededToExtract < 45);
my $extraFieldFormat = '';
my @extraFieldValues = ();
my $extraFieldSize = 0;
if ($uncompressedSize > 0xffffffff) {
$extraFieldFormat .= 'Q< ';
push(@extraFieldValues, $uncompressedSize);
$extraFieldSize += 8;
$uncompressedSize = 0xffffffff;
}
if ($compressedSize > 0xffffffff) {
$extraFieldFormat .= 'Q< ';
push(@extraFieldValues, $compressedSize);
$extraFieldSize += 8;
$compressedSize = 0xffffffff;
}
# Avoid empty zip64 extended information extra fields
if ( $localHeaderRelativeOffset > 0xffffffff
|| @extraFieldValues == 0) {
$extraFieldFormat .= 'Q< ';
push(@extraFieldValues, $localHeaderRelativeOffset);
$extraFieldSize += 8;
$localHeaderRelativeOffset = 0xffffffff;
}
$cdExtraField .=
pack("S< S< $extraFieldFormat",
0x0001, $extraFieldSize,
@extraFieldValues);
}
my $fileNameLength = length($self->fileNameAsBytes());
my $extraFieldLength = length($cdExtraField);
my $fileCommentLength = length($self->fileComment());
my $sigData =
pack(SIGNATURE_FORMAT, CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE);
$self->_print($fh, $sigData)
or return _ioError("writing central directory header signature");
my $header = pack(
CENTRAL_DIRECTORY_FILE_HEADER_FORMAT,
$versionMadeBy,
$self->fileAttributeFormat(),
$versionNeededToExtract,
$self->bitFlag(),
$self->desiredCompressionMethod(),
$self->lastModFileDateTime(),
$self->crc32(), # these three fields should have been updated
$compressedSize, # by writing the data stream out
$uncompressedSize, #
$fileNameLength,
$extraFieldLength,
$fileCommentLength,
0, # {'diskNumberStart'},
$self->internalFileAttributes(),
$self->externalFileAttributes(),
$localHeaderRelativeOffset);
$self->_print($fh, $header)
or return _ioError("writing central directory header");
if ($fileNameLength) {
$self->_print($fh, $self->fileNameAsBytes())
or return _ioError("writing central directory header signature");
}
if ($extraFieldLength) {
$self->_print($fh, $cdExtraField)
or return _ioError("writing central directory extra field");
}
if ($fileCommentLength) {
$self->_print($fh, $self->fileComment())
or return _ioError("writing central directory file comment");
}
# Update object members with information which might have
# changed while writing this member. We already did the
# zip64 flag. We must not update the extra fields with any
# zip64 information, since we consider that internal.
$self->{'versionNeededToExtract'} = $versionNeededToExtract;
$self->{'compressedSize'} = $self->_writeOffset();
return
(AZ_OK,
CENTRAL_DIRECTORY_FILE_HEADER_LENGTH +
SIGNATURE_LENGTH +
$fileNameLength +
$extraFieldLength +
$fileCommentLength)
}
# This writes a data descriptor to the given file handle.
# Assumes that crc32, writeOffset, and uncompressedSize are
# set correctly (they should be after a write).
# Returns a pair (AZ_OK, $dataDescriptorSize) on success.
# Further, the local file header should have the
# GPBF_HAS_DATA_DESCRIPTOR_MASK bit set.
sub _writeDataDescriptor {
my $self = shift;
my $fh = shift;
my $descriptor;
if (! $self->zip64()) {
$descriptor =
pack(SIGNATURE_FORMAT . DATA_DESCRIPTOR_FORMAT,
DATA_DESCRIPTOR_SIGNATURE,
$self->crc32(),
$self->_writeOffset(), # compressed size
$self->uncompressedSize());
}
else {
return _zip64NotSupported() unless ZIP64_SUPPORTED;
$descriptor =
pack(SIGNATURE_FORMAT . DATA_DESCRIPTOR_ZIP64_FORMAT,
DATA_DESCRIPTOR_SIGNATURE,
$self->crc32(),
$self->_writeOffset(), # compressed size
$self->uncompressedSize());
}
$self->_print($fh, $descriptor)
or return _ioError("writing data descriptor");
return (AZ_OK, length($descriptor));
}
sub readChunk {
my $self = shift;
my $chunkSize = (ref($_[0]) eq 'HASH') ? $_[0]->{chunkSize} : $_[0];
if ($self->readIsDone()) {
$self->endRead();
my $dummy = '';
return (\$dummy, AZ_STREAM_END);
}
$chunkSize = $Archive::Zip::ChunkSize if not defined($chunkSize);
$chunkSize = $self->_readDataRemaining()
if $chunkSize > $self->_readDataRemaining();
my $buffer = '';
my $outputRef;
my ($bytesRead, $status) = $self->_readRawChunk(\$buffer, $chunkSize);
return (\$buffer, $status) unless $status == AZ_OK;
$buffer && $self->isEncrypted and $buffer = $self->_decode($buffer);
$self->{'readDataRemaining'} -= $bytesRead;
$self->{'readOffset'} += $bytesRead;
if ($self->compressionMethod() == COMPRESSION_STORED) {
$self->{'crc32'} = $self->computeCRC32($buffer, $self->{'crc32'});
}
($outputRef, $status) = &{$self->{'chunkHandler'}}($self, \$buffer);
$self->{'writeOffset'} += length($$outputRef);
$self->endRead()
if $self->readIsDone();
return ($outputRef, $status);
}
# Read the next raw chunk of my data. Subclasses MUST implement.
# my ( $bytesRead, $status) = $self->_readRawChunk( \$buffer, $chunkSize );
sub _readRawChunk {
my $self = shift;
return $self->_subclassResponsibility();
}
# A place holder to catch rewindData errors if someone ignores
# the error code.
sub _noChunk {
my $self = shift;
return (\undef, _error("trying to copy chunk when init failed"));
}
# Basically a no-op so that I can have a consistent interface.
# ( $outputRef, $status) = $self->_copyChunk( \$buffer );
sub _copyChunk {
my ($self, $dataRef) = @_;
return ($dataRef, AZ_OK);
}
# ( $outputRef, $status) = $self->_deflateChunk( \$buffer );
sub _deflateChunk {
my ($self, $buffer) = @_;
my ($status) = $self->_deflater()->deflate($buffer, my $out);
if ($self->_readDataRemaining() == 0) {
my $extraOutput;
($status) = $self->_deflater()->flush($extraOutput);
$out .= $extraOutput;
$self->endRead();
return (\$out, AZ_STREAM_END);
} elsif ($status == Z_OK) {
return (\$out, AZ_OK);
} else {
$self->endRead();
my $retval = _error('deflate error', $status);
my $dummy = '';
return (\$dummy, $retval);
}
}
# ( $outputRef, $status) = $self->_inflateChunk( \$buffer );
sub _inflateChunk {
my ($self, $buffer) = @_;
my ($status) = $self->_inflater()->inflate($buffer, my $out);
my $retval;
$self->endRead() unless $status == Z_OK;
if ($status == Z_OK || $status == Z_STREAM_END) {
$retval = ($status == Z_STREAM_END) ? AZ_STREAM_END : AZ_OK;
return (\$out, $retval);
} else {
$retval = _error('inflate error', $status);
my $dummy = '';
return (\$dummy, $retval);
}
}
sub rewindData {
my $self = shift;
my $status;
# set to trap init errors
$self->{'chunkHandler'} = $self->can('_noChunk');
# Work around WinZip bug with 0-length DEFLATED files
$self->desiredCompressionMethod(COMPRESSION_STORED)
if $self->uncompressedSize() == 0;
# assume that we're going to read the whole file, and compute the CRC anew.
$self->{'crc32'} = 0
if ($self->compressionMethod() == COMPRESSION_STORED);
# These are the only combinations of methods we deal with right now.
if ( $self->compressionMethod() == COMPRESSION_STORED
and $self->desiredCompressionMethod() == COMPRESSION_DEFLATED) {
($self->{'deflater'}, $status) = Compress::Raw::Zlib::Deflate->new(
'-Level' => $self->desiredCompressionLevel(),
'-WindowBits' => -MAX_WBITS(), # necessary magic
'-Bufsize' => $Archive::Zip::ChunkSize,
@_
); # pass additional options
return _error('deflateInit error:', $status)
unless $status == Z_OK;
$self->{'chunkHandler'} = $self->can('_deflateChunk');
} elsif ($self->compressionMethod() == COMPRESSION_DEFLATED
and $self->desiredCompressionMethod() == COMPRESSION_STORED) {
($self->{'inflater'}, $status) = Compress::Raw::Zlib::Inflate->new(
'-WindowBits' => -MAX_WBITS(), # necessary magic
'-Bufsize' => $Archive::Zip::ChunkSize,
@_
); # pass additional options
return _error('inflateInit error:', $status)
unless $status == Z_OK;
$self->{'chunkHandler'} = $self->can('_inflateChunk');
} elsif ($self->compressionMethod() == $self->desiredCompressionMethod()) {
$self->{'chunkHandler'} = $self->can('_copyChunk');
} else {
return _error(
sprintf(
"Unsupported compression combination: read %d, write %d",
$self->compressionMethod(),
$self->desiredCompressionMethod()));
}
$self->{'readDataRemaining'} =
($self->compressionMethod() == COMPRESSION_STORED)
? $self->uncompressedSize()
: $self->compressedSize();
$self->{'dataEnded'} = 0;
$self->{'readOffset'} = 0;
return AZ_OK;
}
sub endRead {
my $self = shift;
delete $self->{'inflater'};
delete $self->{'deflater'};
$self->{'dataEnded'} = 1;
$self->{'readDataRemaining'} = 0;
return AZ_OK;
}
sub readIsDone {
my $self = shift;
return ($self->_dataEnded() or !$self->_readDataRemaining());
}
sub contents {
my $self = shift;
my $newContents = shift;
if (defined($newContents)) {
# Change our type and ensure that succeeded to avoid
# endless recursion
$self->_become('Archive::Zip::StringMember');
$self->_ISA('Archive::Zip::StringMember') or
return
wantarray
? (undef, $self->_error('becoming Archive::Zip::StringMember'))
: undef;
# Now call the subclass contents method
my $retval =
$self->contents(pack('C0a*', $newContents)); # in case of Unicode
return wantarray ? ($retval, AZ_OK) : $retval;
} else {
my $oldCompression =
$self->desiredCompressionMethod(COMPRESSION_STORED);
my $status = $self->rewindData(@_);
if ($status != AZ_OK) {
$self->endRead();
return wantarray ? (undef, $status) : undef;
}
my $retval = '';
while ($status == AZ_OK) {
my $ref;
($ref, $status) = $self->readChunk($self->_readDataRemaining());
# did we get it in one chunk?
if (length($$ref) == $self->uncompressedSize()) {
$retval = $$ref;
} else {
$retval .= $$ref
}
}
$self->desiredCompressionMethod($oldCompression);
$self->endRead();
$status = AZ_OK if $status == AZ_STREAM_END;
$retval = undef unless $status == AZ_OK;
return wantarray ? ($retval, $status) : $retval;
}
}
sub extractToFileHandle {
my $self = shift;
# This can be the link name when "extracting" symbolic links
my $fhOrName = (ref($_[0]) eq 'HASH') ? shift->{fileHandle} : shift;
_binmode($fhOrName) if ref($fhOrName);
my $oldCompression = $self->desiredCompressionMethod(COMPRESSION_STORED);
my $status = $self->rewindData(@_);
$status = $self->_writeData($fhOrName) if $status == AZ_OK;
$self->desiredCompressionMethod($oldCompression);
$self->endRead();
return $status;
}
# write local header and data stream to file handle.
# Returns a pair ($status, $memberSize) if successful.
# Stores the offset to the start of the header in my
# writeLocalHeaderRelativeOffset member.
sub _writeToFileHandle {
my $self = shift;
my $fh = shift;
my $fhIsSeekable = shift;
my $offset = shift;
my $adz64m = shift; # $archiveDesiredZip64Mode
return _error("no member name given for $self")
if $self->fileName() eq '';
$self->{'writeLocalHeaderRelativeOffset'} = $offset;
# Determine if I need to refresh the header in a second pass
# later. If in doubt, I'd rather refresh, since it does not
# seem to be worth the hassle to save the extra seeks and
# writes. In addition, having below condition independent of
# any specific compression methods helps me piping through
# members with unknown compression methods unchanged. See
# test t/26_bzip2.t for details.
my $headerFieldsUnknown = $self->uncompressedSize() > 0;
# Determine if I need to write a data descriptor
# I need to do this if I can't refresh the header
# and I don't know compressed size or crc32 fields.
my $shouldWriteDataDescriptor =
($headerFieldsUnknown and not $fhIsSeekable);
$self->hasDataDescriptor(1)
if ($shouldWriteDataDescriptor);
# Determine whether to write zip64 format
my $zip64 = $adz64m == ZIP64_HEADERS
|| $self->desiredZip64Mode() == ZIP64_HEADERS
|| $self->uncompressedSize() > 0xffffffff;
$self->{'zip64'} ||= $zip64;
$self->{'writeOffset'} = 0;
my $status = $self->rewindData();
return $status if $status != AZ_OK;
my $memberSize;
($status, $memberSize) = $self->_writeLocalFileHeader($fh);
return $status if $status != AZ_OK;
$status = $self->_writeData($fh);
return $status if $status != AZ_OK;
$memberSize += $self->_writeOffset();
if ($self->hasDataDescriptor()) {
my $ddSize;
($status, $ddSize) = $self->_writeDataDescriptor($fh);
$memberSize += $ddSize;
} elsif ($headerFieldsUnknown) {
$status = $self->_refreshLocalFileHeader($fh);
}
return $status if $status != AZ_OK;
return ($status, $memberSize);
}
# Copy my (possibly compressed) data to given file handle.
# Returns C<AZ_OK> on success
sub _writeData {
my $self = shift;
my $fhOrName = shift;
if ($self->isSymbolicLink() && OS_SUPPORTS_SYMLINK) {
my $chunkSize = $Archive::Zip::ChunkSize;
my ($outRef, $status) = $self->readChunk($chunkSize);
symlink($$outRef, $fhOrName)
or return _ioError("creating symbolic link");
} else {
return AZ_OK if ($self->uncompressedSize() == 0);
my $status;
my $chunkSize = $Archive::Zip::ChunkSize;
while ($self->_readDataRemaining() > 0) {
my $outRef;
($outRef, $status) = $self->readChunk($chunkSize);
return $status if ($status != AZ_OK and $status != AZ_STREAM_END);
if (length($$outRef) > 0) {
$self->_print($fhOrName, $$outRef)
or return _ioError("write error during copy");
}
last if $status == AZ_STREAM_END;
}
}
return AZ_OK;
}
# Return true if I depend on the named file
sub _usesFileNamed {
return 0;
}
# ##############################################################################
#
# Decrypt section
#
# H.Merijn Brand (Tux) 2011-06-28
#
# ##############################################################################
# This code is derived from the crypt source of unzip-6.0 dated 05 Jan 2007
# Its license states:
#
# --8<---
# Copyright (c) 1990-2007 Info-ZIP. All rights reserved.
# See the accompanying file LICENSE, version 2005-Feb-10 or later
# (the contents of which are also included in (un)zip.h) for terms of use.
# If, for some reason, all these files are missing, the Info-ZIP license
# also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
#
# crypt.c (full version) by Info-ZIP. Last revised: [see crypt.h]
# The main encryption/decryption source code for Info-Zip software was
# originally written in Europe. To the best of our knowledge, it can
# be freely distributed in both source and object forms from any country,
# including the USA under License Exception TSU of the U.S. Export
# Administration Regulations (section 740.13(e)) of 6 June 2002.
# NOTE on copyright history:
# Previous versions of this source package (up to version 2.8) were
# not copyrighted and put in the public domain. If you cannot comply
# with the Info-Zip LICENSE, you may want to look for one of those
# public domain versions.
#
# This encryption code is a direct transcription of the algorithm from
# Roger Schlafly, described by Phil Katz in the file appnote.txt. This
# file (appnote.txt) is distributed with the PKZIP program (even in the
# version without encryption capabilities).
# -->8---
# As of January 2000, US export regulations were amended to allow export
# of free encryption source code from the US. As of June 2002, these
# regulations were further relaxed to allow export of encryption binaries
# associated with free encryption source code. The Zip 2.31, UnZip 5.52
# and Wiz 5.02 archives now include full crypto source code. As of the
# Zip 2.31 release, all official binaries include encryption support; the
# former "zcr" archives ceased to exist.
# (Note that restrictions may still exist in other countries, of course.)
# For now, we just support the decrypt stuff
# All below methods are supposed to be private
# use Data::Peek;
my @keys;
my @crct = do {
my $xor = 0xedb88320;
my @crc = (0) x 1024;
# generate a crc for every 8-bit value
foreach my $n (0 .. 255) {
my $c = $n;
$c = $c & 1 ? $xor ^ ($c >> 1) : $c >> 1 for 1 .. 8;
$crc[$n] = _revbe($c);
}
# generate crc for each value followed by one, two, and three zeros */
foreach my $n (0 .. 255) {
my $c = ($crc[($crc[$n] >> 24) ^ 0] ^ ($crc[$n] << 8)) & 0xffffffff;
$crc[$_ * 256 + $n] = $c for 1 .. 3;
}
map { _revbe($crc[$_]) } 0 .. 1023;
};
sub _crc32 {
my ($c, $b) = @_;
return ($crct[($c ^ $b) & 0xff] ^ ($c >> 8));
} # _crc32
sub _revbe {
my $w = shift;
return (($w >> 24) +
(($w >> 8) & 0xff00) +
(($w & 0xff00) << 8) +
(($w & 0xff) << 24));
} # _revbe
sub _update_keys {
use integer;
my $c = shift; # signed int
$keys[0] = _crc32($keys[0], $c);
$keys[1] = (($keys[1] + ($keys[0] & 0xff)) * 0x08088405 + 1) & 0xffffffff;
my $keyshift = $keys[1] >> 24;
$keys[2] = _crc32($keys[2], $keyshift);
} # _update_keys
sub _zdecode ($) {
my $c = shift;
my $t = ($keys[2] & 0xffff) | 2;
_update_keys($c ^= ((($t * ($t ^ 1)) >> 8) & 0xff));
return $c;
} # _zdecode
sub _decode {
my $self = shift;
my $buff = shift;
$self->isEncrypted or return $buff;
my $pass = $self->password;
defined $pass or return "";
@keys = (0x12345678, 0x23456789, 0x34567890);
_update_keys($_) for unpack "C*", $pass;
# DDumper { uk => [ @keys ] };
my $head = substr $buff, 0, 12, "";
my @head = map { _zdecode($_) } unpack "C*", $head;
my $x =
$self->{externalFileAttributes}
? ($self->{lastModFileDateTime} >> 8) & 0xff
: $self->{crc32} >> 24;
$head[-1] == $x or return ""; # Password fail
# Worth checking ...
$self->{crc32c} = (unpack LOCAL_FILE_HEADER_FORMAT, pack "C*", @head)[3];
# DHexDump ($buff);
$buff = pack "C*" => map { _zdecode($_) } unpack "C*" => $buff;
# DHexDump ($buff);
return $buff;
} # _decode
1;
ARCHIVE_ZIP_MEMBER
$fatpacked{"Archive/Zip/MemberRead.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_MEMBERREAD';
package Archive::Zip::MemberRead;
=head1 NAME
Archive::Zip::MemberRead - A wrapper that lets you read Zip archive members as if they were files.
=cut
=head1 SYNOPSIS
use Archive::Zip;
use Archive::Zip::MemberRead;
$zip = Archive::Zip->new("file.zip");
$fh = Archive::Zip::MemberRead->new($zip, "subdir/abc.txt");
while (defined($line = $fh->getline()))
{
print $fh->input_line_number . "#: $line\n";
}
$read = $fh->read($buffer, 32*1024);
print "Read $read bytes as :$buffer:\n";
=head1 DESCRIPTION
The Archive::Zip::MemberRead module lets you read Zip archive member data
just like you read data from files.
=head1 METHODS
=over 4
=cut
use strict;
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
use vars qw{$VERSION};
my $nl;
BEGIN {
$VERSION = '1.68';
$VERSION = eval $VERSION;
# Requirement for newline conversion. Should check for e.g., DOS and OS/2 as well, but am too lazy.
$nl = $^O eq 'MSWin32' ? "\r\n" : "\n";
}
=item Archive::Zip::Member::readFileHandle()
You can get a C<Archive::Zip::MemberRead> from an archive member by
calling C<readFileHandle()>:
my $member = $zip->memberNamed('abc/def.c');
my $fh = $member->readFileHandle();
while (defined($line = $fh->getline()))
{
# ...
}
$fh->close();
=cut
sub Archive::Zip::Member::readFileHandle {
return Archive::Zip::MemberRead->new(shift());
}
=item Archive::Zip::MemberRead->new($zip, $fileName)
=item Archive::Zip::MemberRead->new($zip, $member)
=item Archive::Zip::MemberRead->new($member)
Construct a new Archive::Zip::MemberRead on the specified member.
my $fh = Archive::Zip::MemberRead->new($zip, 'fred.c')
=cut
sub new {
my ($class, $zip, $file) = @_;
my ($self, $member);
if ($zip && $file) # zip and filename, or zip and member
{
$member = ref($file) ? $file : $zip->memberNamed($file);
} elsif ($zip && !$file && ref($zip)) # just member
{
$member = $zip;
} else {
die(
'Archive::Zip::MemberRead::new needs a zip and filename, zip and member, or member'
);
}
$self = {};
bless($self, $class);
$self->set_member($member);
return $self;
}
sub set_member {
my ($self, $member) = @_;
$self->{member} = $member;
$self->set_compression(COMPRESSION_STORED);
$self->rewind();
}
sub set_compression {
my ($self, $compression) = @_;
$self->{member}->desiredCompressionMethod($compression) if $self->{member};
}
=item setLineEnd(expr)
Set the line end character to use. This is set to \n by default
except on Windows systems where it is set to \r\n. You will
only need to set this on systems which are not Windows or Unix
based and require a line end different from \n.
This is a class method so call as C<Archive::Zip::MemberRead>->C<setLineEnd($nl)>
=cut
sub setLineEnd {
shift;
$nl = shift;
}
=item rewind()
Rewinds an C<Archive::Zip::MemberRead> so that you can read from it again
starting at the beginning.
=cut
sub rewind {
my $self = shift;
$self->_reset_vars();
$self->{member}->rewindData() if $self->{member};
}
sub _reset_vars {
my $self = shift;
$self->{line_no} = 0;
$self->{at_end} = 0;
delete $self->{buffer};
}
=item input_record_separator(expr)
If the argument is given, input_record_separator for this
instance is set to it. The current setting (which may be
the global $/) is always returned.
=cut
sub input_record_separator {
my $self = shift;
if (@_) {
$self->{sep} = shift;
$self->{sep_re} =
_sep_as_re($self->{sep}); # Cache the RE as an optimization
}
return exists $self->{sep} ? $self->{sep} : $/;
}
# Return the input_record_separator in use as an RE fragment
# Note that if we have a per-instance input_record_separator
# we can just return the already converted value. Otherwise,
# the conversion must be done on $/ every time since we cannot
# know whether it has changed or not.
sub _sep_re {
my $self = shift;
# Important to phrase this way: sep's value may be undef.
return exists $self->{sep} ? $self->{sep_re} : _sep_as_re($/);
}
# Convert the input record separator into an RE and return it.
sub _sep_as_re {
my $sep = shift;
if (defined $sep) {
if ($sep eq '') {
return "(?:$nl){2,}";
} else {
$sep =~ s/\n/$nl/og;
return quotemeta $sep;
}
} else {
return undef;
}
}
=item input_line_number()
Returns the current line number, but only if you're using C<getline()>.
Using C<read()> will not update the line number.
=cut
sub input_line_number {
my $self = shift;
return $self->{line_no};
}
=item close()
Closes the given file handle.
=cut
sub close {
my $self = shift;
$self->_reset_vars();
$self->{member}->endRead();
}
=item buffer_size([ $size ])
Gets or sets the buffer size used for reads.
Default is the chunk size used by Archive::Zip.
=cut
sub buffer_size {
my ($self, $size) = @_;
if (!$size) {
return $self->{chunkSize} || Archive::Zip::chunkSize();
} else {
$self->{chunkSize} = $size;
}
}
=item getline()
Returns the next line from the currently open member.
Makes sense only for text files.
A read error is considered fatal enough to die.
Returns undef on eof. All subsequent calls would return undef,
unless a rewind() is called.
Note: The line returned has the input_record_separator (default: newline) removed.
=item getline( { preserve_line_ending => 1 } )
Returns the next line including the line ending.
=cut
sub getline {
my ($self, $argref) = @_;
my $size = $self->buffer_size();
my $sep = $self->_sep_re();
my $preserve_line_ending;
if (ref $argref eq 'HASH') {
$preserve_line_ending = $argref->{'preserve_line_ending'};
$sep =~ s/\\([^A-Za-z_0-9])+/$1/g;
}
for (; ;) {
if ( $sep
&& defined($self->{buffer})
&& $self->{buffer} =~ s/^(.*?)$sep//s) {
my $line = $1;
$self->{line_no}++;
if ($preserve_line_ending) {
return $line . $sep;
} else {
return $line;
}
} elsif ($self->{at_end}) {
$self->{line_no}++ if $self->{buffer};
return delete $self->{buffer};
}
my ($temp, $status) = $self->{member}->readChunk($size);
if ($status != AZ_OK && $status != AZ_STREAM_END) {
die "ERROR: Error reading chunk from archive - $status";
}
$self->{at_end} = $status == AZ_STREAM_END;
$self->{buffer} .= $$temp;
}
}
=item read($buffer, $num_bytes_to_read)
Simulates a normal C<read()> system call.
Returns the no. of bytes read. C<undef> on error, 0 on eof, I<e.g.>:
$fh = Archive::Zip::MemberRead->new($zip, "sreeji/secrets.bin");
while (1)
{
$read = $fh->read($buffer, 1024);
die "FATAL ERROR reading my secrets !\n" if (!defined($read));
last if (!$read);
# Do processing.
....
}
=cut
#
# All these $_ are required to emulate read().
#
sub read {
my $self = $_[0];
my $size = $_[2];
my ($temp, $status, $ret);
($temp, $status) = $self->{member}->readChunk($size);
if ($status != AZ_OK && $status != AZ_STREAM_END) {
$_[1] = undef;
$ret = undef;
} else {
$_[1] = $$temp;
$ret = length($$temp);
}
return $ret;
}
1;
=back
=head1 AUTHOR
Sreeji K. Das E<lt>sreeji_k@yahoo.comE<gt>
See L<Archive::Zip> by Ned Konz without which this module does not make
any sense!
Minor mods by Ned Konz.
=head1 COPYRIGHT
Copyright 2002 Sreeji K. Das.
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.
=cut
ARCHIVE_ZIP_MEMBERREAD
$fatpacked{"Archive/Zip/MockFileHandle.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_MOCKFILEHANDLE';
package Archive::Zip::MockFileHandle;
# Output file handle that calls a custom write routine
# Ned Konz, March 2000
# This is provided to help with writing zip files
# when you have to process them a chunk at a time.
use strict;
use vars qw{$VERSION};
BEGIN {
$VERSION = '1.68';
$VERSION = eval $VERSION;
}
sub new {
my $class = shift || __PACKAGE__;
$class = ref($class) || $class;
my $self = bless(
{
'position' => 0,
'size' => 0
},
$class
);
return $self;
}
sub eof {
my $self = shift;
return $self->{'position'} >= $self->{'size'};
}
# Copy given buffer to me
sub print {
my $self = shift;
my $bytes = join('', @_);
my $bytesWritten = $self->writeHook($bytes);
if ($self->{'position'} + $bytesWritten > $self->{'size'}) {
$self->{'size'} = $self->{'position'} + $bytesWritten;
}
$self->{'position'} += $bytesWritten;
return $bytesWritten;
}
# Called on each write.
# Override in subclasses.
# Return number of bytes written (0 on error).
sub writeHook {
my $self = shift;
my $bytes = shift;
return length($bytes);
}
sub binmode { 1 }
sub close { 1 }
sub clearerr { 1 }
# I'm write-only!
sub read { 0 }
sub tell { return shift->{'position'} }
sub opened { 1 }
1;
ARCHIVE_ZIP_MOCKFILEHANDLE
$fatpacked{"Archive/Zip/NewFileMember.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_NEWFILEMEMBER';
package Archive::Zip::NewFileMember;
use strict;
use vars qw( $VERSION @ISA );
BEGIN {
$VERSION = '1.68';
@ISA = qw ( Archive::Zip::FileMember );
}
use Archive::Zip qw(
:CONSTANTS
:ERROR_CODES
:UTILITY_METHODS
);
# Given a file name, set up for eventual writing.
sub _newFromFileNamed {
my $class = shift;
my $fileName = shift; # local FS format
my $newName = shift;
$newName = _asZipDirName($fileName) unless defined($newName);
return undef unless (stat($fileName) && -r _ && !-d _ );
my $self = $class->new(@_);
$self->{'fileName'} = $newName;
$self->{'externalFileName'} = $fileName;
$self->{'compressionMethod'} = COMPRESSION_STORED;
my @stat = stat(_);
$self->{'compressedSize'} = $self->{'uncompressedSize'} = $stat[7];
$self->desiredCompressionMethod(
($self->compressedSize() > 0)
? COMPRESSION_DEFLATED
: COMPRESSION_STORED
);
$self->unixFileAttributes($stat[2]);
$self->setLastModFileDateTimeFromUnix($stat[9]);
$self->isTextFile(-T _ );
return $self;
}
sub rewindData {
my $self = shift;
my $status = $self->SUPER::rewindData(@_);
return $status unless $status == AZ_OK;
return AZ_IO_ERROR unless $self->fh();
$self->fh()->clearerr();
$self->fh()->seek(0, IO::Seekable::SEEK_SET)
or return _ioError("rewinding", $self->externalFileName());
return AZ_OK;
}
# Return bytes read. Note that first parameter is a ref to a buffer.
# my $data;
# my ( $bytesRead, $status) = $self->readRawChunk( \$data, $chunkSize );
sub _readRawChunk {
my ($self, $dataRef, $chunkSize) = @_;
return (0, AZ_OK) unless $chunkSize;
my $bytesRead = $self->fh()->read($$dataRef, $chunkSize)
or return (0, _ioError("reading data"));
return ($bytesRead, AZ_OK);
}
# If I already exist, extraction is a no-op.
sub extractToFileNamed {
my $self = shift;
my $name = shift; # local FS name
if (File::Spec->rel2abs($name) eq
File::Spec->rel2abs($self->externalFileName()) and -r $name) {
return AZ_OK;
} else {
return $self->SUPER::extractToFileNamed($name, @_);
}
}
1;
ARCHIVE_ZIP_NEWFILEMEMBER
$fatpacked{"Archive/Zip/StringMember.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_STRINGMEMBER';
package Archive::Zip::StringMember;
use strict;
use vars qw( $VERSION @ISA );
BEGIN {
$VERSION = '1.68';
@ISA = qw( Archive::Zip::Member );
}
use Archive::Zip qw(
:CONSTANTS
:ERROR_CODES
);
# Create a new string member. Default is COMPRESSION_STORED.
# Can take a ref to a string as well.
sub _newFromString {
my $class = shift;
my $string = shift;
my $name = shift;
my $self = $class->new(@_);
$self->contents($string);
$self->fileName($name) if defined($name);
# Set the file date to now
$self->setLastModFileDateTimeFromUnix(time());
$self->unixFileAttributes($self->DEFAULT_FILE_PERMISSIONS);
return $self;
}
sub _become {
my $self = shift;
my $newClass = shift;
return $self if ref($self) eq $newClass;
delete($self->{'contents'});
return $self->SUPER::_become($newClass);
}
# Get or set my contents. Note that we do not call the superclass
# version of this, because it calls us.
sub contents {
my $self = shift;
my $string = shift;
if (defined($string)) {
$self->{'contents'} =
pack('C0a*', (ref($string) eq 'SCALAR') ? $$string : $string);
$self->{'uncompressedSize'} = $self->{'compressedSize'} =
length($self->{'contents'});
$self->{'compressionMethod'} = COMPRESSION_STORED;
}
return wantarray ? ($self->{'contents'}, AZ_OK) : $self->{'contents'};
}
# Return bytes read. Note that first parameter is a ref to a buffer.
# my $data;
# my ( $bytesRead, $status) = $self->readRawChunk( \$data, $chunkSize );
sub _readRawChunk {
my ($self, $dataRef, $chunkSize) = @_;
$$dataRef = substr($self->contents(), $self->_readOffset(), $chunkSize);
return (length($$dataRef), AZ_OK);
}
1;
ARCHIVE_ZIP_STRINGMEMBER
$fatpacked{"Archive/Zip/Tree.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_TREE';
package Archive::Zip::Tree;
use strict;
use vars qw{$VERSION};
BEGIN {
$VERSION = '1.68';
}
use Archive::Zip;
warn(
"Archive::Zip::Tree is deprecated; its methods have been moved into Archive::Zip."
) if $^W;
1;
__END__
=head1 NAME
Archive::Zip::Tree - (DEPRECATED) methods for adding/extracting trees using Archive::Zip
=head1 DESCRIPTION
This module is deprecated, because all its methods were moved into the main
Archive::Zip module.
It is included in the distribution merely to avoid breaking old code.
See L<Archive::Zip>.
=head1 AUTHOR
Ned Konz, perl@bike-nomad.com
=head1 COPYRIGHT
Copyright (c) 2000-2002 Ned Konz. All rights reserved. This program is free
software; you can redistribute it and/or modify it under the same terms
as Perl itself.
=head1 SEE ALSO
L<Archive::Zip>
=cut
ARCHIVE_ZIP_TREE
$fatpacked{"Archive/Zip/ZipFileMember.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'ARCHIVE_ZIP_ZIPFILEMEMBER';
package Archive::Zip::ZipFileMember;
use strict;
use vars qw( $VERSION @ISA );
BEGIN {
$VERSION = '1.68';
@ISA = qw ( Archive::Zip::FileMember );
}
use Archive::Zip qw(
:CONSTANTS
:ERROR_CODES
:PKZIP_CONSTANTS
:UTILITY_METHODS
);
# Create a new Archive::Zip::ZipFileMember
# given a filename and optional open file handle
#
sub _newFromZipFile {
my $class = shift;
my $fh = shift;
my $externalFileName = shift;
my $archiveZip64 = @_ ? shift : 0;
my $possibleEocdOffset = @_ ? shift : 0; # normally 0
my $self = $class->new(
'eocdCrc32' => 0,
'diskNumberStart' => 0,
'localHeaderRelativeOffset' => 0,
'dataOffset' => 0, # localHeaderRelativeOffset + header length
@_
);
$self->{'externalFileName'} = $externalFileName;
$self->{'fh'} = $fh;
$self->{'archiveZip64'} = $archiveZip64;
$self->{'possibleEocdOffset'} = $possibleEocdOffset;
return $self;
}
sub isDirectory {
my $self = shift;
return (substr($self->fileName, -1, 1) eq '/'
and $self->uncompressedSize == 0);
}
# Seek to the beginning of the local header, just past the signature.
# Verify that the local header signature is in fact correct.
# Update the localHeaderRelativeOffset if necessary by adding the possibleEocdOffset.
# Returns status.
sub _seekToLocalHeader {
my $self = shift;
my $where = shift; # optional
my $previousWhere = shift; # optional
$where = $self->localHeaderRelativeOffset() unless defined($where);
# avoid loop on certain corrupt files (from Julian Field)
return _formatError("corrupt zip file")
if defined($previousWhere) && $where == $previousWhere;
my $status;
my $signature;
$status = $self->fh()->seek($where, IO::Seekable::SEEK_SET);
return _ioError("seeking to local header") unless $status;
($status, $signature) =
_readSignature($self->fh(), $self->externalFileName(),
LOCAL_FILE_HEADER_SIGNATURE, 1);
return $status if $status == AZ_IO_ERROR;
# retry with EOCD offset if any was given.
if ($status == AZ_FORMAT_ERROR && $self->{'possibleEocdOffset'}) {
$status = $self->_seekToLocalHeader(
$self->localHeaderRelativeOffset() + $self->{'possibleEocdOffset'},
$where
);
if ($status == AZ_OK) {
$self->{'localHeaderRelativeOffset'} +=
$self->{'possibleEocdOffset'};
$self->{'possibleEocdOffset'} = 0;
}
}
return $status;
}
# Because I'm going to delete the file handle, read the local file
# header if the file handle is seekable. If it is not, I assume that
# I've already read the local header.
# Return ( $status, $self )
sub _become {
my $self = shift;
my $newClass = shift;
return $self if ref($self) eq $newClass;
my $status = AZ_OK;
if (_isSeekable($self->fh())) {
my $here = $self->fh()->tell();
$status = $self->_seekToLocalHeader();
$status = $self->_readLocalFileHeader() if $status == AZ_OK;
$self->fh()->seek($here, IO::Seekable::SEEK_SET);
return $status unless $status == AZ_OK;
}
delete($self->{'eocdCrc32'});
delete($self->{'diskNumberStart'});
delete($self->{'localHeaderRelativeOffset'});
delete($self->{'dataOffset'});
delete($self->{'archiveZip64'});
delete($self->{'possibleEocdOffset'});
return $self->SUPER::_become($newClass);
}
sub diskNumberStart {
shift->{'diskNumberStart'};
}
sub localHeaderRelativeOffset {
shift->{'localHeaderRelativeOffset'};
}
sub dataOffset {
shift->{'dataOffset'};
}
# Skip local file header, updating only extra field stuff.
# Assumes that fh is positioned before signature.
sub _skipLocalFileHeader {
my $self = shift;
my $header;
my $bytesRead = $self->fh()->read($header, LOCAL_FILE_HEADER_LENGTH);
if ($bytesRead != LOCAL_FILE_HEADER_LENGTH) {
return _ioError("reading local file header");
}
my $fileNameLength;
my $extraFieldLength;
my $bitFlag;
(
undef, # $self->{'versionNeededToExtract'},
$bitFlag,
undef, # $self->{'compressionMethod'},
undef, # $self->{'lastModFileDateTime'},
undef, # $crc32,
undef, # $compressedSize,
undef, # $uncompressedSize,
$fileNameLength,
$extraFieldLength
) = unpack(LOCAL_FILE_HEADER_FORMAT, $header);
if ($fileNameLength) {
$self->fh()->seek($fileNameLength, IO::Seekable::SEEK_CUR)
or return _ioError("skipping local file name");
}
my $zip64 = 0;
if ($extraFieldLength) {
$bytesRead =
$self->fh()->read($self->{'localExtraField'}, $extraFieldLength);
if ($bytesRead != $extraFieldLength) {
return _ioError("reading local extra field");
}
if ($self->{'archiveZip64'}) {
my $status;
($status, $zip64) =
$self->_extractZip64ExtraField($self->{'localExtraField'}, undef, undef);
return $status if $status != AZ_OK;
$self->{'zip64'} ||= $zip64;
}
}
$self->{'dataOffset'} = $self->fh()->tell();
if ($bitFlag & GPBF_HAS_DATA_DESCRIPTOR_MASK) {
# Read the crc32, compressedSize, and uncompressedSize from the
# extended data descriptor, which directly follows the compressed data.
#
# Skip over the compressed file data (assumes that EOCD compressedSize
# was correct)
$self->fh()->seek($self->{'compressedSize'}, IO::Seekable::SEEK_CUR)
or return _ioError("seeking to extended local header");
# these values should be set correctly from before.
my $oldCrc32 = $self->{'eocdCrc32'};
my $oldCompressedSize = $self->{'compressedSize'};
my $oldUncompressedSize = $self->{'uncompressedSize'};
my $status = $self->_readDataDescriptor($zip64);
return $status unless $status == AZ_OK;
# The buffer with encrypted data is prefixed with a new
# encrypted 12 byte header. The size only changes when
# the buffer is also compressed
$self->isEncrypted && $oldUncompressedSize > $self->{'uncompressedSize'}
and $oldUncompressedSize -= DATA_DESCRIPTOR_LENGTH;
return _formatError(
"CRC or size mismatch while skipping data descriptor")
if ( $oldCrc32 != $self->{'crc32'}
|| $oldUncompressedSize != $self->{'uncompressedSize'});
$self->{'crc32'} = 0
if $self->compressionMethod() == COMPRESSION_STORED ;
}
return AZ_OK;
}
# Read from a local file header into myself. Returns AZ_OK (in
# scalar context) or a pair (AZ_OK, $headerSize) (in list
# context) if successful.
# Assumes that fh is positioned after signature.
# Note that crc32, compressedSize, and uncompressedSize will be 0 if
# GPBF_HAS_DATA_DESCRIPTOR_MASK is set in the bitFlag.
sub _readLocalFileHeader {
my $self = shift;
my $header;
my $bytesRead = $self->fh()->read($header, LOCAL_FILE_HEADER_LENGTH);
if ($bytesRead != LOCAL_FILE_HEADER_LENGTH) {
return _ioError("reading local file header");
}
my $fileNameLength;
my $crc32;
my $compressedSize;
my $uncompressedSize;
my $extraFieldLength;
(
$self->{'versionNeededToExtract'}, $self->{'bitFlag'},
$self->{'compressionMethod'}, $self->{'lastModFileDateTime'},
$crc32, $compressedSize,
$uncompressedSize, $fileNameLength,
$extraFieldLength
) = unpack(LOCAL_FILE_HEADER_FORMAT, $header);
if ($fileNameLength) {
my $fileName;
$bytesRead = $self->fh()->read($fileName, $fileNameLength);
if ($bytesRead != $fileNameLength) {
return _ioError("reading local file name");
}
$self->fileName($fileName);
}
my $zip64 = 0;
if ($extraFieldLength) {
$bytesRead =
$self->fh()->read($self->{'localExtraField'}, $extraFieldLength);
if ($bytesRead != $extraFieldLength) {
return _ioError("reading local extra field");
}
if ($self->{'archiveZip64'}) {
my $status;
($status, $zip64) =
$self->_extractZip64ExtraField($self->{'localExtraField'},
$uncompressedSize,
$compressedSize);
return $status if $status != AZ_OK;
$self->{'zip64'} ||= $zip64;
}
}
$self->{'dataOffset'} = $self->fh()->tell();
if ($self->hasDataDescriptor()) {
# Read the crc32, compressedSize, and uncompressedSize from the
# extended data descriptor.
# Skip over the compressed file data (assumes that EOCD compressedSize
# was correct)
$self->fh()->seek($self->{'compressedSize'}, IO::Seekable::SEEK_CUR)
or return _ioError("seeking to extended local header");
my $status = $self->_readDataDescriptor($zip64);
return $status unless $status == AZ_OK;
} else {
return _formatError(
"CRC or size mismatch after reading data descriptor")
if ( $self->{'crc32'} != $crc32
|| $self->{'uncompressedSize'} != $uncompressedSize);
}
return
wantarray
? (AZ_OK,
SIGNATURE_LENGTH,
LOCAL_FILE_HEADER_LENGTH +
$fileNameLength +
$extraFieldLength)
: AZ_OK;
}
# This will read the data descriptor, which is after the end of compressed file
# data in members that have GPBF_HAS_DATA_DESCRIPTOR_MASK set in their bitFlag.
# The only reliable way to find these is to rely on the EOCD compressedSize.
# Assumes that file is positioned immediately after the compressed data.
# Returns status; sets crc32, compressedSize, and uncompressedSize.
sub _readDataDescriptor {
my $self = shift;
my $zip64 = shift;
my $signatureData;
my $header;
my $crc32;
my $compressedSize;
my $uncompressedSize;
my $bytesRead = $self->fh()->read($signatureData, SIGNATURE_LENGTH);
return _ioError("reading header signature")
if $bytesRead != SIGNATURE_LENGTH;
my $signature = unpack(SIGNATURE_FORMAT, $signatureData);
my $dataDescriptorLength;
my $dataDescriptorFormat;
my $dataDescriptorLengthNoSig;
my $dataDescriptorFormatNoSig;
if (! $zip64) {
$dataDescriptorLength = DATA_DESCRIPTOR_LENGTH;
$dataDescriptorFormat = DATA_DESCRIPTOR_FORMAT;
$dataDescriptorLengthNoSig = DATA_DESCRIPTOR_LENGTH_NO_SIG;
$dataDescriptorFormatNoSig = DATA_DESCRIPTOR_FORMAT_NO_SIG
}
else {
$dataDescriptorLength = DATA_DESCRIPTOR_ZIP64_LENGTH;
$dataDescriptorFormat = DATA_DESCRIPTOR_ZIP64_FORMAT;
$dataDescriptorLengthNoSig = DATA_DESCRIPTOR_ZIP64_LENGTH_NO_SIG;
$dataDescriptorFormatNoSig = DATA_DESCRIPTOR_ZIP64_FORMAT_NO_SIG
}
# unfortunately, the signature appears to be optional.
if ($signature == DATA_DESCRIPTOR_SIGNATURE
&& ($signature != $self->{'crc32'})) {
$bytesRead = $self->fh()->read($header, $dataDescriptorLength);
return _ioError("reading data descriptor")
if $bytesRead != $dataDescriptorLength;
($crc32, $compressedSize, $uncompressedSize) =
unpack($dataDescriptorFormat, $header);
} else {
$bytesRead = $self->fh()->read($header, $dataDescriptorLengthNoSig);
return _ioError("reading data descriptor")
if $bytesRead != $dataDescriptorLengthNoSig;
$crc32 = $signature;
($compressedSize, $uncompressedSize) =
unpack($dataDescriptorFormatNoSig, $header);
}
$self->{'eocdCrc32'} = $self->{'crc32'}
unless defined($self->{'eocdCrc32'});
$self->{'crc32'} = $crc32;
$self->{'compressedSize'} = $compressedSize;
$self->{'uncompressedSize'} = $uncompressedSize;
return AZ_OK;
}
# Read a Central Directory header. Return AZ_OK on success.
# Assumes that fh is positioned right after the signature.
sub _readCentralDirectoryFileHeader {
my $self = shift;
my $fh = $self->fh();
my $header = '';
my $bytesRead = $fh->read($header, CENTRAL_DIRECTORY_FILE_HEADER_LENGTH);
if ($bytesRead != CENTRAL_DIRECTORY_FILE_HEADER_LENGTH) {
return _ioError("reading central dir header");
}
my ($fileNameLength, $extraFieldLength, $fileCommentLength);
(
$self->{'versionMadeBy'},
$self->{'fileAttributeFormat'},
$self->{'versionNeededToExtract'},
$self->{'bitFlag'},
$self->{'compressionMethod'},
$self->{'lastModFileDateTime'},
$self->{'crc32'},
$self->{'compressedSize'},
$self->{'uncompressedSize'},
$fileNameLength,
$extraFieldLength,
$fileCommentLength,
$self->{'diskNumberStart'},
$self->{'internalFileAttributes'},
$self->{'externalFileAttributes'},
$self->{'localHeaderRelativeOffset'}
) = unpack(CENTRAL_DIRECTORY_FILE_HEADER_FORMAT, $header);
$self->{'eocdCrc32'} = $self->{'crc32'};
if ($fileNameLength) {
$bytesRead = $fh->read($self->{'fileName'}, $fileNameLength);
if ($bytesRead != $fileNameLength) {
_ioError("reading central dir filename");
}
}
if ($extraFieldLength) {
$bytesRead = $fh->read($self->{'cdExtraField'}, $extraFieldLength);
if ($bytesRead != $extraFieldLength) {
return _ioError("reading central dir extra field");
}
if ($self->{'archiveZip64'}) {
my ($status, $zip64) =
$self->_extractZip64ExtraField($self->{'cdExtraField'},
$self->{'uncompressedSize'},
$self->{'compressedSize'},
$self->{'localHeaderRelativeOffset'},
$self->{'diskNumberStart'});
return $status if $status != AZ_OK;
$self->{'zip64'} ||= $zip64;
}
}
if ($fileCommentLength) {
$bytesRead = $fh->read($self->{'fileComment'}, $fileCommentLength);
if ($bytesRead != $fileCommentLength) {
return _ioError("reading central dir file comment");
}
}
# NK 10/21/04: added to avoid problems with manipulated headers
if ( $self->{'uncompressedSize'} != $self->{'compressedSize'}
and $self->{'compressionMethod'} == COMPRESSION_STORED) {
$self->{'uncompressedSize'} = $self->{'compressedSize'};
}
$self->desiredCompressionMethod($self->compressionMethod());
return AZ_OK;
}
sub rewindData {
my $self = shift;
my $status = $self->SUPER::rewindData(@_);
return $status unless $status == AZ_OK;
return AZ_IO_ERROR unless $self->fh();
$self->fh()->clearerr();
# Seek to local file header.
# The only reason that I'm doing this this way is that the extraField
# length seems to be different between the CD header and the LF header.
$status = $self->_seekToLocalHeader();
return $status unless $status == AZ_OK;
# skip local file header
$status = $self->_skipLocalFileHeader();
return $status unless $status == AZ_OK;
# Seek to beginning of file data
$self->fh()->seek($self->dataOffset(), IO::Seekable::SEEK_SET)
or return _ioError("seeking to beginning of file data");
return AZ_OK;
}
# Return bytes read. Note that first parameter is a ref to a buffer.
# my $data;
# my ( $bytesRead, $status) = $self->readRawChunk( \$data, $chunkSize );
sub _readRawChunk {
my ($self, $dataRef, $chunkSize) = @_;
return (0, AZ_OK) unless $chunkSize;
my $bytesRead = $self->fh()->read($$dataRef, $chunkSize)
or return (0, _ioError("reading data"));
return ($bytesRead, AZ_OK);
}
1;
ARCHIVE_ZIP_ZIPFILEMEMBER
$fatpacked{"Carp.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'CARP';
package Carp;
{ use 5.006; }
use strict;
use warnings;
BEGIN {
# Very old versions of warnings.pm load Carp. This can go wrong due
# to the circular dependency. If warnings is invoked before Carp,
# then warnings starts by loading Carp, then Carp (above) tries to
# invoke warnings, and gets nothing because warnings is in the process
# of loading and hasn't defined its import method yet. If we were
# only turning on warnings ("use warnings" above) this wouldn't be too
# bad, because Carp would just gets the state of the -w switch and so
# might not get some warnings that it wanted. The real problem is
# that we then want to turn off Unicode warnings, but "no warnings
# 'utf8'" won't be effective if we're in this circular-dependency
# situation. So, if warnings.pm is an affected version, we turn
# off all warnings ourselves by directly setting ${^WARNING_BITS}.
# On unaffected versions, we turn off just Unicode warnings, via
# the proper API.
if(!defined($warnings::VERSION) || eval($warnings::VERSION) < 1.06) {
${^WARNING_BITS} = "";
} else {
"warnings"->unimport("utf8");
}
}
sub _fetch_sub { # fetch sub without autovivifying
my($pack, $sub) = @_;
$pack .= '::';
# only works with top-level packages
return unless exists($::{$pack});
for ($::{$pack}) {
return unless ref \$_ eq 'GLOB' && *$_{HASH} && exists $$_{$sub};
for ($$_{$sub}) {
return ref \$_ eq 'GLOB' ? *$_{CODE} : undef
}
}
}
# UTF8_REGEXP_PROBLEM is a compile-time constant indicating whether Carp
# must avoid applying a regular expression to an upgraded (is_utf8)
# string. There are multiple problems, on different Perl versions,
# that require this to be avoided. All versions prior to 5.13.8 will
# load utf8_heavy.pl for the swash system, even if the regexp doesn't
# use character classes. Perl 5.6 and Perls [5.11.2, 5.13.11) exhibit
# specific problems when Carp is being invoked in the aftermath of a
# syntax error.
BEGIN {
if("$]" < 5.013011) {
*UTF8_REGEXP_PROBLEM = sub () { 1 };
} else {
*UTF8_REGEXP_PROBLEM = sub () { 0 };
}
}
# is_utf8() is essentially the utf8::is_utf8() function, which indicates
# whether a string is represented in the upgraded form (using UTF-8
# internally). As utf8::is_utf8() is only available from Perl 5.8
# onwards, extra effort is required here to make it work on Perl 5.6.
BEGIN {
if(defined(my $sub = _fetch_sub utf8 => 'is_utf8')) {
*is_utf8 = $sub;
} else {
# black magic for perl 5.6
*is_utf8 = sub { unpack("C", "\xaa".$_[0]) != 170 };
}
}
# The downgrade() function defined here is to be used for attempts to
# downgrade where it is acceptable to fail. It must be called with a
# second argument that is a true value.
BEGIN {
if(defined(my $sub = _fetch_sub utf8 => 'downgrade')) {
*downgrade = \&{"utf8::downgrade"};
} else {
*downgrade = sub {
my $r = "";
my $l = length($_[0]);
for(my $i = 0; $i != $l; $i++) {
my $o = ord(substr($_[0], $i, 1));
return if $o > 255;
$r .= chr($o);
}
$_[0] = $r;
};
}
}
# is_safe_printable_codepoint() indicates whether a character, specified
# by integer codepoint, is OK to output literally in a trace. Generally
# this is if it is a printable character in the ancestral character set
# (ASCII or EBCDIC). This is used on some Perls in situations where a
# regexp can't be used.
BEGIN {
*is_safe_printable_codepoint =
"$]" >= 5.007_003 ?
eval(q(sub ($) {
my $u = utf8::native_to_unicode($_[0]);
$u >= 0x20 && $u <= 0x7e;
}))
: ord("A") == 65 ?
sub ($) { $_[0] >= 0x20 && $_[0] <= 0x7e }
:
sub ($) {
# Early EBCDIC
# 3 EBCDIC code pages supported then; all controls but one
# are the code points below SPACE. The other one is 0x5F on
# POSIX-BC; FF on the other two.
# FIXME: there are plenty of unprintable codepoints other
# than those that this code and the comment above identifies
# as "controls".
$_[0] >= ord(" ") && $_[0] <= 0xff &&
$_[0] != (ord ("^") == 106 ? 0x5f : 0xff);
}
;
}
sub _univ_mod_loaded {
return 0 unless exists($::{"UNIVERSAL::"});
for ($::{"UNIVERSAL::"}) {
return 0 unless ref \$_ eq "GLOB" && *$_{HASH} && exists $$_{"$_[0]::"};
for ($$_{"$_[0]::"}) {
return 0 unless ref \$_ eq "GLOB" && *$_{HASH} && exists $$_{"VERSION"};
for ($$_{"VERSION"}) {
return 0 unless ref \$_ eq "GLOB";
return ${*$_{SCALAR}};
}
}
}
}
# _maybe_isa() is usually the UNIVERSAL::isa function. We have to avoid
# the latter if the UNIVERSAL::isa module has been loaded, to avoid infi-
# nite recursion; in that case _maybe_isa simply returns true.
my $isa;
BEGIN {
if (_univ_mod_loaded('isa')) {
*_maybe_isa = sub { 1 }
}
else {
# Since we have already done the check, record $isa for use below
# when defining _StrVal.
*_maybe_isa = $isa = _fetch_sub(UNIVERSAL => "isa");
}
}
# We need an overload::StrVal or equivalent function, but we must avoid
# loading any modules on demand, as Carp is used from __DIE__ handlers and
# may be invoked after a syntax error.
# We can copy recent implementations of overload::StrVal and use
# overloading.pm, which is the fastest implementation, so long as
# overloading is available. If it is not available, we use our own pure-
# Perl StrVal. We never actually use overload::StrVal, for various rea-
# sons described below.
# overload versions are as follows:
# undef-1.00 (up to perl 5.8.0) uses bless (avoid!)
# 1.01-1.17 (perl 5.8.1 to 5.14) uses Scalar::Util
# 1.18+ (perl 5.16+) uses overloading
# The ancient 'bless' implementation (that inspires our pure-Perl version)
# blesses unblessed references and must be avoided. Those using
# Scalar::Util use refaddr, possibly the pure-Perl implementation, which
# has the same blessing bug, and must be avoided. Also, Scalar::Util is
# loaded on demand. Since we avoid the Scalar::Util implementations, we
# end up having to implement our own overloading.pm-based version for perl
# 5.10.1 to 5.14. Since it also works just as well in more recent ver-
# sions, we use it there, too.
BEGIN {
if (eval { require "overloading.pm" }) {
*_StrVal = eval 'sub { no overloading; "$_[0]" }'
}
else {
# Work around the UNIVERSAL::can/isa modules to avoid recursion.
# _mycan is either UNIVERSAL::can, or, in the presence of an
# override, overload::mycan.
*_mycan = _univ_mod_loaded('can')
? do { require "overload.pm"; _fetch_sub overload => 'mycan' }
: \&UNIVERSAL::can;
# _blessed is either UNIVERAL::isa(...), or, in the presence of an
# override, a hideous, but fairly reliable, workaround.
*_blessed = $isa
? sub { &$isa($_[0], "UNIVERSAL") }
: sub {
my $probe = "UNIVERSAL::Carp_probe_" . rand;
no strict 'refs';
local *$probe = sub { "unlikely string" };
local $@;
local $SIG{__DIE__} = sub{};
(eval { $_[0]->$probe } || '') eq 'unlikely string'
};
*_StrVal = sub {
my $pack = ref $_[0];
# Perl's overload mechanism uses the presence of a special
# "method" named "((" or "()" to signal it is in effect.
# This test seeks to see if it has been set up. "((" post-
# dates overloading.pm, so we can skip it.
return "$_[0]" unless _mycan($pack, "()");
# Even at this point, the invocant may not be blessed, so
# check for that.
return "$_[0]" if not _blessed($_[0]);
bless $_[0], "Carp";
my $str = "$_[0]";
bless $_[0], $pack;
$pack . substr $str, index $str, "=";
}
}
}
our $VERSION = '1.50';
$VERSION =~ tr/_//d;
our $MaxEvalLen = 0;
our $Verbose = 0;
our $CarpLevel = 0;
our $MaxArgLen = 64; # How much of each argument to print. 0 = all.
our $MaxArgNums = 8; # How many arguments to print. 0 = all.
our $RefArgFormatter = undef; # allow caller to format reference arguments
require Exporter;
our @ISA = ('Exporter');
our @EXPORT = qw(confess croak carp);
our @EXPORT_OK = qw(cluck verbose longmess shortmess);
our @EXPORT_FAIL = qw(verbose); # hook to enable verbose mode
# The members of %Internal are packages that are internal to perl.
# Carp will not report errors from within these packages if it
# can. The members of %CarpInternal are internal to Perl's warning
# system. Carp will not report errors from within these packages
# either, and will not report calls *to* these packages for carp and
# croak. They replace $CarpLevel, which is deprecated. The
# $Max(EvalLen|(Arg(Len|Nums)) variables are used to specify how the eval
# text and function arguments should be formatted when printed.
our %CarpInternal;
our %Internal;
# disable these by default, so they can live w/o require Carp
$CarpInternal{Carp}++;
$CarpInternal{warnings}++;
$Internal{Exporter}++;
$Internal{'Exporter::Heavy'}++;
# if the caller specifies verbose usage ("perl -MCarp=verbose script.pl")
# then the following method will be called by the Exporter which knows
# to do this thanks to @EXPORT_FAIL, above. $_[1] will contain the word
# 'verbose'.
sub export_fail { shift; $Verbose = shift if $_[0] eq 'verbose'; @_ }
sub _cgc {
no strict 'refs';
return \&{"CORE::GLOBAL::caller"} if defined &{"CORE::GLOBAL::caller"};
return;
}
sub longmess {
local($!, $^E);
# Icky backwards compatibility wrapper. :-(
#
# The story is that the original implementation hard-coded the
# number of call levels to go back, so calls to longmess were off
# by one. Other code began calling longmess and expecting this
# behaviour, so the replacement has to emulate that behaviour.
my $cgc = _cgc();
my $call_pack = $cgc ? $cgc->() : caller();
if ( $Internal{$call_pack} or $CarpInternal{$call_pack} ) {
return longmess_heavy(@_);
}
else {
local $CarpLevel = $CarpLevel + 1;
return longmess_heavy(@_);
}
}
our @CARP_NOT;
sub shortmess {
local($!, $^E);
my $cgc = _cgc();
# Icky backwards compatibility wrapper. :-(
local @CARP_NOT = $cgc ? $cgc->() : caller();
shortmess_heavy(@_);
}
sub croak { die shortmess @_ }
sub confess { die longmess @_ }
sub carp { warn shortmess @_ }
sub cluck { warn longmess @_ }
BEGIN {
if("$]" >= 5.015002 || ("$]" >= 5.014002 && "$]" < 5.015) ||
("$]" >= 5.012005 && "$]" < 5.013)) {
*CALLER_OVERRIDE_CHECK_OK = sub () { 1 };
} else {
*CALLER_OVERRIDE_CHECK_OK = sub () { 0 };
}
}
sub caller_info {
my $i = shift(@_) + 1;
my %call_info;
my $cgc = _cgc();
{
# Some things override caller() but forget to implement the
# @DB::args part of it, which we need. We check for this by
# pre-populating @DB::args with a sentinel which no-one else
# has the address of, so that we can detect whether @DB::args
# has been properly populated. However, on earlier versions
# of perl this check tickles a bug in CORE::caller() which
# leaks memory. So we only check on fixed perls.
@DB::args = \$i if CALLER_OVERRIDE_CHECK_OK;
package DB;
@call_info{
qw(pack file line sub has_args wantarray evaltext is_require) }
= $cgc ? $cgc->($i) : caller($i);
}
unless ( defined $call_info{file} ) {
return ();
}
my $sub_name = Carp::get_subname( \%call_info );
if ( $call_info{has_args} ) {
# Guard our serialization of the stack from stack refcounting bugs
# NOTE this is NOT a complete solution, we cannot 100% guard against
# these bugs. However in many cases Perl *is* capable of detecting
# them and throws an error when it does. Unfortunately serializing
# the arguments on the stack is a perfect way of finding these bugs,
# even when they would not affect normal program flow that did not
# poke around inside the stack. Inside of Carp.pm it makes little
# sense reporting these bugs, as Carp's job is to report the callers
# errors, not the ones it might happen to tickle while doing so.
# See: https://rt.perl.org/Public/Bug/Display.html?id=131046
# and: https://rt.perl.org/Public/Bug/Display.html?id=52610
# for more details and discussion. - Yves
my @args = map {
my $arg;
local $@= $@;
eval {
$arg = $_;
1;
} or do {
$arg = '** argument not available anymore **';
};
$arg;
} @DB::args;
if (CALLER_OVERRIDE_CHECK_OK && @args == 1
&& ref $args[0] eq ref \$i
&& $args[0] == \$i ) {
@args = (); # Don't let anyone see the address of $i
local $@;
my $where = eval {
my $func = $cgc or return '';
my $gv =
(_fetch_sub B => 'svref_2object' or return '')
->($func)->GV;
my $package = $gv->STASH->NAME;
my $subname = $gv->NAME;
return unless defined $package && defined $subname;
# returning CORE::GLOBAL::caller isn't useful for tracing the cause:
return if $package eq 'CORE::GLOBAL' && $subname eq 'caller';
" in &${package}::$subname";
} || '';
@args
= "** Incomplete caller override detected$where; \@DB::args were not set **";
}
else {
my $overflow;
if ( $MaxArgNums and @args > $MaxArgNums )
{ # More than we want to show?
$#args = $MaxArgNums - 1;
$overflow = 1;
}
@args = map { Carp::format_arg($_) } @args;
if ($overflow) {
push @args, '...';
}
}
# Push the args onto the subroutine
$sub_name .= '(' . join( ', ', @args ) . ')';
}
$call_info{sub_name} = $sub_name;
return wantarray() ? %call_info : \%call_info;
}
# Transform an argument to a function into a string.
our $in_recurse;
sub format_arg {
my $arg = shift;
if ( my $pack= ref($arg) ) {
# legitimate, let's not leak it.
if (!$in_recurse && _maybe_isa( $arg, 'UNIVERSAL' ) &&
do {
local $@;
local $in_recurse = 1;
local $SIG{__DIE__} = sub{};
eval {$arg->can('CARP_TRACE') }
})
{
return $arg->CARP_TRACE();
}
elsif (!$in_recurse &&
defined($RefArgFormatter) &&
do {
local $@;
local $in_recurse = 1;
local $SIG{__DIE__} = sub{};
eval {$arg = $RefArgFormatter->($arg); 1}
})
{
return $arg;
}
else
{
# Argument may be blessed into a class with overloading, and so
# might have an overloaded stringification. We don't want to
# risk getting the overloaded stringification, so we need to
# use _StrVal, our overload::StrVal()-equivalent.
return _StrVal $arg;
}
}
return "undef" if !defined($arg);
downgrade($arg, 1);
return $arg if !(UTF8_REGEXP_PROBLEM && is_utf8($arg)) &&
$arg =~ /\A-?[0-9]+(?:\.[0-9]*)?(?:[eE][-+]?[0-9]+)?\z/;
my $suffix = "";
if ( 2 < $MaxArgLen and $MaxArgLen < length($arg) ) {
substr ( $arg, $MaxArgLen - 3 ) = "";
$suffix = "...";
}
if(UTF8_REGEXP_PROBLEM && is_utf8($arg)) {
for(my $i = length($arg); $i--; ) {
my $c = substr($arg, $i, 1);
my $x = substr($arg, 0, 0); # work around bug on Perl 5.8.{1,2}
if($c eq "\"" || $c eq "\\" || $c eq "\$" || $c eq "\@") {
substr $arg, $i, 0, "\\";
next;
}
my $o = ord($c);
substr $arg, $i, 1, sprintf("\\x{%x}", $o)
unless is_safe_printable_codepoint($o);
}
} else {
$arg =~ s/([\"\\\$\@])/\\$1/g;
# This is all the ASCII printables spelled-out. It is portable to all
# Perl versions and platforms (such as EBCDIC). There are other more
# compact ways to do this, but may not work everywhere every version.
$arg =~ s/([^ !"#\$\%\&'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
}
downgrade($arg, 1);
return "\"".$arg."\"".$suffix;
}
sub Regexp::CARP_TRACE {
my $arg = "$_[0]";
downgrade($arg, 1);
if(UTF8_REGEXP_PROBLEM && is_utf8($arg)) {
for(my $i = length($arg); $i--; ) {
my $o = ord(substr($arg, $i, 1));
my $x = substr($arg, 0, 0); # work around bug on Perl 5.8.{1,2}
substr $arg, $i, 1, sprintf("\\x{%x}", $o)
unless is_safe_printable_codepoint($o);
}
} else {
# See comment in format_arg() about this same regex.
$arg =~ s/([^ !"#\$\%\&'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
}
downgrade($arg, 1);
my $suffix = "";
if($arg =~ /\A\(\?\^?([a-z]*)(?:-[a-z]*)?:(.*)\)\z/s) {
($suffix, $arg) = ($1, $2);
}
if ( 2 < $MaxArgLen and $MaxArgLen < length($arg) ) {
substr ( $arg, $MaxArgLen - 3 ) = "";
$suffix = "...".$suffix;
}
return "qr($arg)$suffix";
}
# Takes an inheritance cache and a package and returns
# an anon hash of known inheritances and anon array of
# inheritances which consequences have not been figured
# for.
sub get_status {
my $cache = shift;
my $pkg = shift;
$cache->{$pkg} ||= [ { $pkg => $pkg }, [ trusts_directly($pkg) ] ];
return @{ $cache->{$pkg} };
}
# Takes the info from caller() and figures out the name of
# the sub/require/eval
sub get_subname {
my $info = shift;
if ( defined( $info->{evaltext} ) ) {
my $eval = $info->{evaltext};
if ( $info->{is_require} ) {
return "require $eval";
}
else {
$eval =~ s/([\\\'])/\\$1/g;
return "eval '" . str_len_trim( $eval, $MaxEvalLen ) . "'";
}
}
# this can happen on older perls when the sub (or the stash containing it)
# has been deleted
if ( !defined( $info->{sub} ) ) {
return '__ANON__::__ANON__';
}
return ( $info->{sub} eq '(eval)' ) ? 'eval {...}' : $info->{sub};
}
# Figures out what call (from the point of view of the caller)
# the long error backtrace should start at.
sub long_error_loc {
my $i;
my $lvl = $CarpLevel;
{
++$i;
my $cgc = _cgc();
my @caller = $cgc ? $cgc->($i) : caller($i);
my $pkg = $caller[0];
unless ( defined($pkg) ) {
# This *shouldn't* happen.
if (%Internal) {
local %Internal;
$i = long_error_loc();
last;
}
elsif (defined $caller[2]) {
# this can happen when the stash has been deleted
# in that case, just assume that it's a reasonable place to
# stop (the file and line data will still be intact in any
# case) - the only issue is that we can't detect if the
# deleted package was internal (so don't do that then)
# -doy
redo unless 0 > --$lvl;
last;
}
else {
return 2;
}
}
redo if $CarpInternal{$pkg};
redo unless 0 > --$lvl;
redo if $Internal{$pkg};
}
return $i - 1;
}
sub longmess_heavy {
if ( ref( $_[0] ) ) { # don't break references as exceptions
return wantarray ? @_ : $_[0];
}
my $i = long_error_loc();
return ret_backtrace( $i, @_ );
}
BEGIN {
if("$]" >= 5.017004) {
# The LAST_FH constant is a reference to the variable.
$Carp::{LAST_FH} = \eval '\${^LAST_FH}';
} else {
eval '*LAST_FH = sub () { 0 }';
}
}
# Returns a full stack backtrace starting from where it is
# told.
sub ret_backtrace {
my ( $i, @error ) = @_;
my $mess;
my $err = join '', @error;
$i++;
my $tid_msg = '';
if ( defined &threads::tid ) {
my $tid = threads->tid;
$tid_msg = " thread $tid" if $tid;
}
my %i = caller_info($i);
$mess = "$err at $i{file} line $i{line}$tid_msg";
if( $. ) {
# Use ${^LAST_FH} if available.
if (LAST_FH) {
if (${+LAST_FH}) {
$mess .= sprintf ", <%s> %s %d",
*${+LAST_FH}{NAME},
($/ eq "\n" ? "line" : "chunk"), $.
}
}
else {
local $@ = '';
local $SIG{__DIE__};
eval {
CORE::die;
};
if($@ =~ /^Died at .*(, <.*?> (?:line|chunk) \d+).$/ ) {
$mess .= $1;
}
}
}
$mess .= "\.\n";
while ( my %i = caller_info( ++$i ) ) {
$mess .= "\t$i{sub_name} called at $i{file} line $i{line}$tid_msg\n";
}
return $mess;
}
sub ret_summary {
my ( $i, @error ) = @_;
my $err = join '', @error;
$i++;
my $tid_msg = '';
if ( defined &threads::tid ) {
my $tid = threads->tid;
$tid_msg = " thread $tid" if $tid;
}
my %i = caller_info($i);
return "$err at $i{file} line $i{line}$tid_msg\.\n";
}
sub short_error_loc {
# You have to create your (hash)ref out here, rather than defaulting it
# inside trusts *on a lexical*, as you want it to persist across calls.
# (You can default it on $_[2], but that gets messy)
my $cache = {};
my $i = 1;
my $lvl = $CarpLevel;
{
my $cgc = _cgc();
my $called = $cgc ? $cgc->($i) : caller($i);
$i++;
my $caller = $cgc ? $cgc->($i) : caller($i);
if (!defined($caller)) {
my @caller = $cgc ? $cgc->($i) : caller($i);
if (@caller) {
# if there's no package but there is other caller info, then
# the package has been deleted - treat this as a valid package
# in this case
redo if defined($called) && $CarpInternal{$called};
redo unless 0 > --$lvl;
last;
}
else {
return 0;
}
}
redo if $Internal{$caller};
redo if $CarpInternal{$caller};
redo if $CarpInternal{$called};
redo if trusts( $called, $caller, $cache );
redo if trusts( $caller, $called, $cache );
redo unless 0 > --$lvl;
}
return $i - 1;
}
sub shortmess_heavy {
return longmess_heavy(@_) if $Verbose;
return @_ if ref( $_[0] ); # don't break references as exceptions
my $i = short_error_loc();
if ($i) {
ret_summary( $i, @_ );
}
else {
longmess_heavy(@_);
}
}
# If a string is too long, trims it with ...
sub str_len_trim {
my $str = shift;
my $max = shift || 0;
if ( 2 < $max and $max < length($str) ) {
substr( $str, $max - 3 ) = '...';
}
return $str;
}
# Takes two packages and an optional cache. Says whether the
# first inherits from the second.
#
# Recursive versions of this have to work to avoid certain
# possible endless loops, and when following long chains of
# inheritance are less efficient.
sub trusts {
my $child = shift;
my $parent = shift;
my $cache = shift;
my ( $known, $partial ) = get_status( $cache, $child );
# Figure out consequences until we have an answer
while ( @$partial and not exists $known->{$parent} ) {
my $anc = shift @$partial;
next if exists $known->{$anc};
$known->{$anc}++;
my ( $anc_knows, $anc_partial ) = get_status( $cache, $anc );
my @found = keys %$anc_knows;
@$known{@found} = ();
push @$partial, @$anc_partial;
}
return exists $known->{$parent};
}
# Takes a package and gives a list of those trusted directly
sub trusts_directly {
my $class = shift;
no strict 'refs';
my $stash = \%{"$class\::"};
for my $var (qw/ CARP_NOT ISA /) {
# Don't try using the variable until we know it exists,
# to avoid polluting the caller's namespace.
if ( $stash->{$var} && ref \$stash->{$var} eq 'GLOB'
&& *{$stash->{$var}}{ARRAY} && @{$stash->{$var}} ) {
return @{$stash->{$var}}
}
}
return;
}
if(!defined($warnings::VERSION) ||
do { no warnings "numeric"; $warnings::VERSION < 1.03 }) {
# Very old versions of warnings.pm import from Carp. This can go
# wrong due to the circular dependency. If Carp is invoked before
# warnings, then Carp starts by loading warnings, then warnings
# tries to import from Carp, and gets nothing because Carp is in
# the process of loading and hasn't defined its import method yet.
# So we work around that by manually exporting to warnings here.
no strict "refs";
*{"warnings::$_"} = \&$_ foreach @EXPORT;
}
1;
__END__
=head1 NAME
Carp - alternative warn and die for modules
=head1 SYNOPSIS
use Carp;
# warn user (from perspective of caller)
carp "string trimmed to 80 chars";
# die of errors (from perspective of caller)
croak "We're outta here!";
# die of errors with stack backtrace
confess "not implemented";
# cluck, longmess and shortmess not exported by default
use Carp qw(cluck longmess shortmess);
cluck "This is how we got here!"; # warn with stack backtrace
$long_message = longmess( "message from cluck() or confess()" );
$short_message = shortmess( "message from carp() or croak()" );
=head1 DESCRIPTION
The Carp routines are useful in your own modules because
they act like C<die()> or C<warn()>, but with a message which is more
likely to be useful to a user of your module. In the case of
C<cluck()> and C<confess()>, that context is a summary of every
call in the call-stack; C<longmess()> returns the contents of the error
message.
For a shorter message you can use C<carp()> or C<croak()> which report the
error as being from where your module was called. C<shortmess()> returns the
contents of this error message. There is no guarantee that that is where the
error was, but it is a good educated guess.
C<Carp> takes care not to clobber the status variables C<$!> and C<$^E>
in the course of assembling its error messages. This means that a
C<$SIG{__DIE__}> or C<$SIG{__WARN__}> handler can capture the error
information held in those variables, if it is required to augment the
error message, and if the code calling C<Carp> left useful values there.
Of course, C<Carp> can't guarantee the latter.
You can also alter the way the output and logic of C<Carp> works, by
changing some global variables in the C<Carp> namespace. See the
section on C<GLOBAL VARIABLES> below.
Here is a more complete description of how C<carp> and C<croak> work.
What they do is search the call-stack for a function call stack where
they have not been told that there shouldn't be an error. If every
call is marked safe, they give up and give a full stack backtrace
instead. In other words they presume that the first likely looking
potential suspect is guilty. Their rules for telling whether
a call shouldn't generate errors work as follows:
=over 4
=item 1.
Any call from a package to itself is safe.
=item 2.
Packages claim that there won't be errors on calls to or from
packages explicitly marked as safe by inclusion in C<@CARP_NOT>, or
(if that array is empty) C<@ISA>. The ability to override what
@ISA says is new in 5.8.
=item 3.
The trust in item 2 is transitive. If A trusts B, and B
trusts C, then A trusts C. So if you do not override C<@ISA>
with C<@CARP_NOT>, then this trust relationship is identical to,
"inherits from".
=item 4.
Any call from an internal Perl module is safe. (Nothing keeps
user modules from marking themselves as internal to Perl, but
this practice is discouraged.)
=item 5.
Any call to Perl's warning system (eg Carp itself) is safe.
(This rule is what keeps it from reporting the error at the
point where you call C<carp> or C<croak>.)
=item 6.
C<$Carp::CarpLevel> can be set to skip a fixed number of additional
call levels. Using this is not recommended because it is very
difficult to get it to behave correctly.
=back
=head2 Forcing a Stack Trace
As a debugging aid, you can force Carp to treat a croak as a confess
and a carp as a cluck across I<all> modules. In other words, force a
detailed stack trace to be given. This can be very helpful when trying
to understand why, or from where, a warning or error is being generated.
This feature is enabled by 'importing' the non-existent symbol
'verbose'. You would typically enable it by saying
perl -MCarp=verbose script.pl
or by including the string C<-MCarp=verbose> in the PERL5OPT
environment variable.
Alternately, you can set the global variable C<$Carp::Verbose> to true.
See the C<GLOBAL VARIABLES> section below.
=head2 Stack Trace formatting
At each stack level, the subroutine's name is displayed along with
its parameters. For simple scalars, this is sufficient. For complex
data types, such as objects and other references, this can simply
display C<'HASH(0x1ab36d8)'>.
Carp gives two ways to control this.
=over 4
=item 1.
For objects, a method, C<CARP_TRACE>, will be called, if it exists. If
this method doesn't exist, or it recurses into C<Carp>, or it otherwise
throws an exception, this is skipped, and Carp moves on to the next option,
otherwise checking stops and the string returned is used. It is recommended
that the object's type is part of the string to make debugging easier.
=item 2.
For any type of reference, C<$Carp::RefArgFormatter> is checked (see below).
This variable is expected to be a code reference, and the current parameter
is passed in. If this function doesn't exist (the variable is undef), or
it recurses into C<Carp>, or it otherwise throws an exception, this is
skipped, and Carp moves on to the next option, otherwise checking stops
and the string returned is used.
=item 3.
Otherwise, if neither C<CARP_TRACE> nor C<$Carp::RefArgFormatter> is
available, stringify the value ignoring any overloading.
=back
=head1 GLOBAL VARIABLES
=head2 $Carp::MaxEvalLen
This variable determines how many characters of a string-eval are to
be shown in the output. Use a value of C<0> to show all text.
Defaults to C<0>.
=head2 $Carp::MaxArgLen
This variable determines how many characters of each argument to a
function to print. Use a value of C<0> to show the full length of the
argument.
Defaults to C<64>.
=head2 $Carp::MaxArgNums
This variable determines how many arguments to each function to show.
Use a false value to show all arguments to a function call. To suppress all
arguments, use C<-1> or C<'0 but true'>.
Defaults to C<8>.
=head2 $Carp::Verbose
This variable makes C<carp()> and C<croak()> generate stack backtraces
just like C<cluck()> and C<confess()>. This is how C<use Carp 'verbose'>
is implemented internally.
Defaults to C<0>.
=head2 $Carp::RefArgFormatter
This variable sets a general argument formatter to display references.
Plain scalars and objects that implement C<CARP_TRACE> will not go through
this formatter. Calling C<Carp> from within this function is not supported.
local $Carp::RefArgFormatter = sub {
require Data::Dumper;
Data::Dumper::Dump($_[0]); # not necessarily safe
};
=head2 @CARP_NOT
This variable, I<in your package>, says which packages are I<not> to be
considered as the location of an error. The C<carp()> and C<cluck()>
functions will skip over callers when reporting where an error occurred.
NB: This variable must be in the package's symbol table, thus:
# These work
our @CARP_NOT; # file scope
use vars qw(@CARP_NOT); # package scope
@My::Package::CARP_NOT = ... ; # explicit package variable
# These don't work
sub xyz { ... @CARP_NOT = ... } # w/o declarations above
my @CARP_NOT; # even at top-level
Example of use:
package My::Carping::Package;
use Carp;
our @CARP_NOT;
sub bar { .... or _error('Wrong input') }
sub _error {
# temporary control of where'ness, __PACKAGE__ is implicit
local @CARP_NOT = qw(My::Friendly::Caller);
carp(@_)
}
This would make C<Carp> report the error as coming from a caller not
in C<My::Carping::Package>, nor from C<My::Friendly::Caller>.
Also read the L</DESCRIPTION> section above, about how C<Carp> decides
where the error is reported from.
Use C<@CARP_NOT>, instead of C<$Carp::CarpLevel>.
Overrides C<Carp>'s use of C<@ISA>.
=head2 %Carp::Internal
This says what packages are internal to Perl. C<Carp> will never
report an error as being from a line in a package that is internal to
Perl. For example:
$Carp::Internal{ (__PACKAGE__) }++;
# time passes...
sub foo { ... or confess("whatever") };
would give a full stack backtrace starting from the first caller
outside of __PACKAGE__. (Unless that package was also internal to
Perl.)
=head2 %Carp::CarpInternal
This says which packages are internal to Perl's warning system. For
generating a full stack backtrace this is the same as being internal
to Perl, the stack backtrace will not start inside packages that are
listed in C<%Carp::CarpInternal>. But it is slightly different for
the summary message generated by C<carp> or C<croak>. There errors
will not be reported on any lines that are calling packages in
C<%Carp::CarpInternal>.
For example C<Carp> itself is listed in C<%Carp::CarpInternal>.
Therefore the full stack backtrace from C<confess> will not start
inside of C<Carp>, and the short message from calling C<croak> is
not placed on the line where C<croak> was called.
=head2 $Carp::CarpLevel
This variable determines how many additional call frames are to be
skipped that would not otherwise be when reporting where an error
occurred on a call to one of C<Carp>'s functions. It is fairly easy
to count these call frames on calls that generate a full stack
backtrace. However it is much harder to do this accounting for calls
that generate a short message. Usually people skip too many call
frames. If they are lucky they skip enough that C<Carp> goes all of
the way through the call stack, realizes that something is wrong, and
then generates a full stack backtrace. If they are unlucky then the
error is reported from somewhere misleading very high in the call
stack.
Therefore it is best to avoid C<$Carp::CarpLevel>. Instead use
C<@CARP_NOT>, C<%Carp::Internal> and C<%Carp::CarpInternal>.
Defaults to C<0>.
=head1 BUGS
The Carp routines don't handle exception objects currently.
If called with a first argument that is a reference, they simply
call die() or warn(), as appropriate.
=head1 SEE ALSO
L<Carp::Always>,
L<Carp::Clan>
=head1 CONTRIBUTING
L<Carp> is maintained by the perl 5 porters as part of the core perl 5
version control repository. Please see the L<perlhack> perldoc for how to
submit patches and contribute to it.
=head1 AUTHOR
The Carp module first appeared in Larry Wall's perl 5.000 distribution.
Since then it has been modified by several of the perl 5 porters.
Andrew Main (Zefram) <zefram@fysh.org> divested Carp into an independent
distribution.
=head1 COPYRIGHT
Copyright (C) 1994-2013 Larry Wall
Copyright (C) 2011, 2012, 2013 Andrew Main (Zefram) <zefram@fysh.org>
=head1 LICENSE
This module is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
CARP
$fatpacked{"Carp/Heavy.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'CARP_HEAVY';
package Carp::Heavy;
use Carp ();
our $VERSION = '1.50';
$VERSION =~ tr/_//d;
# Carp::Heavy was merged into Carp in version 1.12. Any mismatched versions
# after this point are not significant and can be ignored.
if(($Carp::VERSION || 0) < 1.12) {
my $cv = defined($Carp::VERSION) ? $Carp::VERSION : "undef";
die "Version mismatch between Carp $cv ($INC{q(Carp.pm)}) and Carp::Heavy $VERSION ($INC{q(Carp/Heavy.pm)}). Did you alter \@INC after Carp was loaded?\n";
}
1;
# Most of the machinery of Carp used to be here.
# It has been moved in Carp.pm now, but this placeholder remains for
# the benefit of modules that like to preload Carp::Heavy directly.
# This must load Carp, because some modules rely on the historical
# behaviour of Carp::Heavy loading Carp.
CARP_HEAVY
$fatpacked{"Crypt/RC4.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'CRYPT_RC4';
#--------------------------------------------------------------------#
# Crypt::RC4
# Date Written: 07-Jun-2000 04:15:55 PM
# Last Modified: 13-Dec-2001 03:33:49 PM
# Author: Kurt Kincaid (sifukurt@yahoo.com)
# Copyright (c) 2001, Kurt Kincaid
# All Rights Reserved.
#
# This is free software and may be modified and/or
# redistributed under the same terms as Perl itself.
#--------------------------------------------------------------------#
package Crypt::RC4;
use strict;
use vars qw( $VERSION @ISA @EXPORT $MAX_CHUNK_SIZE );
$MAX_CHUNK_SIZE = 1024 unless $MAX_CHUNK_SIZE;
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(RC4);
$VERSION = '2.02';
sub new {
my ( $class, $key ) = @_;
my $self = bless {}, $class;
$self->{state} = Setup( $key );
$self->{x} = 0;
$self->{y} = 0;
$self;
}
sub RC4 {
my $self;
my( @state, $x, $y );
if ( ref $_[0] ) {
$self = shift;
@state = @{ $self->{state} };
$x = $self->{x};
$y = $self->{y};
} else {
@state = Setup( shift );
$x = $y = 0;
}
my $message = shift;
my $num_pieces = do {
my $num = length($message) / $MAX_CHUNK_SIZE;
my $int = int $num;
$int == $num ? $int : $int+1;
};
for my $piece ( 0..$num_pieces - 1 ) {
my @message = unpack "C*", substr($message, $piece * $MAX_CHUNK_SIZE, $MAX_CHUNK_SIZE);
for ( @message ) {
$x = 0 if ++$x > 255;
$y -= 256 if ($y += $state[$x]) > 255;
@state[$x, $y] = @state[$y, $x];
$_ ^= $state[( $state[$x] + $state[$y] ) % 256];
}
substr($message, $piece * $MAX_CHUNK_SIZE, $MAX_CHUNK_SIZE) = pack "C*", @message;
}
if ($self) {
$self->{state} = \@state;
$self->{x} = $x;
$self->{y} = $y;
}
$message;
}
sub Setup {
my @k = unpack( 'C*', shift );
my @state = 0..255;
my $y = 0;
for my $x (0..255) {
$y = ( $k[$x % @k] + $state[$x] + $y ) % 256;
@state[$x, $y] = @state[$y, $x];
}
wantarray ? @state : \@state;
}
1;
__END__
=head1 NAME
Crypt::RC4 - Perl implementation of the RC4 encryption algorithm
=head1 SYNOPSIS
# Functional Style
use Crypt::RC4;
$encrypted = RC4( $passphrase, $plaintext );
$decrypt = RC4( $passphrase, $encrypted );
# OO Style
use Crypt::RC4;
$ref = Crypt::RC4->new( $passphrase );
$encrypted = $ref->RC4( $plaintext );
$ref2 = Crypt::RC4->new( $passphrase );
$decrypted = $ref2->RC4( $encrypted );
# process an entire file, one line at a time
# (Warning: Encrypted file leaks line lengths.)
$ref3 = Crypt::RC4->new( $passphrase );
while (<FILE>) {
chomp;
print $ref3->RC4($_), "\n";
}
=head1 DESCRIPTION
A simple implementation of the RC4 algorithm, developed by RSA Security, Inc. Here is the description
from RSA's website:
RC4 is a stream cipher designed by Rivest for RSA Data Security (now RSA Security). It is a variable
key-size stream cipher with byte-oriented operations. The algorithm is based on the use of a random
permutation. Analysis shows that the period of the cipher is overwhelmingly likely to be greater than
10100. Eight to sixteen machine operations are required per output byte, and the cipher can be
expected to run very quickly in software. Independent analysts have scrutinized the algorithm and it
is considered secure.
Based substantially on the "RC4 in 3 lines of perl" found at http://www.cypherspace.org
A major bug in v1.0 was fixed by David Hook (dgh@wumpus.com.au). Thanks, David.
=head1 AUTHOR
Kurt Kincaid (sifukurt@yahoo.com)
Ronald Rivest for RSA Security, Inc.
=head1 BUGS
Disclaimer: Strictly speaking, this module uses the "alleged" RC4
algorithm. The Algorithm known as "RC4" is a trademark of RSA Security
Inc., and this document makes no claims one way or another that this
is the correct algorithm, and further, make no claims about the
quality of the source code nor any licensing requirements for
commercial use.
There's nothing preventing you from using this module in an insecure
way which leaks information. For example, encrypting multilple
messages with the same passphrase may allow an attacker to decode all of
them with little effort, even though they'll appear to be secured. If
serious crypto is your goal, be careful. Be very careful.
It's a pure-Perl implementation, so that rating of "Eight
to sixteen machine operations" is good for nothing but a good laugh.
If encryption and decryption are a bottleneck for you, please re-write
this module to use native code wherever practical.
=head1 LICENSE
This is free software and may be modified and/or
redistributed under the same terms as Perl itself.
=head1 SEE ALSO
L<perl>, L<http://www.cypherspace.org>, L<http://www.rsasecurity.com>,
L<http://www.achtung.com/crypto/rc4.html>,
L<http://www.columbia.edu/~ariel/ssleay/rrc4.html>
=cut
CRYPT_RC4
$fatpacked{"Digest/Perl/MD5.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'DIGEST_PERL_MD5';
package Digest::Perl::MD5;
use strict;
use integer;
use Exporter;
use vars qw($VERSION @ISA @EXPORTER @EXPORT_OK);
@EXPORT_OK = qw(md5 md5_hex md5_base64);
@ISA = 'Exporter';
$VERSION = '1.9';
# I-Vektor
sub A() { 0x67_45_23_01 }
sub B() { 0xef_cd_ab_89 }
sub C() { 0x98_ba_dc_fe }
sub D() { 0x10_32_54_76 }
# for internal use
sub MAX() { 0xFFFFFFFF }
# pad a message to a multiple of 64
sub padding {
my $l = length (my $msg = shift() . chr(128));
$msg .= "\0" x (($l%64<=56?56:120)-$l%64);
$l = ($l-1)*8;
$msg .= pack 'VV', $l & MAX , ($l >> 16 >> 16);
}
sub rotate_left($$) {
#$_[0] << $_[1] | $_[0] >> (32 - $_[1]);
#my $right = $_[0] >> (32 - $_[1]);
#my $rmask = (1 << $_[1]) - 1;
($_[0] << $_[1]) | (( $_[0] >> (32 - $_[1]) ) & ((1 << $_[1]) - 1));
#$_[0] << $_[1] | (($_[0]>> (32 - $_[1])) & (1 << (32 - $_[1])) - 1);
}
sub gen_code {
# Discard upper 32 bits on 64 bit archs.
my $MSK = ((1 << 16) << 16) ? ' & ' . MAX : '';
# FF => "X0=rotate_left(((X1&X2)|(~X1&X3))+X0+X4+X6$MSK,X5)+X1$MSK;",
# GG => "X0=rotate_left(((X1&X3)|(X2&(~X3)))+X0+X4+X6$MSK,X5)+X1$MSK;",
my %f = (
FF => "X0=rotate_left((X3^(X1&(X2^X3)))+X0+X4+X6$MSK,X5)+X1$MSK;",
GG => "X0=rotate_left((X2^(X3&(X1^X2)))+X0+X4+X6$MSK,X5)+X1$MSK;",
HH => "X0=rotate_left((X1^X2^X3)+X0+X4+X6$MSK,X5)+X1$MSK;",
II => "X0=rotate_left((X2^(X1|(~X3)))+X0+X4+X6$MSK,X5)+X1$MSK;",
);
#unless ( (1 << 16) << 16) { %f = %{$CODES{'32bit'}} }
#else { %f = %{$CODES{'64bit'}} }
my %s = ( # shift lengths
S11 => 7, S12 => 12, S13 => 17, S14 => 22, S21 => 5, S22 => 9, S23 => 14,
S24 => 20, S31 => 4, S32 => 11, S33 => 16, S34 => 23, S41 => 6, S42 => 10,
S43 => 15, S44 => 21
);
my $insert = "\n";
while(defined( my $data = <DATA> )) {
chomp $data;
next unless $data =~ /^[FGHI]/;
my ($func,@x) = split /,/, $data;
my $c = $f{$func};
$c =~ s/X(\d)/$x[$1]/g;
$c =~ s/(S\d{2})/$s{$1}/;
$c =~ s/^(.*)=rotate_left\((.*),(.*)\)\+(.*)$//;
my $su = 32 - $3;
my $sh = (1 << $3) - 1;
$c = "$1=(((\$r=$2)<<$3)|((\$r>>$su)&$sh))+$4";
#my $rotate = "(($2 << $3) || (($2 >> (32 - $3)) & (1 << $2) - 1)))";
# $c = "\$r = $2;
# $1 = ((\$r << $3) | ((\$r >> (32 - $3)) & ((1 << $3) - 1))) + $4";
$insert .= "\t$c\n";
}
close DATA;
my $dump = '
sub round {
my ($a,$b,$c,$d) = @_[0 .. 3];
my $r;' . $insert . '
$_[0]+$a' . $MSK . ', $_[1]+$b ' . $MSK .
', $_[2]+$c' . $MSK . ', $_[3]+$d' . $MSK . ';
}';
eval $dump;
# print "$dump\n";
# exit 0;
}
gen_code();
#########################################
# Private output converter functions:
sub _encode_hex { unpack 'H*', $_[0] }
sub _encode_base64 {
my $res;
while ($_[0] =~ /(.{1,45})/gs) {
$res .= substr pack('u', $1), 1;
chop $res;
}
$res =~ tr|` -_|AA-Za-z0-9+/|;#`
chop $res; chop $res;
$res
}
#########################################
# OOP interface:
sub new {
my $proto = shift;
my $class = ref $proto || $proto;
my $self = {};
bless $self, $class;
$self->reset();
$self
}
sub reset {
my $self = shift;
delete $self->{_data};
$self->{_state} = [A,B,C,D];
$self->{_length} = 0;
$self
}
sub add {
my $self = shift;
$self->{_data} .= join '', @_ if @_;
my ($i,$c);
for $i (0 .. (length $self->{_data})/64-1) {
my @X = unpack 'V16', substr $self->{_data}, $i*64, 64;
@{$self->{_state}} = round(@{$self->{_state}},@X);
++$c;
}
if ($c) {
substr ($self->{_data}, 0, $c*64) = '';
$self->{_length} += $c*64;
}
$self
}
sub finalize {
my $self = shift;
$self->{_data} .= chr(128);
my $l = $self->{_length} + length $self->{_data};
$self->{_data} .= "\0" x (($l%64<=56?56:120)-$l%64);
$l = ($l-1)*8;
$self->{_data} .= pack 'VV', $l & MAX , ($l >> 16 >> 16);
$self->add();
$self
}
sub addfile {
my ($self,$fh) = @_;
if (!ref($fh) && ref(\$fh) ne "GLOB") {
require Symbol;
$fh = Symbol::qualify($fh, scalar caller);
}
# $self->{_data} .= do{local$/;<$fh>};
my $read = 0;
my $buffer = '';
$self->add($buffer) while $read = read $fh, $buffer, 8192;
die __PACKAGE__, " read failed: $!" unless defined $read;
$self
}
sub add_bits {
my $self = shift;
return $self->add( pack 'B*', shift ) if @_ == 1;
my ($b,$n) = @_;
die __PACKAGE__, " Invalid number of bits\n" if $n%8;
$self->add( substr $b, 0, $n/8 )
}
sub digest {
my $self = shift;
$self->finalize();
my $res = pack 'V4', @{$self->{_state}};
$self->reset();
$res
}
sub hexdigest {
_encode_hex($_[0]->digest)
}
sub b64digest {
_encode_base64($_[0]->digest)
}
sub clone {
my $self = shift;
my $clone = {
_state => [@{$self->{_state}}],
_length => $self->{_length},
_data => $self->{_data}
};
bless $clone, ref $self || $self;
}
#########################################
# Procedural interface:
sub md5 {
my $message = padding(join'',@_);
my ($a,$b,$c,$d) = (A,B,C,D);
my $i;
for $i (0 .. (length $message)/64-1) {
my @X = unpack 'V16', substr $message,$i*64,64;
($a,$b,$c,$d) = round($a,$b,$c,$d,@X);
}
pack 'V4',$a,$b,$c,$d;
}
sub md5_hex { _encode_hex &md5 }
sub md5_base64 { _encode_base64 &md5 }
1;
=head1 NAME
Digest::MD5::Perl - Perl implementation of Ron Rivests MD5 Algorithm
=head1 DISCLAIMER
This is B<not> an interface (like C<Digest::MD5>) but a Perl implementation of MD5.
It is written in perl only and because of this it is slow but it works without C-Code.
You should use C<Digest::MD5> instead of this module if it is available.
This module is only useful for
=over 4
=item
computers where you cannot install C<Digest::MD5> (e.g. lack of a C-Compiler)
=item
encrypting only small amounts of data (less than one million bytes). I use it to
hash passwords.
=item
educational purposes
=back
=head1 SYNOPSIS
# Functional style
use Digest::MD5 qw(md5 md5_hex md5_base64);
$hash = md5 $data;
$hash = md5_hex $data;
$hash = md5_base64 $data;
# OO style
use Digest::MD5;
$ctx = Digest::MD5->new;
$ctx->add($data);
$ctx->addfile(*FILE);
$digest = $ctx->digest;
$digest = $ctx->hexdigest;
$digest = $ctx->b64digest;
=head1 DESCRIPTION
This modules has the same interface as the much faster C<Digest::MD5>. So you can
easily exchange them, e.g.
BEGIN {
eval {
require Digest::MD5;
import Digest::MD5 'md5_hex'
};
if ($@) { # ups, no Digest::MD5
require Digest::Perl::MD5;
import Digest::Perl::MD5 'md5_hex'
}
}
If the C<Digest::MD5> module is available it is used and if not you take
C<Digest::Perl::MD5>.
You can also install the Perl part of Digest::MD5 together with Digest::Perl::MD5
and use Digest::MD5 as normal, it falls back to Digest::Perl::MD5 if it
cannot load its object files.
For a detailed Documentation see the C<Digest::MD5> module.
=head1 EXAMPLES
The simplest way to use this library is to import the md5_hex()
function (or one of its cousins):
use Digest::Perl::MD5 'md5_hex';
print 'Digest is ', md5_hex('foobarbaz'), "\n";
The above example would print out the message
Digest is 6df23dc03f9b54cc38a0fc1483df6e21
provided that the implementation is working correctly. The same
checksum can also be calculated in OO style:
use Digest::MD5;
$md5 = Digest::MD5->new;
$md5->add('foo', 'bar');
$md5->add('baz');
$digest = $md5->hexdigest;
print "Digest is $digest\n";
The digest methods are destructive. That means you can only call them
once and the $md5 objects is reset after use. You can make a copy with clone:
$md5->clone->hexdigest
=head1 LIMITATIONS
This implementation of the MD5 algorithm has some limitations:
=over 4
=item
It's slow, very slow. I've done my very best but Digest::MD5 is still about 100 times faster.
You can only encrypt Data up to one million bytes in an acceptable time. But it's very useful
for encrypting small amounts of data like passwords.
=item
You can only encrypt up to 2^32 bits = 512 MB on 32bit archs. But You should
use C<Digest::MD5> for those amounts of data anyway.
=back
=head1 SEE ALSO
L<Digest::MD5>
L<md5(1)>
RFC 1321
tools/md5: a small BSD compatible md5 tool written in pure perl.
=head1 COPYRIGHT
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
Copyright 2000 Christian Lackas, Imperia Software Solutions
Copyright 1998-1999 Gisle Aas.
Copyright 1995-1996 Neil Winton.
Copyright 1991-1992 RSA Data Security, Inc.
The MD5 algorithm is defined in RFC 1321. The basic C code
implementing the algorithm is derived from that in the RFC and is
covered by the following copyright:
=over 4
=item
Copyright (C) 1991-1992, RSA Data Security, Inc. Created 1991. All
rights reserved.
License to copy and use this software is granted provided that it
is identified as the "RSA Data Security, Inc. MD5 Message-Digest
Algorithm" in all material mentioning or referencing this software
or this function.
License is also granted to make and use derivative works provided
that such works are identified as "derived from the RSA Data
Security, Inc. MD5 Message-Digest Algorithm" in all material
mentioning or referencing the derived work.
RSA Data Security, Inc. makes no representations concerning either
the merchantability of this software or the suitability of this
software for any particular purpose. It is provided "as is"
without express or implied warranty of any kind.
These notices must be retained in any copies of any part of this
documentation and/or software.
=back
This copyright does not prohibit distribution of any version of Perl
containing this extension under the terms of the GNU or Artistic
licenses.
=head1 AUTHORS
The original MD5 interface was written by Neil Winton
(<N.Winton (at) axion.bt.co.uk>).
C<Digest::MD5> was made by Gisle Aas <gisle (at) aas.no> (I took his Interface
and part of the documentation).
Thanks to Guido Flohr for his 'use integer'-hint.
This release was made by Christian Lackas <delta (at) lackas.net>.
=cut
__DATA__
FF,$a,$b,$c,$d,$_[4],7,0xd76aa478,/* 1 */
FF,$d,$a,$b,$c,$_[5],12,0xe8c7b756,/* 2 */
FF,$c,$d,$a,$b,$_[6],17,0x242070db,/* 3 */
FF,$b,$c,$d,$a,$_[7],22,0xc1bdceee,/* 4 */
FF,$a,$b,$c,$d,$_[8],7,0xf57c0faf,/* 5 */
FF,$d,$a,$b,$c,$_[9],12,0x4787c62a,/* 6 */
FF,$c,$d,$a,$b,$_[10],17,0xa8304613,/* 7 */
FF,$b,$c,$d,$a,$_[11],22,0xfd469501,/* 8 */
FF,$a,$b,$c,$d,$_[12],7,0x698098d8,/* 9 */
FF,$d,$a,$b,$c,$_[13],12,0x8b44f7af,/* 10 */
FF,$c,$d,$a,$b,$_[14],17,0xffff5bb1,/* 11 */
FF,$b,$c,$d,$a,$_[15],22,0x895cd7be,/* 12 */
FF,$a,$b,$c,$d,$_[16],7,0x6b901122,/* 13 */
FF,$d,$a,$b,$c,$_[17],12,0xfd987193,/* 14 */
FF,$c,$d,$a,$b,$_[18],17,0xa679438e,/* 15 */
FF,$b,$c,$d,$a,$_[19],22,0x49b40821,/* 16 */
GG,$a,$b,$c,$d,$_[5],5,0xf61e2562,/* 17 */
GG,$d,$a,$b,$c,$_[10],9,0xc040b340,/* 18 */
GG,$c,$d,$a,$b,$_[15],14,0x265e5a51,/* 19 */
GG,$b,$c,$d,$a,$_[4],20,0xe9b6c7aa,/* 20 */
GG,$a,$b,$c,$d,$_[9],5,0xd62f105d,/* 21 */
GG,$d,$a,$b,$c,$_[14],9,0x2441453,/* 22 */
GG,$c,$d,$a,$b,$_[19],14,0xd8a1e681,/* 23 */
GG,$b,$c,$d,$a,$_[8],20,0xe7d3fbc8,/* 24 */
GG,$a,$b,$c,$d,$_[13],5,0x21e1cde6,/* 25 */
GG,$d,$a,$b,$c,$_[18],9,0xc33707d6,/* 26 */
GG,$c,$d,$a,$b,$_[7],14,0xf4d50d87,/* 27 */
GG,$b,$c,$d,$a,$_[12],20,0x455a14ed,/* 28 */
GG,$a,$b,$c,$d,$_[17],5,0xa9e3e905,/* 29 */
GG,$d,$a,$b,$c,$_[6],9,0xfcefa3f8,/* 30 */
GG,$c,$d,$a,$b,$_[11],14,0x676f02d9,/* 31 */
GG,$b,$c,$d,$a,$_[16],20,0x8d2a4c8a,/* 32 */
HH,$a,$b,$c,$d,$_[9],4,0xfffa3942,/* 33 */
HH,$d,$a,$b,$c,$_[12],11,0x8771f681,/* 34 */
HH,$c,$d,$a,$b,$_[15],16,0x6d9d6122,/* 35 */
HH,$b,$c,$d,$a,$_[18],23,0xfde5380c,/* 36 */
HH,$a,$b,$c,$d,$_[5],4,0xa4beea44,/* 37 */
HH,$d,$a,$b,$c,$_[8],11,0x4bdecfa9,/* 38 */
HH,$c,$d,$a,$b,$_[11],16,0xf6bb4b60,/* 39 */
HH,$b,$c,$d,$a,$_[14],23,0xbebfbc70,/* 40 */
HH,$a,$b,$c,$d,$_[17],4,0x289b7ec6,/* 41 */
HH,$d,$a,$b,$c,$_[4],11,0xeaa127fa,/* 42 */
HH,$c,$d,$a,$b,$_[7],16,0xd4ef3085,/* 43 */
HH,$b,$c,$d,$a,$_[10],23,0x4881d05,/* 44 */
HH,$a,$b,$c,$d,$_[13],4,0xd9d4d039,/* 45 */
HH,$d,$a,$b,$c,$_[16],11,0xe6db99e5,/* 46 */
HH,$c,$d,$a,$b,$_[19],16,0x1fa27cf8,/* 47 */
HH,$b,$c,$d,$a,$_[6],23,0xc4ac5665,/* 48 */
II,$a,$b,$c,$d,$_[4],6,0xf4292244,/* 49 */
II,$d,$a,$b,$c,$_[11],10,0x432aff97,/* 50 */
II,$c,$d,$a,$b,$_[18],15,0xab9423a7,/* 51 */
II,$b,$c,$d,$a,$_[9],21,0xfc93a039,/* 52 */
II,$a,$b,$c,$d,$_[16],6,0x655b59c3,/* 53 */
II,$d,$a,$b,$c,$_[7],10,0x8f0ccc92,/* 54 */
II,$c,$d,$a,$b,$_[14],15,0xffeff47d,/* 55 */
II,$b,$c,$d,$a,$_[5],21,0x85845dd1,/* 56 */
II,$a,$b,$c,$d,$_[12],6,0x6fa87e4f,/* 57 */
II,$d,$a,$b,$c,$_[19],10,0xfe2ce6e0,/* 58 */
II,$c,$d,$a,$b,$_[10],15,0xa3014314,/* 59 */
II,$b,$c,$d,$a,$_[17],21,0x4e0811a1,/* 60 */
II,$a,$b,$c,$d,$_[8],6,0xf7537e82,/* 61 */
II,$d,$a,$b,$c,$_[15],10,0xbd3af235,/* 62 */
II,$c,$d,$a,$b,$_[6],15,0x2ad7d2bb,/* 63 */
II,$b,$c,$d,$a,$_[13],21,0xeb86d391,/* 64 */
DIGEST_PERL_MD5
$fatpacked{"Exporter.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'EXPORTER';
package Exporter;
require 5.006;
# Be lean.
#use strict;
#no strict 'refs';
our $Debug = 0;
our $ExportLevel = 0;
our $Verbose ||= 0;
our $VERSION = '5.74';
our (%Cache);
sub as_heavy {
require Exporter::Heavy;
# Unfortunately, this does not work if the caller is aliased as *name = \&foo
# Thus the need to create a lot of identical subroutines
my $c = (caller(1))[3];
$c =~ s/.*:://;
\&{"Exporter::Heavy::heavy_$c"};
}
sub export {
goto &{as_heavy()};
}
sub import {
my $pkg = shift;
my $callpkg = caller($ExportLevel);
if ($pkg eq "Exporter" and @_ and $_[0] eq "import") {
*{$callpkg."::import"} = \&import;
return;
}
# We *need* to treat @{"$pkg\::EXPORT_FAIL"} since Carp uses it :-(
my $exports = \@{"$pkg\::EXPORT"};
# But, avoid creating things if they don't exist, which saves a couple of
# hundred bytes per package processed.
my $fail = ${$pkg . '::'}{EXPORT_FAIL} && \@{"$pkg\::EXPORT_FAIL"};
return export $pkg, $callpkg, @_
if $Verbose or $Debug or $fail && @$fail > 1;
my $export_cache = ($Cache{$pkg} ||= {});
my $args = @_ or @_ = @$exports;
if ($args and not %$export_cache) {
s/^&//, $export_cache->{$_} = 1
foreach (@$exports, @{"$pkg\::EXPORT_OK"});
}
my $heavy;
# Try very hard not to use {} and hence have to enter scope on the foreach
# We bomb out of the loop with last as soon as heavy is set.
if ($args or $fail) {
($heavy = (/\W/ or $args and not exists $export_cache->{$_}
or $fail and @$fail and $_ eq $fail->[0])) and last
foreach (@_);
} else {
($heavy = /\W/) and last
foreach (@_);
}
return export $pkg, $callpkg, ($args ? @_ : ()) if $heavy;
local $SIG{__WARN__} =
sub {require Carp; &Carp::carp} if not $SIG{__WARN__};
# shortcut for the common case of no type character
*{"$callpkg\::$_"} = \&{"$pkg\::$_"} foreach @_;
}
# Default methods
sub export_fail {
my $self = shift;
@_;
}
# Unfortunately, caller(1)[3] "does not work" if the caller is aliased as
# *name = \&foo. Thus the need to create a lot of identical subroutines
# Otherwise we could have aliased them to export().
sub export_to_level {
goto &{as_heavy()};
}
sub export_tags {
goto &{as_heavy()};
}
sub export_ok_tags {
goto &{as_heavy()};
}
sub require_version {
goto &{as_heavy()};
}
1;
__END__
=head1 NAME
Exporter - Implements default import method for modules
=head1 SYNOPSIS
In module F<YourModule.pm>:
package YourModule;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(munge frobnicate); # symbols to export on request
or
package YourModule;
use Exporter 'import'; # gives you Exporter's import() method directly
our @EXPORT_OK = qw(munge frobnicate); # symbols to export on request
In other files which wish to use C<YourModule>:
use YourModule qw(frobnicate); # import listed symbols
frobnicate ($left, $right) # calls YourModule::frobnicate
Take a look at L</Good Practices> for some variants
you will like to use in modern Perl code.
=head1 DESCRIPTION
The Exporter module implements an C<import> method which allows a module
to export functions and variables to its users' namespaces. Many modules
use Exporter rather than implementing their own C<import> method because
Exporter provides a highly flexible interface, with an implementation optimised
for the common case.
Perl automatically calls the C<import> method when processing a
C<use> statement for a module. Modules and C<use> are documented
in L<perlfunc> and L<perlmod>. Understanding the concept of
modules and how the C<use> statement operates is important to
understanding the Exporter.
=head2 How to Export
The arrays C<@EXPORT> and C<@EXPORT_OK> in a module hold lists of
symbols that are going to be exported into the users name space by
default, or which they can request to be exported, respectively. The
symbols can represent functions, scalars, arrays, hashes, or typeglobs.
The symbols must be given by full name with the exception that the
ampersand in front of a function is optional, e.g.
our @EXPORT = qw(afunc $scalar @array); # afunc is a function
our @EXPORT_OK = qw(&bfunc %hash *typeglob); # explicit prefix on &bfunc
If you are only exporting function names it is recommended to omit the
ampersand, as the implementation is faster this way.
=head2 Selecting What to Export
Do B<not> export method names!
Do B<not> export anything else by default without a good reason!
Exports pollute the namespace of the module user. If you must export
try to use C<@EXPORT_OK> in preference to C<@EXPORT> and avoid short or
common symbol names to reduce the risk of name clashes.
Generally anything not exported is still accessible from outside the
module using the C<YourModule::item_name> (or C<< $blessed_ref->method >>)
syntax. By convention you can use a leading underscore on names to
informally indicate that they are 'internal' and not for public use.
(It is actually possible to get private functions by saying:
my $subref = sub { ... };
$subref->(@args); # Call it as a function
$obj->$subref(@args); # Use it as a method
However if you use them for methods it is up to you to figure out
how to make inheritance work.)
As a general rule, if the module is trying to be object oriented
then export nothing. If it's just a collection of functions then
C<@EXPORT_OK> anything but use C<@EXPORT> with caution. For function and
method names use barewords in preference to names prefixed with
ampersands for the export lists.
Other module design guidelines can be found in L<perlmod>.
=head2 How to Import
In other files which wish to use your module there are three basic ways for
them to load your module and import its symbols:
=over 4
=item C<use YourModule;>
This imports all the symbols from YourModule's C<@EXPORT> into the namespace
of the C<use> statement.
=item C<use YourModule ();>
This causes perl to load your module but does not import any symbols.
=item C<use YourModule qw(...);>
This imports only the symbols listed by the caller into their namespace.
All listed symbols must be in your C<@EXPORT> or C<@EXPORT_OK>, else an error
occurs. The advanced export features of Exporter are accessed like this,
but with list entries that are syntactically distinct from symbol names.
=back
Unless you want to use its advanced features, this is probably all you
need to know to use Exporter.
=head1 Advanced Features
=head2 Specialised Import Lists
If any of the entries in an import list begins with !, : or / then
the list is treated as a series of specifications which either add to
or delete from the list of names to import. They are processed left to
right. Specifications are in the form:
[!]name This name only
[!]:DEFAULT All names in @EXPORT
[!]:tag All names in $EXPORT_TAGS{tag} anonymous array
[!]/pattern/ All names in @EXPORT and @EXPORT_OK which match
A leading ! indicates that matching names should be deleted from the
list of names to import. If the first specification is a deletion it
is treated as though preceded by :DEFAULT. If you just want to import
extra names in addition to the default set you will still need to
include :DEFAULT explicitly.
e.g., F<Module.pm> defines:
our @EXPORT = qw(A1 A2 A3 A4 A5);
our @EXPORT_OK = qw(B1 B2 B3 B4 B5);
our %EXPORT_TAGS = (T1 => [qw(A1 A2 B1 B2)], T2 => [qw(A1 A2 B3 B4)]);
Note that you cannot use tags in @EXPORT or @EXPORT_OK.
Names in EXPORT_TAGS must also appear in @EXPORT or @EXPORT_OK.
An application using Module can say something like:
use Module qw(:DEFAULT :T2 !B3 A3);
Other examples include:
use Socket qw(!/^[AP]F_/ !SOMAXCONN !SOL_SOCKET);
use POSIX qw(:errno_h :termios_h !TCSADRAIN !/^EXIT/);
Remember that most patterns (using //) will need to be anchored
with a leading ^, e.g., C</^EXIT/> rather than C</EXIT/>.
You can say C<BEGIN { $Exporter::Verbose=1 }> to see how the
specifications are being processed and what is actually being imported
into modules.
=head2 Exporting Without Using Exporter's import Method
Exporter has a special method, 'export_to_level' which is used in situations
where you can't directly call Exporter's
import method. The export_to_level
method looks like:
MyPackage->export_to_level(
$where_to_export, $package, @what_to_export
);
where C<$where_to_export> is an integer telling how far up the calling stack
to export your symbols, and C<@what_to_export> is an array telling what
symbols *to* export (usually this is C<@_>). The C<$package> argument is
currently unused.
For example, suppose that you have a module, A, which already has an
import function:
package A;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw($b);
sub import
{
$A::b = 1; # not a very useful import method
}
and you want to Export symbol C<$A::b> back to the module that called
package A. Since Exporter relies on the import method to work, via
inheritance, as it stands Exporter::import() will never get called.
Instead, say the following:
package A;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw($b);
sub import
{
$A::b = 1;
A->export_to_level(1, @_);
}
This will export the symbols one level 'above' the current package - ie: to
the program or module that used package A.
Note: Be careful not to modify C<@_> at all before you call export_to_level
- or people using your package will get very unexplained results!
=head2 Exporting Without Inheriting from Exporter
By including Exporter in your C<@ISA> you inherit an Exporter's import() method
but you also inherit several other helper methods which you probably don't
want. To avoid this you can do:
package YourModule;
use Exporter qw(import);
which will export Exporter's own import() method into YourModule.
Everything will work as before but you won't need to include Exporter in
C<@YourModule::ISA>.
Note: This feature was introduced in version 5.57
of Exporter, released with perl 5.8.3.
=head2 Module Version Checking
The Exporter module will convert an attempt to import a number from a
module into a call to C<< $module_name->VERSION($value) >>. This can
be used to validate that the version of the module being used is
greater than or equal to the required version.
For historical reasons, Exporter supplies a C<require_version> method that
simply delegates to C<VERSION>. Originally, before C<UNIVERSAL::VERSION>
existed, Exporter would call C<require_version>.
Since the C<UNIVERSAL::VERSION> method treats the C<$VERSION> number as
a simple numeric value it will regard version 1.10 as lower than
1.9. For this reason it is strongly recommended that you use numbers
with at least two decimal places, e.g., 1.09.
=head2 Managing Unknown Symbols
In some situations you may want to prevent certain symbols from being
exported. Typically this applies to extensions which have functions
or constants that may not exist on some systems.
The names of any symbols that cannot be exported should be listed
in the C<@EXPORT_FAIL> array.
If a module attempts to import any of these symbols the Exporter
will give the module an opportunity to handle the situation before
generating an error. The Exporter will call an export_fail method
with a list of the failed symbols:
@failed_symbols = $module_name->export_fail(@failed_symbols);
If the C<export_fail> method returns an empty list then no error is
recorded and all the requested symbols are exported. If the returned
list is not empty then an error is generated for each symbol and the
export fails. The Exporter provides a default C<export_fail> method which
simply returns the list unchanged.
Uses for the C<export_fail> method include giving better error messages
for some symbols and performing lazy architectural checks (put more
symbols into C<@EXPORT_FAIL> by default and then take them out if someone
actually tries to use them and an expensive check shows that they are
usable on that platform).
=head2 Tag Handling Utility Functions
Since the symbols listed within C<%EXPORT_TAGS> must also appear in either
C<@EXPORT> or C<@EXPORT_OK>, two utility functions are provided which allow
you to easily add tagged sets of symbols to C<@EXPORT> or C<@EXPORT_OK>:
our %EXPORT_TAGS = (foo => [qw(aa bb cc)], bar => [qw(aa cc dd)]);
Exporter::export_tags('foo'); # add aa, bb and cc to @EXPORT
Exporter::export_ok_tags('bar'); # add aa, cc and dd to @EXPORT_OK
Any names which are not tags are added to C<@EXPORT> or C<@EXPORT_OK>
unchanged but will trigger a warning (with C<-w>) to avoid misspelt tags
names being silently added to C<@EXPORT> or C<@EXPORT_OK>. Future versions
may make this a fatal error.
=head2 Generating Combined Tags
If several symbol categories exist in C<%EXPORT_TAGS>, it's usually
useful to create the utility ":all" to simplify "use" statements.
The simplest way to do this is:
our %EXPORT_TAGS = (foo => [qw(aa bb cc)], bar => [qw(aa cc dd)]);
# add all the other ":class" tags to the ":all" class,
# deleting duplicates
{
my %seen;
push @{$EXPORT_TAGS{all}},
grep {!$seen{$_}++} @{$EXPORT_TAGS{$_}} foreach keys %EXPORT_TAGS;
}
F<CGI.pm> creates an ":all" tag which contains some (but not really
all) of its categories. That could be done with one small
change:
# add some of the other ":class" tags to the ":all" class,
# deleting duplicates
{
my %seen;
push @{$EXPORT_TAGS{all}},
grep {!$seen{$_}++} @{$EXPORT_TAGS{$_}}
foreach qw/html2 html3 netscape form cgi internal/;
}
Note that the tag names in C<%EXPORT_TAGS> don't have the leading ':'.
=head2 C<AUTOLOAD>ed Constants
Many modules make use of C<AUTOLOAD>ing for constant subroutines to
avoid having to compile and waste memory on rarely used values (see
L<perlsub> for details on constant subroutines). Calls to such
constant subroutines are not optimized away at compile time because
they can't be checked at compile time for constancy.
Even if a prototype is available at compile time, the body of the
subroutine is not (it hasn't been C<AUTOLOAD>ed yet). perl needs to
examine both the C<()> prototype and the body of a subroutine at
compile time to detect that it can safely replace calls to that
subroutine with the constant value.
A workaround for this is to call the constants once in a C<BEGIN> block:
package My ;
use Socket ;
foo( SO_LINGER ); ## SO_LINGER NOT optimized away; called at runtime
BEGIN { SO_LINGER }
foo( SO_LINGER ); ## SO_LINGER optimized away at compile time.
This forces the C<AUTOLOAD> for C<SO_LINGER> to take place before
SO_LINGER is encountered later in C<My> package.
If you are writing a package that C<AUTOLOAD>s, consider forcing
an C<AUTOLOAD> for any constants explicitly imported by other packages
or which are usually used when your package is C<use>d.
=head1 Good Practices
=head2 Declaring C<@EXPORT_OK> and Friends
When using C<Exporter> with the standard C<strict> and C<warnings>
pragmas, the C<our> keyword is needed to declare the package
variables C<@EXPORT_OK>, C<@EXPORT>, C<@ISA>, etc.
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(munge frobnicate);
If backward compatibility for Perls B<under> 5.6 is important,
one must write instead a C<use vars> statement.
use vars qw(@ISA @EXPORT_OK);
@ISA = qw(Exporter);
@EXPORT_OK = qw(munge frobnicate);
=head2 Playing Safe
There are some caveats with the use of runtime statements
like C<require Exporter> and the assignment to package
variables, which can be very subtle for the unaware programmer.
This may happen for instance with mutually recursive
modules, which are affected by the time the relevant
constructions are executed.
The ideal (but a bit ugly) way to never have to think
about that is to use C<BEGIN> blocks. So the first part
of the L</SYNOPSIS> code could be rewritten as:
package YourModule;
use strict;
use warnings;
our (@ISA, @EXPORT_OK);
BEGIN {
require Exporter;
@ISA = qw(Exporter);
@EXPORT_OK = qw(munge frobnicate); # symbols to export on request
}
The C<BEGIN> will assure that the loading of F<Exporter.pm>
and the assignments to C<@ISA> and C<@EXPORT_OK> happen
immediately, leaving no room for something to get awry
or just plain wrong.
With respect to loading C<Exporter> and inheriting, there
are alternatives with the use of modules like C<base> and C<parent>.
use base qw(Exporter);
# or
use parent qw(Exporter);
Any of these statements are nice replacements for
C<BEGIN { require Exporter; @ISA = qw(Exporter); }>
with the same compile-time effect. The basic difference
is that C<base> code interacts with declared C<fields>
while C<parent> is a streamlined version of the older
C<base> code to just establish the IS-A relationship.
For more details, see the documentation and code of
L<base> and L<parent>.
Another thorough remedy to that runtime
vs. compile-time trap is to use L<Exporter::Easy>,
which is a wrapper of Exporter that allows all
boilerplate code at a single gulp in the
use statement.
use Exporter::Easy (
OK => [ qw(munge frobnicate) ],
);
# @ISA setup is automatic
# all assignments happen at compile time
=head2 What Not to Export
You have been warned already in L</Selecting What to Export>
to not export:
=over 4
=item *
method names (because you don't need to
and that's likely to not do what you want),
=item *
anything by default (because you don't want to surprise your users...
badly)
=item *
anything you don't need to (because less is more)
=back
There's one more item to add to this list. Do B<not>
export variable names. Just because C<Exporter> lets you
do that, it does not mean you should.
@EXPORT_OK = qw($svar @avar %hvar); # DON'T!
Exporting variables is not a good idea. They can
change under the hood, provoking horrible
effects at-a-distance that are too hard to track
and to fix. Trust me: they are not worth it.
To provide the capability to set/get class-wide
settings, it is best instead to provide accessors
as subroutines or class methods instead.
=head1 SEE ALSO
C<Exporter> is definitely not the only module with
symbol exporter capabilities. At CPAN, you may find
a bunch of them. Some are lighter. Some
provide improved APIs and features. Pick the one
that fits your needs. The following is
a sample list of such modules.
Exporter::Easy
Exporter::Lite
Exporter::Renaming
Exporter::Tidy
Sub::Exporter / Sub::Installer
Perl6::Export / Perl6::Export::Attrs
=head1 LICENSE
This library is free software. You can redistribute it
and/or modify it under the same terms as Perl itself.
=cut
EXPORTER
$fatpacked{"Exporter/Heavy.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'EXPORTER_HEAVY';
package Exporter::Heavy;
use strict;
no strict 'refs';
# On one line so MakeMaker will see it.
require Exporter; our $VERSION = $Exporter::VERSION;
=head1 NAME
Exporter::Heavy - Exporter guts
=head1 SYNOPSIS
(internal use only)
=head1 DESCRIPTION
No user-serviceable parts inside.
=cut
#
# We go to a lot of trouble not to 'require Carp' at file scope,
# because Carp requires Exporter, and something has to give.
#
sub _rebuild_cache {
my ($pkg, $exports, $cache) = @_;
s/^&// foreach @$exports;
@{$cache}{@$exports} = (1) x @$exports;
my $ok = \@{"${pkg}::EXPORT_OK"};
if (@$ok) {
s/^&// foreach @$ok;
@{$cache}{@$ok} = (1) x @$ok;
}
}
sub heavy_export {
# Save the old __WARN__ handler in case it was defined
my $oldwarn = $SIG{__WARN__};
# First make import warnings look like they're coming from the "use".
local $SIG{__WARN__} = sub {
# restore it back so proper stacking occurs
local $SIG{__WARN__} = $oldwarn;
my $text = shift;
if ($text =~ s/ at \S*Exporter\S*.pm line \d+.*\n//) {
require Carp;
local $Carp::CarpLevel = 1; # ignore package calling us too.
Carp::carp($text);
}
else {
warn $text;
}
};
local $SIG{__DIE__} = sub {
require Carp;
local $Carp::CarpLevel = 1; # ignore package calling us too.
Carp::croak("$_[0]Illegal null symbol in \@${1}::EXPORT")
if $_[0] =~ /^Unable to create sub named "(.*?)::"/;
};
my($pkg, $callpkg, @imports) = @_;
my($type, $sym, $cache_is_current, $oops);
my($exports, $export_cache) = (\@{"${pkg}::EXPORT"},
$Exporter::Cache{$pkg} ||= {});
if (@imports) {
if (!%$export_cache) {
_rebuild_cache ($pkg, $exports, $export_cache);
$cache_is_current = 1;
}
if (grep m{^[/!:]}, @imports) {
my $tagsref = \%{"${pkg}::EXPORT_TAGS"};
my $tagdata;
my %imports;
my($remove, $spec, @names, @allexports);
# negated first item implies starting with default set:
unshift @imports, ':DEFAULT' if $imports[0] =~ m/^!/;
foreach $spec (@imports){
$remove = $spec =~ s/^!//;
if ($spec =~ s/^://){
if ($spec eq 'DEFAULT'){
@names = @$exports;
}
elsif ($tagdata = $tagsref->{$spec}) {
@names = @$tagdata;
}
else {
warn qq["$spec" is not defined in %${pkg}::EXPORT_TAGS];
++$oops;
next;
}
}
elsif ($spec =~ m:^/(.*)/$:){
my $patn = $1;
@allexports = keys %$export_cache unless @allexports; # only do keys once
@names = grep(/$patn/, @allexports); # not anchored by default
}
else {
@names = ($spec); # is a normal symbol name
}
warn "Import ".($remove ? "del":"add").": @names "
if $Exporter::Verbose;
if ($remove) {
foreach $sym (@names) { delete $imports{$sym} }
}
else {
@imports{@names} = (1) x @names;
}
}
@imports = keys %imports;
}
my @carp;
foreach $sym (@imports) {
if (!$export_cache->{$sym}) {
if ($sym =~ m/^\d/) {
$pkg->VERSION($sym); # inherit from UNIVERSAL
# If the version number was the only thing specified
# then we should act as if nothing was specified:
if (@imports == 1) {
@imports = @$exports;
last;
}
# We need a way to emulate 'use Foo ()' but still
# allow an easy version check: "use Foo 1.23, ''";
if (@imports == 2 and !$imports[1]) {
@imports = ();
last;
}
} elsif ($sym !~ s/^&// || !$export_cache->{$sym}) {
# Last chance - see if they've updated EXPORT_OK since we
# cached it.
unless ($cache_is_current) {
%$export_cache = ();
_rebuild_cache ($pkg, $exports, $export_cache);
$cache_is_current = 1;
}
if (!$export_cache->{$sym}) {
# accumulate the non-exports
push @carp,
qq["$sym" is not exported by the $pkg module];
$oops++;
}
}
}
}
if ($oops) {
require Carp;
Carp::croak(join("\n", @carp, "Can't continue after import errors"));
}
}
else {
@imports = @$exports;
}
my($fail, $fail_cache) = (\@{"${pkg}::EXPORT_FAIL"},
$Exporter::FailCache{$pkg} ||= {});
if (@$fail) {
if (!%$fail_cache) {
# Build cache of symbols. Optimise the lookup by adding
# barewords twice... both with and without a leading &.
# (Technique could be applied to $export_cache at cost of memory)
my @expanded = map { /^\w/ ? ($_, '&'.$_) : $_ } @$fail;
warn "${pkg}::EXPORT_FAIL cached: @expanded" if $Exporter::Verbose;
@{$fail_cache}{@expanded} = (1) x @expanded;
}
my @failed;
foreach $sym (@imports) { push(@failed, $sym) if $fail_cache->{$sym} }
if (@failed) {
@failed = $pkg->export_fail(@failed);
foreach $sym (@failed) {
require Carp;
Carp::carp(qq["$sym" is not implemented by the $pkg module ],
"on this architecture");
}
if (@failed) {
require Carp;
Carp::croak("Can't continue after import errors");
}
}
}
warn "Importing into $callpkg from $pkg: ",
join(", ",sort @imports) if $Exporter::Verbose;
foreach $sym (@imports) {
# shortcut for the common case of no type character
(*{"${callpkg}::$sym"} = \&{"${pkg}::$sym"}, next)
unless $sym =~ s/^(\W)//;
$type = $1;
no warnings 'once';
*{"${callpkg}::$sym"} =
$type eq '&' ? \&{"${pkg}::$sym"} :
$type eq '$' ? \${"${pkg}::$sym"} :
$type eq '@' ? \@{"${pkg}::$sym"} :
$type eq '%' ? \%{"${pkg}::$sym"} :
$type eq '*' ? *{"${pkg}::$sym"} :
do { require Carp; Carp::croak("Can't export symbol: $type$sym") };
}
}
sub heavy_export_to_level
{
my $pkg = shift;
my $level = shift;
(undef) = shift; # XXX redundant arg
my $callpkg = caller($level);
$pkg->export($callpkg, @_);
}
# Utility functions
sub _push_tags {
my($pkg, $var, $syms) = @_;
my @nontag = ();
my $export_tags = \%{"${pkg}::EXPORT_TAGS"};
push(@{"${pkg}::$var"},
map { $export_tags->{$_} ? @{$export_tags->{$_}}
: scalar(push(@nontag,$_),$_) }
(@$syms) ? @$syms : keys %$export_tags);
if (@nontag and $^W) {
# This may change to a die one day
require Carp;
Carp::carp(join(", ", @nontag)." are not tags of $pkg");
}
}
sub heavy_require_version {
my($self, $wanted) = @_;
my $pkg = ref $self || $self;
return ${pkg}->VERSION($wanted);
}
sub heavy_export_tags {
_push_tags((caller)[0], "EXPORT", \@_);
}
sub heavy_export_ok_tags {
_push_tags((caller)[0], "EXPORT_OK", \@_);
}
1;
EXPORTER_HEAVY
$fatpacked{"File/Path.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'FILE_PATH';
package File::Path;
use 5.005_04;
use strict;
use Cwd 'getcwd';
use File::Basename ();
use File::Spec ();
BEGIN {
if ( $] < 5.006 ) {
# can't say 'opendir my $dh, $dirname'
# need to initialise $dh
eval 'use Symbol';
}
}
use Exporter ();
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
$VERSION = '2.18';
$VERSION = eval $VERSION;
@ISA = qw(Exporter);
@EXPORT = qw(mkpath rmtree);
@EXPORT_OK = qw(make_path remove_tree);
BEGIN {
for (qw(VMS MacOS MSWin32 os2)) {
no strict 'refs';
*{"_IS_\U$_"} = $^O eq $_ ? sub () { 1 } : sub () { 0 };
}
# These OSes complain if you want to remove a file that you have no
# write permission to:
*_FORCE_WRITABLE = (
grep { $^O eq $_ } qw(amigaos dos epoc MSWin32 MacOS os2)
) ? sub () { 1 } : sub () { 0 };
# Unix-like systems need to stat each directory in order to detect
# race condition. MS-Windows is immune to this particular attack.
*_NEED_STAT_CHECK = !(_IS_MSWIN32()) ? sub () { 1 } : sub () { 0 };
}
sub _carp {
require Carp;
goto &Carp::carp;
}
sub _croak {
require Carp;
goto &Carp::croak;
}
sub _error {
my $arg = shift;
my $message = shift;
my $object = shift;
if ( $arg->{error} ) {
$object = '' unless defined $object;
$message .= ": $!" if $!;
push @{ ${ $arg->{error} } }, { $object => $message };
}
else {
_carp( defined($object) ? "$message for $object: $!" : "$message: $!" );
}
}
sub __is_arg {
my ($arg) = @_;
# If client code blessed an array ref to HASH, this will not work
# properly. We could have done $arg->isa() wrapped in eval, but
# that would be expensive. This implementation should suffice.
# We could have also used Scalar::Util:blessed, but we choose not
# to add this dependency
return ( ref $arg eq 'HASH' );
}
sub make_path {
push @_, {} unless @_ and __is_arg( $_[-1] );
goto &mkpath;
}
sub mkpath {
my $old_style = !( @_ and __is_arg( $_[-1] ) );
my $data;
my $paths;
if ($old_style) {
my ( $verbose, $mode );
( $paths, $verbose, $mode ) = @_;
$paths = [$paths] unless UNIVERSAL::isa( $paths, 'ARRAY' );
$data->{verbose} = $verbose;
$data->{mode} = defined $mode ? $mode : oct '777';
}
else {
my %args_permitted = map { $_ => 1 } ( qw|
chmod
error
group
mask
mode
owner
uid
user
verbose
| );
my %not_on_win32_args = map { $_ => 1 } ( qw|
group
owner
uid
user
| );
my @bad_args = ();
my @win32_implausible_args = ();
my $arg = pop @_;
for my $k (sort keys %{$arg}) {
if (! $args_permitted{$k}) {
push @bad_args, $k;
}
elsif ($not_on_win32_args{$k} and _IS_MSWIN32) {
push @win32_implausible_args, $k;
}
else {
$data->{$k} = $arg->{$k};
}
}
_carp("Unrecognized option(s) passed to mkpath() or make_path(): @bad_args")
if @bad_args;
_carp("Option(s) implausible on Win32 passed to mkpath() or make_path(): @win32_implausible_args")
if @win32_implausible_args;
$data->{mode} = delete $data->{mask} if exists $data->{mask};
$data->{mode} = oct '777' unless exists $data->{mode};
${ $data->{error} } = [] if exists $data->{error};
unless (@win32_implausible_args) {
$data->{owner} = delete $data->{user} if exists $data->{user};
$data->{owner} = delete $data->{uid} if exists $data->{uid};
if ( exists $data->{owner} and $data->{owner} =~ /\D/ ) {
my $uid = ( getpwnam $data->{owner} )[2];
if ( defined $uid ) {
$data->{owner} = $uid;
}
else {
_error( $data,
"unable to map $data->{owner} to a uid, ownership not changed"
);
delete $data->{owner};
}
}
if ( exists $data->{group} and $data->{group} =~ /\D/ ) {
my $gid = ( getgrnam $data->{group} )[2];
if ( defined $gid ) {
$data->{group} = $gid;
}
else {
_error( $data,
"unable to map $data->{group} to a gid, group ownership not changed"
);
delete $data->{group};
}
}
if ( exists $data->{owner} and not exists $data->{group} ) {
$data->{group} = -1; # chown will leave group unchanged
}
if ( exists $data->{group} and not exists $data->{owner} ) {
$data->{owner} = -1; # chown will leave owner unchanged
}
}
$paths = [@_];
}
return _mkpath( $data, $paths );
}
sub _mkpath {
my $data = shift;
my $paths = shift;
my ( @created );
foreach my $path ( @{$paths} ) {
next unless defined($path) and length($path);
$path .= '/' if _IS_OS2 and $path =~ /^\w:\z/s; # feature of CRT
# Logic wants Unix paths, so go with the flow.
if (_IS_VMS) {
next if $path eq '/';
$path = VMS::Filespec::unixify($path);
}
next if -d $path;
my $parent = File::Basename::dirname($path);
# Coverage note: It's not clear how we would test the condition:
# '-d $parent or $path eq $parent'
unless ( -d $parent or $path eq $parent ) {
push( @created, _mkpath( $data, [$parent] ) );
}
print "mkdir $path\n" if $data->{verbose};
if ( mkdir( $path, $data->{mode} ) ) {
push( @created, $path );
if ( exists $data->{owner} ) {
# NB: $data->{group} guaranteed to be set during initialisation
if ( !chown $data->{owner}, $data->{group}, $path ) {
_error( $data,
"Cannot change ownership of $path to $data->{owner}:$data->{group}"
);
}
}
if ( exists $data->{chmod} ) {
# Coverage note: It's not clear how we would trigger the next
# 'if' block. Failure of 'chmod' might first result in a
# system error: "Permission denied".
if ( !chmod $data->{chmod}, $path ) {
_error( $data,
"Cannot change permissions of $path to $data->{chmod}" );
}
}
}
else {
my $save_bang = $!;
# From 'perldoc perlvar': $EXTENDED_OS_ERROR ($^E) is documented
# as:
# Error information specific to the current operating system. At the
# moment, this differs from "$!" under only VMS, OS/2, and Win32
# (and for MacPerl). On all other platforms, $^E is always just the
# same as $!.
my ( $e, $e1 ) = ( $save_bang, $^E );
$e .= "; $e1" if $e ne $e1;
# allow for another process to have created it meanwhile
if ( ! -d $path ) {
$! = $save_bang;
if ( $data->{error} ) {
push @{ ${ $data->{error} } }, { $path => $e };
}
else {
_croak("mkdir $path: $e");
}
}
}
}
return @created;
}
sub remove_tree {
push @_, {} unless @_ and __is_arg( $_[-1] );
goto &rmtree;
}
sub _is_subdir {
my ( $dir, $test ) = @_;
my ( $dv, $dd ) = File::Spec->splitpath( $dir, 1 );
my ( $tv, $td ) = File::Spec->splitpath( $test, 1 );
# not on same volume
return 0 if $dv ne $tv;
my @d = File::Spec->splitdir($dd);
my @t = File::Spec->splitdir($td);
# @t can't be a subdir if it's shorter than @d
return 0 if @t < @d;
return join( '/', @d ) eq join( '/', splice @t, 0, +@d );
}
sub rmtree {
my $old_style = !( @_ and __is_arg( $_[-1] ) );
my ($arg, $data, $paths);
if ($old_style) {
my ( $verbose, $safe );
( $paths, $verbose, $safe ) = @_;
$data->{verbose} = $verbose;
$data->{safe} = defined $safe ? $safe : 0;
if ( defined($paths) and length($paths) ) {
$paths = [$paths] unless UNIVERSAL::isa( $paths, 'ARRAY' );
}
else {
_carp("No root path(s) specified\n");
return 0;
}
}
else {
my %args_permitted = map { $_ => 1 } ( qw|
error
keep_root
result
safe
verbose
| );
my @bad_args = ();
my $arg = pop @_;
for my $k (sort keys %{$arg}) {
if (! $args_permitted{$k}) {
push @bad_args, $k;
}
else {
$data->{$k} = $arg->{$k};
}
}
_carp("Unrecognized option(s) passed to remove_tree(): @bad_args")
if @bad_args;
${ $data->{error} } = [] if exists $data->{error};
${ $data->{result} } = [] if exists $data->{result};
# Wouldn't it make sense to do some validation on @_ before assigning
# to $paths here?
# In the $old_style case we guarantee that each path is both defined
# and non-empty. We don't check that here, which means we have to
# check it later in the first condition in this line:
# if ( $ortho_root_length && _is_subdir( $ortho_root, $ortho_cwd ) ) {
# Granted, that would be a change in behavior for the two
# non-old-style interfaces.
$paths = [@_];
}
$data->{prefix} = '';
$data->{depth} = 0;
my @clean_path;
$data->{cwd} = getcwd() or do {
_error( $data, "cannot fetch initial working directory" );
return 0;
};
for ( $data->{cwd} ) { /\A(.*)\Z/s; $_ = $1 } # untaint
for my $p (@$paths) {
# need to fixup case and map \ to / on Windows
my $ortho_root = _IS_MSWIN32 ? _slash_lc($p) : $p;
my $ortho_cwd =
_IS_MSWIN32 ? _slash_lc( $data->{cwd} ) : $data->{cwd};
my $ortho_root_length = length($ortho_root);
$ortho_root_length-- if _IS_VMS; # don't compare '.' with ']'
if ( $ortho_root_length && _is_subdir( $ortho_root, $ortho_cwd ) ) {
local $! = 0;
_error( $data, "cannot remove path when cwd is $data->{cwd}", $p );
next;
}
if (_IS_MACOS) {
$p = ":$p" unless $p =~ /:/;
$p .= ":" unless $p =~ /:\z/;
}
elsif ( _IS_MSWIN32 ) {
$p =~ s{[/\\]\z}{};
}
else {
$p =~ s{/\z}{};
}
push @clean_path, $p;
}
@{$data}{qw(device inode)} = ( lstat $data->{cwd} )[ 0, 1 ] or do {
_error( $data, "cannot stat initial working directory", $data->{cwd} );
return 0;
};
return _rmtree( $data, \@clean_path );
}
sub _rmtree {
my $data = shift;
my $paths = shift;
my $count = 0;
my $curdir = File::Spec->curdir();
my $updir = File::Spec->updir();
my ( @files, $root );
ROOT_DIR:
foreach my $root (@$paths) {
# since we chdir into each directory, it may not be obvious
# to figure out where we are if we generate a message about
# a file name. We therefore construct a semi-canonical
# filename, anchored from the directory being unlinked (as
# opposed to being truly canonical, anchored from the root (/).
my $canon =
$data->{prefix}
? File::Spec->catfile( $data->{prefix}, $root )
: $root;
my ( $ldev, $lino, $perm ) = ( lstat $root )[ 0, 1, 2 ]
or next ROOT_DIR;
if ( -d _ ) {
$root = VMS::Filespec::vmspath( VMS::Filespec::pathify($root) )
if _IS_VMS;
if ( !chdir($root) ) {
# see if we can escalate privileges to get in
# (e.g. funny protection mask such as -w- instead of rwx)
# This uses fchmod to avoid traversing outside of the proper
# location (CVE-2017-6512)
my $root_fh;
if (open($root_fh, '<', $root)) {
my ($fh_dev, $fh_inode) = (stat $root_fh )[0,1];
$perm &= oct '7777';
my $nperm = $perm | oct '700';
local $@;
if (
!(
$data->{safe}
or $nperm == $perm
or !-d _
or $fh_dev ne $ldev
or $fh_inode ne $lino
or eval { chmod( $nperm, $root_fh ) }
)
)
{
_error( $data,
"cannot make child directory read-write-exec", $canon );
next ROOT_DIR;
}
close $root_fh;
}
if ( !chdir($root) ) {
_error( $data, "cannot chdir to child", $canon );
next ROOT_DIR;
}
}
my ( $cur_dev, $cur_inode, $perm ) = ( stat $curdir )[ 0, 1, 2 ]
or do {
_error( $data, "cannot stat current working directory", $canon );
next ROOT_DIR;
};
if (_NEED_STAT_CHECK) {
( $ldev eq $cur_dev and $lino eq $cur_inode )
or _croak(
"directory $canon changed before chdir, expected dev=$ldev ino=$lino, actual dev=$cur_dev ino=$cur_inode, aborting."
);
}
$perm &= oct '7777'; # don't forget setuid, setgid, sticky bits
my $nperm = $perm | oct '700';
# notabene: 0700 is for making readable in the first place,
# it's also intended to change it to writable in case we have
# to recurse in which case we are better than rm -rf for
# subtrees with strange permissions
if (
!(
$data->{safe}
or $nperm == $perm
or chmod( $nperm, $curdir )
)
)
{
_error( $data, "cannot make directory read+writeable", $canon );
$nperm = $perm;
}
my $d;
$d = gensym() if $] < 5.006;
if ( !opendir $d, $curdir ) {
_error( $data, "cannot opendir", $canon );
@files = ();
}
else {
if ( !defined ${^TAINT} or ${^TAINT} ) {
# Blindly untaint dir names if taint mode is active
@files = map { /\A(.*)\z/s; $1 } readdir $d;
}
else {
@files = readdir $d;
}
closedir $d;
}
if (_IS_VMS) {
# Deleting large numbers of files from VMS Files-11
# filesystems is faster if done in reverse ASCIIbetical order.
# include '.' to '.;' from blead patch #31775
@files = map { $_ eq '.' ? '.;' : $_ } reverse @files;
}
@files = grep { $_ ne $updir and $_ ne $curdir } @files;
if (@files) {
# remove the contained files before the directory itself
my $narg = {%$data};
@{$narg}{qw(device inode cwd prefix depth)} =
( $cur_dev, $cur_inode, $updir, $canon, $data->{depth} + 1 );
$count += _rmtree( $narg, \@files );
}
# restore directory permissions of required now (in case the rmdir
# below fails), while we are still in the directory and may do so
# without a race via '.'
if ( $nperm != $perm and not chmod( $perm, $curdir ) ) {
_error( $data, "cannot reset chmod", $canon );
}
# don't leave the client code in an unexpected directory
chdir( $data->{cwd} )
or
_croak("cannot chdir to $data->{cwd} from $canon: $!, aborting.");
# ensure that a chdir upwards didn't take us somewhere other
# than we expected (see CVE-2002-0435)
( $cur_dev, $cur_inode ) = ( stat $curdir )[ 0, 1 ]
or _croak(
"cannot stat prior working directory $data->{cwd}: $!, aborting."
);
if (_NEED_STAT_CHECK) {
( $data->{device} eq $cur_dev and $data->{inode} eq $cur_inode )
or _croak( "previous directory $data->{cwd} "
. "changed before entering $canon, "
. "expected dev=$ldev ino=$lino, "
. "actual dev=$cur_dev ino=$cur_inode, aborting."
);
}
if ( $data->{depth} or !$data->{keep_root} ) {
if ( $data->{safe}
&& ( _IS_VMS
? !&VMS::Filespec::candelete($root)
: !-w $root ) )
{
print "skipped $root\n" if $data->{verbose};
next ROOT_DIR;
}
if ( _FORCE_WRITABLE and !chmod $perm | oct '700', $root ) {
_error( $data, "cannot make directory writeable", $canon );
}
print "rmdir $root\n" if $data->{verbose};
if ( rmdir $root ) {
push @{ ${ $data->{result} } }, $root if $data->{result};
++$count;
}
else {
_error( $data, "cannot remove directory", $canon );
if (
_FORCE_WRITABLE
&& !chmod( $perm,
( _IS_VMS ? VMS::Filespec::fileify($root) : $root )
)
)
{
_error(
$data,
sprintf( "cannot restore permissions to 0%o",
$perm ),
$canon
);
}
}
}
}
else {
# not a directory
$root = VMS::Filespec::vmsify("./$root")
if _IS_VMS
&& !File::Spec->file_name_is_absolute($root)
&& ( $root !~ m/(?<!\^)[\]>]+/ ); # not already in VMS syntax
if (
$data->{safe}
&& (
_IS_VMS
? !&VMS::Filespec::candelete($root)
: !( -l $root || -w $root )
)
)
{
print "skipped $root\n" if $data->{verbose};
next ROOT_DIR;
}
my $nperm = $perm & oct '7777' | oct '600';
if ( _FORCE_WRITABLE
and $nperm != $perm
and not chmod $nperm, $root )
{
_error( $data, "cannot make file writeable", $canon );
}
print "unlink $canon\n" if $data->{verbose};
# delete all versions under VMS
for ( ; ; ) {
if ( unlink $root ) {
push @{ ${ $data->{result} } }, $root if $data->{result};
}
else {
_error( $data, "cannot unlink file", $canon );
_FORCE_WRITABLE and chmod( $perm, $root )
or _error( $data,
sprintf( "cannot restore permissions to 0%o", $perm ),
$canon );
last;
}
++$count;
last unless _IS_VMS && lstat $root;
}
}
}
return $count;
}
sub _slash_lc {
# fix up slashes and case on MSWin32 so that we can determine that
# c:\path\to\dir is underneath C:/Path/To
my $path = shift;
$path =~ tr{\\}{/};
return lc($path);
}
1;
__END__
=head1 NAME
File::Path - Create or remove directory trees
=head1 VERSION
2.18 - released November 4 2020.
=head1 SYNOPSIS
use File::Path qw(make_path remove_tree);
@created = make_path('foo/bar/baz', '/zug/zwang');
@created = make_path('foo/bar/baz', '/zug/zwang', {
verbose => 1,
mode => 0711,
});
make_path('foo/bar/baz', '/zug/zwang', {
chmod => 0777,
});
$removed_count = remove_tree('foo/bar/baz', '/zug/zwang', {
verbose => 1,
error => \my $err_list,
safe => 1,
});
# legacy (interface promoted before v2.00)
@created = mkpath('/foo/bar/baz');
@created = mkpath('/foo/bar/baz', 1, 0711);
@created = mkpath(['/foo/bar/baz', 'blurfl/quux'], 1, 0711);
$removed_count = rmtree('foo/bar/baz', 1, 1);
$removed_count = rmtree(['foo/bar/baz', 'blurfl/quux'], 1, 1);
# legacy (interface promoted before v2.06)
@created = mkpath('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711 });
$removed_count = rmtree('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711 });
=head1 DESCRIPTION
This module provides a convenient way to create directories of
arbitrary depth and to delete an entire directory subtree from the
filesystem.
The following functions are provided:
=over
=item make_path( $dir1, $dir2, .... )
=item make_path( $dir1, $dir2, ...., \%opts )
The C<make_path> function creates the given directories if they don't
exist before, much like the Unix command C<mkdir -p>.
The function accepts a list of directories to be created. Its
behaviour may be tuned by an optional hashref appearing as the last
parameter on the call.
The function returns the list of directories actually created during
the call; in scalar context the number of directories created.
The following keys are recognised in the option hash:
=over
=item mode => $num
The numeric permissions mode to apply to each created directory
(defaults to C<0777>), to be modified by the current C<umask>. If the
directory already exists (and thus does not need to be created),
the permissions will not be modified.
C<mask> is recognised as an alias for this parameter.
=item chmod => $num
Takes a numeric mode to apply to each created directory (not
modified by the current C<umask>). If the directory already exists
(and thus does not need to be created), the permissions will
not be modified.
=item verbose => $bool
If present, will cause C<make_path> to print the name of each directory
as it is created. By default nothing is printed.
=item error => \$err
If present, it should be a reference to a scalar.
This scalar will be made to reference an array, which will
be used to store any errors that are encountered. See the L</"ERROR
HANDLING"> section for more information.
If this parameter is not used, certain error conditions may raise
a fatal error that will cause the program to halt, unless trapped
in an C<eval> block.
=item owner => $owner
=item user => $owner
=item uid => $owner
If present, will cause any created directory to be owned by C<$owner>.
If the value is numeric, it will be interpreted as a uid; otherwise a
username is assumed. An error will be issued if the username cannot be
mapped to a uid, the uid does not exist or the process lacks the
privileges to change ownership.
Ownership of directories that already exist will not be changed.
C<user> and C<uid> are aliases of C<owner>.
=item group => $group
If present, will cause any created directory to be owned by the group
C<$group>. If the value is numeric, it will be interpreted as a gid;
otherwise a group name is assumed. An error will be issued if the
group name cannot be mapped to a gid, the gid does not exist or the
process lacks the privileges to change group ownership.
Group ownership of directories that already exist will not be changed.
make_path '/var/tmp/webcache', {owner=>'nobody', group=>'nogroup'};
=back
=item mkpath( $dir )
=item mkpath( $dir, $verbose, $mode )
=item mkpath( [$dir1, $dir2,...], $verbose, $mode )
=item mkpath( $dir1, $dir2,..., \%opt )
The C<mkpath()> function provide the legacy interface of
C<make_path()> with a different interpretation of the arguments
passed. The behaviour and return value of the function is otherwise
identical to C<make_path()>.
=item remove_tree( $dir1, $dir2, .... )
=item remove_tree( $dir1, $dir2, ...., \%opts )
The C<remove_tree> function deletes the given directories and any
files and subdirectories they might contain, much like the Unix
command C<rm -rf> or the Windows commands C<rmdir /s> and C<rd /s>.
The function accepts a list of directories to be removed. (In point of fact,
it will also accept filesystem entries which are not directories, such as
regular files and symlinks. But, as its name suggests, its intent is to
remove trees rather than individual files.)
C<remove_tree()>'s behaviour may be tuned by an optional hashref
appearing as the last parameter on the call. If an empty string is
passed to C<remove_tree>, an error will occur.
B<NOTE:> For security reasons, we strongly advise use of the
hashref-as-final-argument syntax -- specifically, with a setting of the C<safe>
element to a true value.
remove_tree( $dir1, $dir2, ....,
{
safe => 1,
... # other key-value pairs
},
);
The function returns the number of files successfully deleted.
The following keys are recognised in the option hash:
=over
=item verbose => $bool
If present, will cause C<remove_tree> to print the name of each file as
it is unlinked. By default nothing is printed.
=item safe => $bool
When set to a true value, will cause C<remove_tree> to skip the files
for which the process lacks the required privileges needed to delete
files, such as delete privileges on VMS. In other words, the code
will make no attempt to alter file permissions. Thus, if the process
is interrupted, no filesystem object will be left in a more
permissive mode.
=item keep_root => $bool
When set to a true value, will cause all files and subdirectories
to be removed, except the initially specified directories. This comes
in handy when cleaning out an application's scratch directory.
remove_tree( '/tmp', {keep_root => 1} );
=item result => \$res
If present, it should be a reference to a scalar.
This scalar will be made to reference an array, which will
be used to store all files and directories unlinked
during the call. If nothing is unlinked, the array will be empty.
remove_tree( '/tmp', {result => \my $list} );
print "unlinked $_\n" for @$list;
This is a useful alternative to the C<verbose> key.
=item error => \$err
If present, it should be a reference to a scalar.
This scalar will be made to reference an array, which will
be used to store any errors that are encountered. See the L</"ERROR
HANDLING"> section for more information.
Removing things is a much more dangerous proposition than
creating things. As such, there are certain conditions that
C<remove_tree> may encounter that are so dangerous that the only
sane action left is to kill the program.
Use C<error> to trap all that is reasonable (problems with
permissions and the like), and let it die if things get out
of hand. This is the safest course of action.
=back
=item rmtree( $dir )
=item rmtree( $dir, $verbose, $safe )
=item rmtree( [$dir1, $dir2,...], $verbose, $safe )
=item rmtree( $dir1, $dir2,..., \%opt )
The C<rmtree()> function provide the legacy interface of
C<remove_tree()> with a different interpretation of the arguments
passed. The behaviour and return value of the function is otherwise
identical to C<remove_tree()>.
B<NOTE:> For security reasons, we strongly advise use of the
hashref-as-final-argument syntax, specifically with a setting of the C<safe>
element to a true value.
rmtree( $dir1, $dir2, ....,
{
safe => 1,
... # other key-value pairs
},
);
=back
=head2 ERROR HANDLING
=over 4
=item B<NOTE:>
The following error handling mechanism is consistent throughout all
code paths EXCEPT in cases where the ROOT node is nonexistent. In
version 2.11 the maintainers attempted to rectify this inconsistency
but too many downstream modules encountered problems. In such case,
if you require root node evaluation or error checking prior to calling
C<make_path> or C<remove_tree>, you should take additional precautions.
=back
If C<make_path> or C<remove_tree> encounters an error, a diagnostic
message will be printed to C<STDERR> via C<carp> (for non-fatal
errors) or via C<croak> (for fatal errors).
If this behaviour is not desirable, the C<error> attribute may be
used to hold a reference to a variable, which will be used to store
the diagnostics. The variable is made a reference to an array of hash
references. Each hash contain a single key/value pair where the key
is the name of the file, and the value is the error message (including
the contents of C<$!> when appropriate). If a general error is
encountered the diagnostic key will be empty.
An example usage looks like:
remove_tree( 'foo/bar', 'bar/rat', {error => \my $err} );
if ($err && @$err) {
for my $diag (@$err) {
my ($file, $message) = %$diag;
if ($file eq '') {
print "general error: $message\n";
}
else {
print "problem unlinking $file: $message\n";
}
}
}
else {
print "No error encountered\n";
}
Note that if no errors are encountered, C<$err> will reference an
empty array. This means that C<$err> will always end up TRUE; so you
need to test C<@$err> to determine if errors occurred.
=head2 NOTES
C<File::Path> blindly exports C<mkpath> and C<rmtree> into the
current namespace. These days, this is considered bad style, but
to change it now would break too much code. Nonetheless, you are
invited to specify what it is you are expecting to use:
use File::Path 'rmtree';
The routines C<make_path> and C<remove_tree> are B<not> exported
by default. You must specify which ones you want to use.
use File::Path 'remove_tree';
Note that a side-effect of the above is that C<mkpath> and C<rmtree>
are no longer exported at all. This is due to the way the C<Exporter>
module works. If you are migrating a codebase to use the new
interface, you will have to list everything explicitly. But that's
just good practice anyway.
use File::Path qw(remove_tree rmtree);
=head3 API CHANGES
The API was changed in the 2.0 branch. For a time, C<mkpath> and
C<rmtree> tried, unsuccessfully, to deal with the two different
calling mechanisms. This approach was considered a failure.
The new semantics are now only available with C<make_path> and
C<remove_tree>. The old semantics are only available through
C<mkpath> and C<rmtree>. Users are strongly encouraged to upgrade
to at least 2.08 in order to avoid surprises.
=head3 SECURITY CONSIDERATIONS
There were race conditions in the 1.x implementations of File::Path's
C<rmtree> function (although sometimes patched depending on the OS
distribution or platform). The 2.0 version contains code to avoid the
problem mentioned in CVE-2002-0435.
See the following pages for more information:
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=286905
http://www.nntp.perl.org/group/perl.perl5.porters/2005/01/msg97623.html
http://www.debian.org/security/2005/dsa-696
Additionally, unless the C<safe> parameter is set (or the
third parameter in the traditional interface is TRUE), should a
C<remove_tree> be interrupted, files that were originally in read-only
mode may now have their permissions set to a read-write (or "delete
OK") mode.
The following CVE reports were previously filed against File-Path and are
believed to have been addressed:
=over 4
=item * L<http://cve.circl.lu/cve/CVE-2004-0452>
=item * L<http://cve.circl.lu/cve/CVE-2005-0448>
=back
In February 2017 the cPanel Security Team reported an additional vulnerability
in File-Path. The C<chmod()> logic to make directories traversable can be
abused to set the mode on an attacker-chosen file to an attacker-chosen value.
This is due to the time-of-check-to-time-of-use (TOCTTOU) race condition
(L<https://en.wikipedia.org/wiki/Time_of_check_to_time_of_use>) between the
C<stat()> that decides the inode is a directory and the C<chmod()> that tries
to make it user-rwx. CPAN versions 2.13 and later incorporate a patch
provided by John Lightsey to address this problem. This vulnerability has
been reported as CVE-2017-6512.
=head1 DIAGNOSTICS
FATAL errors will cause the program to halt (C<croak>), since the
problem is so severe that it would be dangerous to continue. (This
can always be trapped with C<eval>, but it's not a good idea. Under
the circumstances, dying is the best thing to do).
SEVERE errors may be trapped using the modern interface. If the
they are not trapped, or if the old interface is used, such an error
will cause the program will halt.
All other errors may be trapped using the modern interface, otherwise
they will be C<carp>ed about. Program execution will not be halted.
=over 4
=item mkdir [path]: [errmsg] (SEVERE)
C<make_path> was unable to create the path. Probably some sort of
permissions error at the point of departure or insufficient resources
(such as free inodes on Unix).
=item No root path(s) specified
C<make_path> was not given any paths to create. This message is only
emitted if the routine is called with the traditional interface.
The modern interface will remain silent if given nothing to do.
=item No such file or directory
On Windows, if C<make_path> gives you this warning, it may mean that
you have exceeded your filesystem's maximum path length.
=item cannot fetch initial working directory: [errmsg]
C<remove_tree> attempted to determine the initial directory by calling
C<Cwd::getcwd>, but the call failed for some reason. No attempt
will be made to delete anything.
=item cannot stat initial working directory: [errmsg]
C<remove_tree> attempted to stat the initial directory (after having
successfully obtained its name via C<getcwd>), however, the call
failed for some reason. No attempt will be made to delete anything.
=item cannot chdir to [dir]: [errmsg]
C<remove_tree> attempted to set the working directory in order to
begin deleting the objects therein, but was unsuccessful. This is
usually a permissions issue. The routine will continue to delete
other things, but this directory will be left intact.
=item directory [dir] changed before chdir, expected dev=[n] ino=[n], actual dev=[n] ino=[n], aborting. (FATAL)
C<remove_tree> recorded the device and inode of a directory, and then
moved into it. It then performed a C<stat> on the current directory
and detected that the device and inode were no longer the same. As
this is at the heart of the race condition problem, the program
will die at this point.
=item cannot make directory [dir] read+writeable: [errmsg]
C<remove_tree> attempted to change the permissions on the current directory
to ensure that subsequent unlinkings would not run into problems,
but was unable to do so. The permissions remain as they were, and
the program will carry on, doing the best it can.
=item cannot read [dir]: [errmsg]
C<remove_tree> tried to read the contents of the directory in order
to acquire the names of the directory entries to be unlinked, but
was unsuccessful. This is usually a permissions issue. The
program will continue, but the files in this directory will remain
after the call.
=item cannot reset chmod [dir]: [errmsg]
C<remove_tree>, after having deleted everything in a directory, attempted
to restore its permissions to the original state but failed. The
directory may wind up being left behind.
=item cannot remove [dir] when cwd is [dir]
The current working directory of the program is F</some/path/to/here>
and you are attempting to remove an ancestor, such as F</some/path>.
The directory tree is left untouched.
The solution is to C<chdir> out of the child directory to a place
outside the directory tree to be removed.
=item cannot chdir to [parent-dir] from [child-dir]: [errmsg], aborting. (FATAL)
C<remove_tree>, after having deleted everything and restored the permissions
of a directory, was unable to chdir back to the parent. The program
halts to avoid a race condition from occurring.
=item cannot stat prior working directory [dir]: [errmsg], aborting. (FATAL)
C<remove_tree> was unable to stat the parent directory after having returned
from the child. Since there is no way of knowing if we returned to
where we think we should be (by comparing device and inode) the only
way out is to C<croak>.
=item previous directory [parent-dir] changed before entering [child-dir], expected dev=[n] ino=[n], actual dev=[n] ino=[n], aborting. (FATAL)
When C<remove_tree> returned from deleting files in a child directory, a
check revealed that the parent directory it returned to wasn't the one
it started out from. This is considered a sign of malicious activity.
=item cannot make directory [dir] writeable: [errmsg]
Just before removing a directory (after having successfully removed
everything it contained), C<remove_tree> attempted to set the permissions
on the directory to ensure it could be removed and failed. Program
execution continues, but the directory may possibly not be deleted.
=item cannot remove directory [dir]: [errmsg]
C<remove_tree> attempted to remove a directory, but failed. This may be because
some objects that were unable to be removed remain in the directory, or
it could be a permissions issue. The directory will be left behind.
=item cannot restore permissions of [dir] to [0nnn]: [errmsg]
After having failed to remove a directory, C<remove_tree> was unable to
restore its permissions from a permissive state back to a possibly
more restrictive setting. (Permissions given in octal).
=item cannot make file [file] writeable: [errmsg]
C<remove_tree> attempted to force the permissions of a file to ensure it
could be deleted, but failed to do so. It will, however, still attempt
to unlink the file.
=item cannot unlink file [file]: [errmsg]
C<remove_tree> failed to remove a file. Probably a permissions issue.
=item cannot restore permissions of [file] to [0nnn]: [errmsg]
After having failed to remove a file, C<remove_tree> was also unable
to restore the permissions on the file to a possibly less permissive
setting. (Permissions given in octal).
=item unable to map [owner] to a uid, ownership not changed");
C<make_path> was instructed to give the ownership of created
directories to the symbolic name [owner], but C<getpwnam> did
not return the corresponding numeric uid. The directory will
be created, but ownership will not be changed.
=item unable to map [group] to a gid, group ownership not changed
C<make_path> was instructed to give the group ownership of created
directories to the symbolic name [group], but C<getgrnam> did
not return the corresponding numeric gid. The directory will
be created, but group ownership will not be changed.
=back
=head1 SEE ALSO
=over 4
=item *
L<File::Remove>
Allows files and directories to be moved to the Trashcan/Recycle
Bin (where they may later be restored if necessary) if the operating
system supports such functionality. This feature may one day be
made available directly in C<File::Path>.
=item *
L<File::Find::Rule>
When removing directory trees, if you want to examine each file to
decide whether to delete it (and possibly leaving large swathes
alone), F<File::Find::Rule> offers a convenient and flexible approach
to examining directory trees.
=back
=head1 BUGS AND LIMITATIONS
The following describes F<File::Path> limitations and how to report bugs.
=head2 MULTITHREADED APPLICATIONS
F<File::Path> C<rmtree> and C<remove_tree> will not work with
multithreaded applications due to its use of C<chdir>. At this time,
no warning or error is generated in this situation. You will
certainly encounter unexpected results.
The implementation that surfaces this limitation will not be changed. See the
F<File::Path::Tiny> module for functionality similar to F<File::Path> but which does
not C<chdir>.
=head2 NFS Mount Points
F<File::Path> is not responsible for triggering the automounts, mirror mounts,
and the contents of network mounted filesystems. If your NFS implementation
requires an action to be performed on the filesystem in order for
F<File::Path> to perform operations, it is strongly suggested you assure
filesystem availability by reading the root of the mounted filesystem.
=head2 REPORTING BUGS
Please report all bugs on the RT queue, either via the web interface:
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Path>
or by email:
bug-File-Path@rt.cpan.org
In either case, please B<attach> patches to the bug report rather than
including them inline in the web post or the body of the email.
You can also send pull requests to the Github repository:
L<https://github.com/rpcme/File-Path>
=head1 ACKNOWLEDGEMENTS
Paul Szabo identified the race condition originally, and Brendan
O'Dea wrote an implementation for Debian that addressed the problem.
That code was used as a basis for the current code. Their efforts
are greatly appreciated.
Gisle Aas made a number of improvements to the documentation for
2.07 and his advice and assistance is also greatly appreciated.
=head1 AUTHORS
Prior authors and maintainers: Tim Bunce, Charles Bailey, and
David Landgren <F<david@landgren.net>>.
Current maintainers are Richard Elberger <F<riche@cpan.org>> and
James (Jim) Keenan <F<jkeenan@cpan.org>>.
=head1 CONTRIBUTORS
Contributors to File::Path, in alphabetical order by first name.
=over 1
=item <F<bulkdd@cpan.org>>
=item Charlie Gonzalez <F<itcharlie@cpan.org>>
=item Craig A. Berry <F<craigberry@mac.com>>
=item James E Keenan <F<jkeenan@cpan.org>>
=item John Lightsey <F<john@perlsec.org>>
=item Nigel Horne <F<njh@bandsman.co.uk>>
=item Richard Elberger <F<riche@cpan.org>>
=item Ryan Yee <F<ryee@cpan.org>>
=item Skye Shaw <F<shaw@cpan.org>>
=item Tom Lutz <F<tommylutz@gmail.com>>
=item Will Sheppard <F<willsheppard@github>>
=back
=head1 COPYRIGHT
This module is copyright (C) Charles Bailey, Tim Bunce, David Landgren,
James Keenan and Richard Elberger 1995-2020. All rights reserved.
=head1 LICENSE
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut
FILE_PATH
$fatpacked{"OLE/Storage_Lite.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'OLE_STORAGE_LITE';
# OLE::Storage_Lite
# by Kawai, Takanori (Hippo2000) 2000.11.4, 8, 14
# This Program is Still ALPHA version.
#//////////////////////////////////////////////////////////////////////////////
# OLE::Storage_Lite::PPS Object
#//////////////////////////////////////////////////////////////////////////////
#==============================================================================
# OLE::Storage_Lite::PPS
#==============================================================================
package OLE::Storage_Lite::PPS;
require Exporter;
use strict;
use vars qw($VERSION @ISA);
@ISA = qw(Exporter);
$VERSION = '0.19';
#------------------------------------------------------------------------------
# new (OLE::Storage_Lite::PPS)
#------------------------------------------------------------------------------
sub new ($$$$$$$$$$;$$) {
#1. Constructor for General Usage
my($sClass, $iNo, $sNm, $iType, $iPrev, $iNext, $iDir,
$raTime1st, $raTime2nd, $iStart, $iSize, $sData, $raChild) = @_;
if($iType == OLE::Storage_Lite::PpsType_File()) { #FILE
return OLE::Storage_Lite::PPS::File->_new
($iNo, $sNm, $iType, $iPrev, $iNext, $iDir, $raTime1st, $raTime2nd,
$iStart, $iSize, $sData, $raChild);
}
elsif($iType == OLE::Storage_Lite::PpsType_Dir()) { #DIRECTRY
return OLE::Storage_Lite::PPS::Dir->_new
($iNo, $sNm, $iType, $iPrev, $iNext, $iDir, $raTime1st, $raTime2nd,
$iStart, $iSize, $sData, $raChild);
}
elsif($iType == OLE::Storage_Lite::PpsType_Root()) { #ROOT
return OLE::Storage_Lite::PPS::Root->_new
($iNo, $sNm, $iType, $iPrev, $iNext, $iDir, $raTime1st, $raTime2nd,
$iStart, $iSize, $sData, $raChild);
}
else {
die "Error PPS:$iType $sNm\n";
}
}
#------------------------------------------------------------------------------
# _new (OLE::Storage_Lite::PPS)
# for OLE::Storage_Lite
#------------------------------------------------------------------------------
sub _new ($$$$$$$$$$$;$$) {
my($sClass, $iNo, $sNm, $iType, $iPrev, $iNext, $iDir,
$raTime1st, $raTime2nd, $iStart, $iSize, $sData, $raChild) = @_;
#1. Constructor for OLE::Storage_Lite
my $oThis = {
No => $iNo,
Name => $sNm,
Type => $iType,
PrevPps => $iPrev,
NextPps => $iNext,
DirPps => $iDir,
Time1st => $raTime1st,
Time2nd => $raTime2nd,
StartBlock => $iStart,
Size => $iSize,
Data => $sData,
Child => $raChild,
};
bless $oThis, $sClass;
return $oThis;
}
#------------------------------------------------------------------------------
# _DataLen (OLE::Storage_Lite::PPS)
# Check for update
#------------------------------------------------------------------------------
sub _DataLen($) {
my($oSelf) =@_;
return 0 unless(defined($oSelf->{Data}));
return ($oSelf->{_PPS_FILE})?
($oSelf->{_PPS_FILE}->stat())[7] : length($oSelf->{Data});
}
#------------------------------------------------------------------------------
# _makeSmallData (OLE::Storage_Lite::PPS)
#------------------------------------------------------------------------------
sub _makeSmallData($$$) {
my($oThis, $aList, $rhInfo) = @_;
my ($sRes);
my $FILE = $rhInfo->{_FILEH_};
my $iSmBlk = 0;
foreach my $oPps (@$aList) {
#1. Make SBD, small data string
if($oPps->{Type}==OLE::Storage_Lite::PpsType_File()) {
next if($oPps->{Size}<=0);
if($oPps->{Size} < $rhInfo->{_SMALL_SIZE}) {
my $iSmbCnt = int($oPps->{Size} / $rhInfo->{_SMALL_BLOCK_SIZE})
+ (($oPps->{Size} % $rhInfo->{_SMALL_BLOCK_SIZE})? 1: 0);
#1.1 Add to SBD
for (my $i = 0; $i<($iSmbCnt-1); $i++) {
print {$FILE} (pack("V", $i+$iSmBlk+1));
}
print {$FILE} (pack("V", -2));
#1.2 Add to Data String(this will be written for RootEntry)
#Check for update
if($oPps->{_PPS_FILE}) {
my $sBuff;
$oPps->{_PPS_FILE}->seek(0, 0); #To The Top
while($oPps->{_PPS_FILE}->read($sBuff, 4096)) {
$sRes .= $sBuff;
}
}
else {
$sRes .= $oPps->{Data};
}
$sRes .= ("\x00" x
($rhInfo->{_SMALL_BLOCK_SIZE} - ($oPps->{Size}% $rhInfo->{_SMALL_BLOCK_SIZE})))
if($oPps->{Size}% $rhInfo->{_SMALL_BLOCK_SIZE});
#1.3 Set for PPS
$oPps->{StartBlock} = $iSmBlk;
$iSmBlk += $iSmbCnt;
}
}
}
my $iSbCnt = int($rhInfo->{_BIG_BLOCK_SIZE}/ OLE::Storage_Lite::LongIntSize());
print {$FILE} (pack("V", -1) x ($iSbCnt - ($iSmBlk % $iSbCnt)))
if($iSmBlk % $iSbCnt);
#2. Write SBD with adjusting length for block
return $sRes;
}
#------------------------------------------------------------------------------
# _savePpsWk (OLE::Storage_Lite::PPS)
#------------------------------------------------------------------------------
sub _savePpsWk($$)
{
my($oThis, $rhInfo) = @_;
#1. Write PPS
my $FILE = $rhInfo->{_FILEH_};
print {$FILE} (
$oThis->{Name}
. ("\x00" x (64 - length($oThis->{Name}))) #64
, pack("v", length($oThis->{Name}) + 2) #66
, pack("c", $oThis->{Type}) #67
, pack("c", 0x00) #UK #68
, pack("V", $oThis->{PrevPps}) #Prev #72
, pack("V", $oThis->{NextPps}) #Next #76
, pack("V", $oThis->{DirPps}) #Dir #80
, "\x00\x09\x02\x00" #84
, "\x00\x00\x00\x00" #88
, "\xc0\x00\x00\x00" #92
, "\x00\x00\x00\x46" #96
, "\x00\x00\x00\x00" #100
, OLE::Storage_Lite::LocalDate2OLE($oThis->{Time1st}) #108
, OLE::Storage_Lite::LocalDate2OLE($oThis->{Time2nd}) #116
, pack("V", defined($oThis->{StartBlock})?
$oThis->{StartBlock}:0) #116
, pack("V", defined($oThis->{Size})?
$oThis->{Size} : 0) #124
, pack("V", 0), #128
);
}
#//////////////////////////////////////////////////////////////////////////////
# OLE::Storage_Lite::PPS::Root Object
#//////////////////////////////////////////////////////////////////////////////
#==============================================================================
# OLE::Storage_Lite::PPS::Root
#==============================================================================
package OLE::Storage_Lite::PPS::Root;
require Exporter;
use strict;
use IO::File;
use IO::Handle;
use Fcntl;
use vars qw($VERSION @ISA);
@ISA = qw(OLE::Storage_Lite::PPS Exporter);
$VERSION = '0.19';
sub _savePpsSetPnt($$$);
sub _savePpsSetPnt2($$$);
#------------------------------------------------------------------------------
# new (OLE::Storage_Lite::PPS::Root)
#------------------------------------------------------------------------------
sub new ($;$$$) {
my($sClass, $raTime1st, $raTime2nd, $raChild) = @_;
OLE::Storage_Lite::PPS::_new(
$sClass,
undef,
OLE::Storage_Lite::Asc2Ucs('Root Entry'),
5,
undef,
undef,
undef,
$raTime1st,
$raTime2nd,
undef,
undef,
undef,
$raChild);
}
#------------------------------------------------------------------------------
# save (OLE::Storage_Lite::PPS::Root)
#------------------------------------------------------------------------------
sub save($$;$$) {
my($oThis, $sFile, $bNoAs, $rhInfo) = @_;
#0.Initial Setting for saving
$rhInfo = {} unless($rhInfo);
$rhInfo->{_BIG_BLOCK_SIZE} = 2**
(($rhInfo->{_BIG_BLOCK_SIZE})?
_adjust2($rhInfo->{_BIG_BLOCK_SIZE}) : 9);
$rhInfo->{_SMALL_BLOCK_SIZE}= 2 **
(($rhInfo->{_SMALL_BLOCK_SIZE})?
_adjust2($rhInfo->{_SMALL_BLOCK_SIZE}): 6);
$rhInfo->{_SMALL_SIZE} = 0x1000;
$rhInfo->{_PPS_SIZE} = 0x80;
my $closeFile = 1;
#1.Open File
#1.1 $sFile is Ref of scalar
if(ref($sFile) eq 'SCALAR') {
require IO::Scalar;
my $oIo = new IO::Scalar $sFile, O_WRONLY;
$rhInfo->{_FILEH_} = $oIo;
}
#1.1.1 $sFile is a IO::Scalar object
# Now handled as a filehandle ref below.
#1.2 $sFile is a IO::Handle object
elsif(UNIVERSAL::isa($sFile, 'IO::Handle')) {
# Not all filehandles support binmode() so try it in an eval.
eval{ binmode $sFile };
$rhInfo->{_FILEH_} = $sFile;
}
#1.3 $sFile is a simple filename string
elsif(!ref($sFile)) {
if($sFile ne '-') {
my $oIo = new IO::File;
$oIo->open(">$sFile") || return undef;
binmode($oIo);
$rhInfo->{_FILEH_} = $oIo;
}
else {
my $oIo = new IO::Handle;
$oIo->fdopen(fileno(STDOUT),"w") || return undef;
binmode($oIo);
$rhInfo->{_FILEH_} = $oIo;
}
}
#1.4 Assume that if $sFile is a ref then it is a valid filehandle
else {
# Not all filehandles support binmode() so try it in an eval.
eval{ binmode $sFile };
$rhInfo->{_FILEH_} = $sFile;
# Caller controls filehandle closing
$closeFile = 0;
}
my $iBlk = 0;
#1. Make an array of PPS (for Save)
my @aList=();
if($bNoAs) {
_savePpsSetPnt2([$oThis], \@aList, $rhInfo);
}
else {
_savePpsSetPnt([$oThis], \@aList, $rhInfo);
}
my ($iSBDcnt, $iBBcnt, $iPPScnt) = $oThis->_calcSize(\@aList, $rhInfo);
#2.Save Header
$oThis->_saveHeader($rhInfo, $iSBDcnt, $iBBcnt, $iPPScnt);
#3.Make Small Data string (write SBD)
my $sSmWk = $oThis->_makeSmallData(\@aList, $rhInfo);
$oThis->{Data} = $sSmWk; #Small Datas become RootEntry Data
#4. Write BB
my $iBBlk = $iSBDcnt;
$oThis->_saveBigData(\$iBBlk, \@aList, $rhInfo);
#5. Write PPS
$oThis->_savePps(\@aList, $rhInfo);
#6. Write BD and BDList and Adding Header informations
$oThis->_saveBbd($iSBDcnt, $iBBcnt, $iPPScnt, $rhInfo);
#7.Close File
return $rhInfo->{_FILEH_}->close if $closeFile;
}
#------------------------------------------------------------------------------
# _calcSize (OLE::Storage_Lite::PPS)
#------------------------------------------------------------------------------
sub _calcSize($$)
{
my($oThis, $raList, $rhInfo) = @_;
#0. Calculate Basic Setting
my ($iSBDcnt, $iBBcnt, $iPPScnt) = (0,0,0);
my $iSmallLen = 0;
my $iSBcnt = 0;
foreach my $oPps (@$raList) {
if($oPps->{Type}==OLE::Storage_Lite::PpsType_File()) {
$oPps->{Size} = $oPps->_DataLen(); #Mod
if($oPps->{Size} < $rhInfo->{_SMALL_SIZE}) {
$iSBcnt += int($oPps->{Size} / $rhInfo->{_SMALL_BLOCK_SIZE})
+ (($oPps->{Size} % $rhInfo->{_SMALL_BLOCK_SIZE})? 1: 0);
}
else {
$iBBcnt +=
(int($oPps->{Size}/ $rhInfo->{_BIG_BLOCK_SIZE}) +
(($oPps->{Size}% $rhInfo->{_BIG_BLOCK_SIZE})? 1: 0));
}
}
}
$iSmallLen = $iSBcnt * $rhInfo->{_SMALL_BLOCK_SIZE};
my $iSlCnt = int($rhInfo->{_BIG_BLOCK_SIZE}/ OLE::Storage_Lite::LongIntSize());
$iSBDcnt = int($iSBcnt / $iSlCnt)+ (($iSBcnt % $iSlCnt)? 1:0);
$iBBcnt += (int($iSmallLen/ $rhInfo->{_BIG_BLOCK_SIZE}) +
(( $iSmallLen% $rhInfo->{_BIG_BLOCK_SIZE})? 1: 0));
my $iCnt = scalar(@$raList);
my $iBdCnt = $rhInfo->{_BIG_BLOCK_SIZE}/OLE::Storage_Lite::PpsSize();
$iPPScnt = (int($iCnt/$iBdCnt) + (($iCnt % $iBdCnt)? 1: 0));
return ($iSBDcnt, $iBBcnt, $iPPScnt);
}
#------------------------------------------------------------------------------
# _adjust2 (OLE::Storage_Lite::PPS::Root)
#------------------------------------------------------------------------------
sub _adjust2($) {
my($i2) = @_;
my $iWk;
$iWk = log($i2)/log(2);
return ($iWk > int($iWk))? int($iWk)+1:$iWk;
}
#------------------------------------------------------------------------------
# _saveHeader (OLE::Storage_Lite::PPS::Root)
#------------------------------------------------------------------------------
sub _saveHeader($$$$$) {
my($oThis, $rhInfo, $iSBDcnt, $iBBcnt, $iPPScnt) = @_;
my $FILE = $rhInfo->{_FILEH_};
#0. Calculate Basic Setting
my $iBlCnt = $rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::LongIntSize();
my $i1stBdL = int(($rhInfo->{_BIG_BLOCK_SIZE} - 0x4C) / OLE::Storage_Lite::LongIntSize());
my $i1stBdMax = $i1stBdL * $iBlCnt - $i1stBdL;
my $iBdExL = 0;
my $iAll = $iBBcnt + $iPPScnt + $iSBDcnt;
my $iAllW = $iAll;
my $iBdCntW = int($iAllW / $iBlCnt) + (($iAllW % $iBlCnt)? 1: 0);
my $iBdCnt = int(($iAll + $iBdCntW) / $iBlCnt) + ((($iAllW+$iBdCntW) % $iBlCnt)? 1: 0);
my $i;
if ($iBdCnt > $i1stBdL) {
#0.1 Calculate BD count
$iBlCnt--; #the BlCnt is reduced in the count of the last sect is used for a pointer the next Bl
my $iBBleftover = $iAll - $i1stBdMax;
if ($iAll >$i1stBdMax) {
while(1) {
$iBdCnt = int(($iBBleftover) / $iBlCnt) + ((($iBBleftover) % $iBlCnt)? 1: 0);
$iBdExL = int(($iBdCnt) / $iBlCnt) + ((($iBdCnt) % $iBlCnt)? 1: 0);
$iBBleftover = $iBBleftover + $iBdExL;
last if($iBdCnt == (int(($iBBleftover) / $iBlCnt) + ((($iBBleftover) % $iBlCnt)? 1: 0)));
}
}
$iBdCnt += $i1stBdL;
#print "iBdCnt = $iBdCnt \n";
}
#1.Save Header
print {$FILE} (
"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"
, "\x00\x00\x00\x00" x 4
, pack("v", 0x3b)
, pack("v", 0x03)
, pack("v", -2)
, pack("v", 9)
, pack("v", 6)
, pack("v", 0)
, "\x00\x00\x00\x00" x 2
, pack("V", $iBdCnt),
, pack("V", $iBBcnt+$iSBDcnt), #ROOT START
, pack("V", 0)
, pack("V", 0x1000)
, pack("V", $iSBDcnt ? 0 : -2) #Small Block Depot
, pack("V", $iSBDcnt)
);
#2. Extra BDList Start, Count
if($iAll <= $i1stBdMax) {
print {$FILE} (
pack("V", -2), #Extra BDList Start
pack("V", 0), #Extra BDList Count
);
}
else {
print {$FILE} (
pack("V", $iAll+$iBdCnt),
pack("V", $iBdExL),
);
}
#3. BDList
for($i=0; $i<$i1stBdL and $i < $iBdCnt; $i++) {
print {$FILE} (pack("V", $iAll+$i));
}
print {$FILE} ((pack("V", -1)) x($i1stBdL-$i)) if($i<$i1stBdL);
}
#------------------------------------------------------------------------------
# _saveBigData (OLE::Storage_Lite::PPS)
#------------------------------------------------------------------------------
sub _saveBigData($$$$) {
my($oThis, $iStBlk, $raList, $rhInfo) = @_;
my $iRes = 0;
my $FILE = $rhInfo->{_FILEH_};
#1.Write Big (ge 0x1000) Data into Block
foreach my $oPps (@$raList) {
if($oPps->{Type}!=OLE::Storage_Lite::PpsType_Dir()) {
#print "PPS: $oPps DEF:", defined($oPps->{Data}), "\n";
$oPps->{Size} = $oPps->_DataLen(); #Mod
if(($oPps->{Size} >= $rhInfo->{_SMALL_SIZE}) ||
(($oPps->{Type} == OLE::Storage_Lite::PpsType_Root()) && defined($oPps->{Data}))) {
#1.1 Write Data
#Check for update
if($oPps->{_PPS_FILE}) {
my $sBuff;
my $iLen = 0;
$oPps->{_PPS_FILE}->seek(0, 0); #To The Top
while($oPps->{_PPS_FILE}->read($sBuff, 4096)) {
$iLen += length($sBuff);
print {$FILE} ($sBuff); #Check for update
}
}
else {
print {$FILE} ($oPps->{Data});
}
print {$FILE} (
"\x00" x
($rhInfo->{_BIG_BLOCK_SIZE} -
($oPps->{Size} % $rhInfo->{_BIG_BLOCK_SIZE}))
) if ($oPps->{Size} % $rhInfo->{_BIG_BLOCK_SIZE});
#1.2 Set For PPS
$oPps->{StartBlock} = $$iStBlk;
$$iStBlk +=
(int($oPps->{Size}/ $rhInfo->{_BIG_BLOCK_SIZE}) +
(($oPps->{Size}% $rhInfo->{_BIG_BLOCK_SIZE})? 1: 0));
}
}
}
}
#------------------------------------------------------------------------------
# _savePps (OLE::Storage_Lite::PPS::Root)
#------------------------------------------------------------------------------
sub _savePps($$$)
{
my($oThis, $raList, $rhInfo) = @_;
#0. Initial
my $FILE = $rhInfo->{_FILEH_};
#2. Save PPS
foreach my $oItem (@$raList) {
$oItem->_savePpsWk($rhInfo);
}
#3. Adjust for Block
my $iCnt = scalar(@$raList);
my $iBCnt = $rhInfo->{_BIG_BLOCK_SIZE} / $rhInfo->{_PPS_SIZE};
print {$FILE} ("\x00" x (($iBCnt - ($iCnt % $iBCnt)) * $rhInfo->{_PPS_SIZE}))
if($iCnt % $iBCnt);
return int($iCnt / $iBCnt) + (($iCnt % $iBCnt)? 1: 0);
}
#------------------------------------------------------------------------------
# _savePpsSetPnt2 (OLE::Storage_Lite::PPS::Root)
# For Test
#------------------------------------------------------------------------------
sub _savePpsSetPnt2($$$)
{
my($aThis, $raList, $rhInfo) = @_;
#1. make Array as Children-Relations
#1.1 if No Children
if($#$aThis < 0) {
return 0xFFFFFFFF;
}
elsif($#$aThis == 0) {
#1.2 Just Only one
push @$raList, $aThis->[0];
$aThis->[0]->{No} = $#$raList;
$aThis->[0]->{PrevPps} = 0xFFFFFFFF;
$aThis->[0]->{NextPps} = 0xFFFFFFFF;
$aThis->[0]->{DirPps} = _savePpsSetPnt2($aThis->[0]->{Child}, $raList, $rhInfo);
return $aThis->[0]->{No};
}
else {
#1.3 Array
my $iCnt = $#$aThis + 1;
#1.3.1 Define Center
my $iPos = 0; #int($iCnt/ 2); #$iCnt
my @aWk = @$aThis;
my @aPrev = ($#$aThis > 1)? splice(@aWk, 1, 1) : (); #$iPos);
my @aNext = splice(@aWk, 1); #, $iCnt - $iPos -1);
$aThis->[$iPos]->{PrevPps} = _savePpsSetPnt2(
\@aPrev, $raList, $rhInfo);
push @$raList, $aThis->[$iPos];
$aThis->[$iPos]->{No} = $#$raList;
#1.3.2 Devide a array into Previous,Next
$aThis->[$iPos]->{NextPps} = _savePpsSetPnt2(
\@aNext, $raList, $rhInfo);
$aThis->[$iPos]->{DirPps} = _savePpsSetPnt2($aThis->[$iPos]->{Child}, $raList, $rhInfo);
return $aThis->[$iPos]->{No};
}
}
#------------------------------------------------------------------------------
# _savePpsSetPnt2 (OLE::Storage_Lite::PPS::Root)
# For Test
#------------------------------------------------------------------------------
sub _savePpsSetPnt2s($$$)
{
my($aThis, $raList, $rhInfo) = @_;
#1. make Array as Children-Relations
#1.1 if No Children
if($#$aThis < 0) {
return 0xFFFFFFFF;
}
elsif($#$aThis == 0) {
#1.2 Just Only one
push @$raList, $aThis->[0];
$aThis->[0]->{No} = $#$raList;
$aThis->[0]->{PrevPps} = 0xFFFFFFFF;
$aThis->[0]->{NextPps} = 0xFFFFFFFF;
$aThis->[0]->{DirPps} = _savePpsSetPnt2($aThis->[0]->{Child}, $raList, $rhInfo);
return $aThis->[0]->{No};
}
else {
#1.3 Array
my $iCnt = $#$aThis + 1;
#1.3.1 Define Center
my $iPos = 0; #int($iCnt/ 2); #$iCnt
push @$raList, $aThis->[$iPos];
$aThis->[$iPos]->{No} = $#$raList;
my @aWk = @$aThis;
#1.3.2 Devide a array into Previous,Next
my @aPrev = splice(@aWk, 0, $iPos);
my @aNext = splice(@aWk, 1, $iCnt - $iPos -1);
$aThis->[$iPos]->{PrevPps} = _savePpsSetPnt2(
\@aPrev, $raList, $rhInfo);
$aThis->[$iPos]->{NextPps} = _savePpsSetPnt2(
\@aNext, $raList, $rhInfo);
$aThis->[$iPos]->{DirPps} = _savePpsSetPnt2($aThis->[$iPos]->{Child}, $raList, $rhInfo);
return $aThis->[$iPos]->{No};
}
}
#------------------------------------------------------------------------------
# _savePpsSetPnt (OLE::Storage_Lite::PPS::Root)
#------------------------------------------------------------------------------
sub _savePpsSetPnt($$$)
{
my($aThis, $raList, $rhInfo) = @_;
#1. make Array as Children-Relations
#1.1 if No Children
if($#$aThis < 0) {
return 0xFFFFFFFF;
}
elsif($#$aThis == 0) {
#1.2 Just Only one
push @$raList, $aThis->[0];
$aThis->[0]->{No} = $#$raList;
$aThis->[0]->{PrevPps} = 0xFFFFFFFF;
$aThis->[0]->{NextPps} = 0xFFFFFFFF;
$aThis->[0]->{DirPps} = _savePpsSetPnt($aThis->[0]->{Child}, $raList, $rhInfo);
return $aThis->[0]->{No};
}
else {
#1.3 Array
my $iCnt = $#$aThis + 1;
#1.3.1 Define Center
my $iPos = int($iCnt/ 2); #$iCnt
push @$raList, $aThis->[$iPos];
$aThis->[$iPos]->{No} = $#$raList;
my @aWk = @$aThis;
#1.3.2 Devide a array into Previous,Next
my @aPrev = splice(@aWk, 0, $iPos);
my @aNext = splice(@aWk, 1, $iCnt - $iPos -1);
$aThis->[$iPos]->{PrevPps} = _savePpsSetPnt(
\@aPrev, $raList, $rhInfo);
$aThis->[$iPos]->{NextPps} = _savePpsSetPnt(
\@aNext, $raList, $rhInfo);
$aThis->[$iPos]->{DirPps} = _savePpsSetPnt($aThis->[$iPos]->{Child}, $raList, $rhInfo);
return $aThis->[$iPos]->{No};
}
}
#------------------------------------------------------------------------------
# _savePpsSetPnt (OLE::Storage_Lite::PPS::Root)
#------------------------------------------------------------------------------
sub _savePpsSetPnt1($$$)
{
my($aThis, $raList, $rhInfo) = @_;
#1. make Array as Children-Relations
#1.1 if No Children
if($#$aThis < 0) {
return 0xFFFFFFFF;
}
elsif($#$aThis == 0) {
#1.2 Just Only one
push @$raList, $aThis->[0];
$aThis->[0]->{No} = $#$raList;
$aThis->[0]->{PrevPps} = 0xFFFFFFFF;
$aThis->[0]->{NextPps} = 0xFFFFFFFF;
$aThis->[0]->{DirPps} = _savePpsSetPnt($aThis->[0]->{Child}, $raList, $rhInfo);
return $aThis->[0]->{No};
}
else {
#1.3 Array
my $iCnt = $#$aThis + 1;
#1.3.1 Define Center
my $iPos = int($iCnt/ 2); #$iCnt
push @$raList, $aThis->[$iPos];
$aThis->[$iPos]->{No} = $#$raList;
my @aWk = @$aThis;
#1.3.2 Devide a array into Previous,Next
my @aPrev = splice(@aWk, 0, $iPos);
my @aNext = splice(@aWk, 1, $iCnt - $iPos -1);
$aThis->[$iPos]->{PrevPps} = _savePpsSetPnt(
\@aPrev, $raList, $rhInfo);
$aThis->[$iPos]->{NextPps} = _savePpsSetPnt(
\@aNext, $raList, $rhInfo);
$aThis->[$iPos]->{DirPps} = _savePpsSetPnt($aThis->[$iPos]->{Child}, $raList, $rhInfo);
return $aThis->[$iPos]->{No};
}
}
#------------------------------------------------------------------------------
# _saveBbd (OLE::Storage_Lite)
#------------------------------------------------------------------------------
sub _saveBbd($$$$)
{
my($oThis, $iSbdSize, $iBsize, $iPpsCnt, $rhInfo) = @_;
my $FILE = $rhInfo->{_FILEH_};
#0. Calculate Basic Setting
my $iBbCnt = $rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::LongIntSize();
my $iBlCnt = $iBbCnt - 1;
my $i1stBdL = int(($rhInfo->{_BIG_BLOCK_SIZE} - 0x4C) / OLE::Storage_Lite::LongIntSize());
my $i1stBdMax = $i1stBdL * $iBbCnt - $i1stBdL;
my $iBdExL = 0;
my $iAll = $iBsize + $iPpsCnt + $iSbdSize;
my $iAllW = $iAll;
my $iBdCntW = int($iAllW / $iBbCnt) + (($iAllW % $iBbCnt)? 1: 0);
my $iBdCnt = 0;
my $i;
#0.1 Calculate BD count
my $iBBleftover = $iAll - $i1stBdMax;
if ($iAll >$i1stBdMax) {
while(1) {
$iBdCnt = int(($iBBleftover) / $iBlCnt) + ((($iBBleftover) % $iBlCnt)? 1: 0);
$iBdExL = int(($iBdCnt) / $iBlCnt) + ((($iBdCnt) % $iBlCnt)? 1: 0);
$iBBleftover = $iBBleftover + $iBdExL;
last if($iBdCnt == (int(($iBBleftover) / $iBlCnt) + ((($iBBleftover) % $iBlCnt)? 1: 0)));
}
}
$iAllW += $iBdExL;
$iBdCnt += $i1stBdL;
#print "iBdCnt = $iBdCnt \n";
#1. Making BD
#1.1 Set for SBD
if($iSbdSize > 0) {
for ($i = 0; $i<($iSbdSize-1); $i++) {
print {$FILE} (pack("V", $i+1));
}
print {$FILE} (pack("V", -2));
}
#1.2 Set for B
for ($i = 0; $i<($iBsize-1); $i++) {
print {$FILE} (pack("V", $i+$iSbdSize+1));
}
print {$FILE} (pack("V", -2));
#1.3 Set for PPS
for ($i = 0; $i<($iPpsCnt-1); $i++) {
print {$FILE} (pack("V", $i+$iSbdSize+$iBsize+1));
}
print {$FILE} (pack("V", -2));
#1.4 Set for BBD itself ( 0xFFFFFFFD : BBD)
for($i=0; $i<$iBdCnt;$i++) {
print {$FILE} (pack("V", 0xFFFFFFFD));
}
#1.5 Set for ExtraBDList
for($i=0; $i<$iBdExL;$i++) {
print {$FILE} (pack("V", 0xFFFFFFFC));
}
#1.6 Adjust for Block
print {$FILE} (pack("V", -1) x ($iBbCnt - (($iAllW + $iBdCnt) % $iBbCnt)))
if(($iAllW + $iBdCnt) % $iBbCnt);
#2.Extra BDList
if($iBdCnt > $i1stBdL) {
my $iN=0;
my $iNb=0;
for($i=$i1stBdL;$i<$iBdCnt; $i++, $iN++) {
if($iN>=($iBbCnt-1)) {
$iN = 0;
$iNb++;
print {$FILE} (pack("V", $iAll+$iBdCnt+$iNb));
}
print {$FILE} (pack("V", $iBsize+$iSbdSize+$iPpsCnt+$i));
}
print {$FILE} (pack("V", -1) x (($iBbCnt-1) - (($iBdCnt-$i1stBdL) % ($iBbCnt-1))))
if(($iBdCnt-$i1stBdL) % ($iBbCnt-1));
print {$FILE} (pack("V", -2));
}
}
#//////////////////////////////////////////////////////////////////////////////
# OLE::Storage_Lite::PPS::File Object
#//////////////////////////////////////////////////////////////////////////////
#==============================================================================
# OLE::Storage_Lite::PPS::File
#==============================================================================
package OLE::Storage_Lite::PPS::File;
require Exporter;
use strict;
use vars qw($VERSION @ISA);
@ISA = qw(OLE::Storage_Lite::PPS Exporter);
$VERSION = '0.19';
#------------------------------------------------------------------------------
# new (OLE::Storage_Lite::PPS::File)
#------------------------------------------------------------------------------
sub new ($$$) {
my($sClass, $sNm, $sData) = @_;
OLE::Storage_Lite::PPS::_new(
$sClass,
undef,
$sNm,
2,
undef,
undef,
undef,
undef,
undef,
undef,
undef,
$sData,
undef);
}
#------------------------------------------------------------------------------
# newFile (OLE::Storage_Lite::PPS::File)
#------------------------------------------------------------------------------
sub newFile ($$;$) {
my($sClass, $sNm, $sFile) = @_;
my $oSelf =
OLE::Storage_Lite::PPS::_new(
$sClass,
undef,
$sNm,
2,
undef,
undef,
undef,
undef,
undef,
undef,
undef,
'',
undef);
#
if((!defined($sFile)) or ($sFile eq '')) {
$oSelf->{_PPS_FILE} = IO::File->new_tmpfile();
}
elsif(UNIVERSAL::isa($sFile, 'IO::Handle')) {
$oSelf->{_PPS_FILE} = $sFile;
}
elsif(!ref($sFile)) {
#File Name
$oSelf->{_PPS_FILE} = new IO::File;
return undef unless($oSelf->{_PPS_FILE});
$oSelf->{_PPS_FILE}->open("$sFile", "r+") || return undef;
}
else {
return undef;
}
if($oSelf->{_PPS_FILE}) {
$oSelf->{_PPS_FILE}->seek(0, 2);
binmode($oSelf->{_PPS_FILE});
$oSelf->{_PPS_FILE}->autoflush(1);
}
return $oSelf;
}
#------------------------------------------------------------------------------
# append (OLE::Storage_Lite::PPS::File)
#------------------------------------------------------------------------------
sub append ($$) {
my($oSelf, $sData) = @_;
if($oSelf->{_PPS_FILE}) {
print {$oSelf->{_PPS_FILE}} $sData;
}
else {
$oSelf->{Data} .= $sData;
}
}
#//////////////////////////////////////////////////////////////////////////////
# OLE::Storage_Lite::PPS::Dir Object
#//////////////////////////////////////////////////////////////////////////////
#------------------------------------------------------------------------------
# new (OLE::Storage_Lite::PPS::Dir)
#------------------------------------------------------------------------------
package OLE::Storage_Lite::PPS::Dir;
require Exporter;
use strict;
use vars qw($VERSION @ISA);
@ISA = qw(OLE::Storage_Lite::PPS Exporter);
$VERSION = '0.19';
sub new ($$;$$$) {
my($sClass, $sName, $raTime1st, $raTime2nd, $raChild) = @_;
OLE::Storage_Lite::PPS::_new(
$sClass,
undef,
$sName,
1,
undef,
undef,
undef,
$raTime1st,
$raTime2nd,
undef,
undef,
undef,
$raChild);
}
#==============================================================================
# OLE::Storage_Lite
#==============================================================================
package OLE::Storage_Lite;
require Exporter;
use strict;
use IO::File;
use Time::Local 'timegm';
use vars qw($VERSION @ISA @EXPORT);
@ISA = qw(Exporter);
$VERSION = '0.19';
sub _getPpsSearch($$$$$;$);
sub _getPpsTree($$$;$);
#------------------------------------------------------------------------------
# Const for OLE::Storage_Lite
#------------------------------------------------------------------------------
#0. Constants
sub PpsType_Root {5};
sub PpsType_Dir {1};
sub PpsType_File {2};
sub DataSizeSmall{0x1000};
sub LongIntSize {4};
sub PpsSize {0x80};
#------------------------------------------------------------------------------
# new OLE::Storage_Lite
#------------------------------------------------------------------------------
sub new($$) {
my($sClass, $sFile) = @_;
my $oThis = {
_FILE => $sFile,
};
bless $oThis;
return $oThis;
}
#------------------------------------------------------------------------------
# getPpsTree: OLE::Storage_Lite
#------------------------------------------------------------------------------
sub getPpsTree($;$)
{
my($oThis, $bData) = @_;
#0.Init
my $rhInfo = _initParse($oThis->{_FILE});
return undef unless($rhInfo);
#1. Get Data
my ($oPps) = _getPpsTree(0, $rhInfo, $bData);
close(IN);
return $oPps;
}
#------------------------------------------------------------------------------
# getSearch: OLE::Storage_Lite
#------------------------------------------------------------------------------
sub getPpsSearch($$;$$)
{
my($oThis, $raName, $bData, $iCase) = @_;
#0.Init
my $rhInfo = _initParse($oThis->{_FILE});
return undef unless($rhInfo);
#1. Get Data
my @aList = _getPpsSearch(0, $rhInfo, $raName, $bData, $iCase);
close(IN);
return @aList;
}
#------------------------------------------------------------------------------
# getNthPps: OLE::Storage_Lite
#------------------------------------------------------------------------------
sub getNthPps($$;$)
{
my($oThis, $iNo, $bData) = @_;
#0.Init
my $rhInfo = _initParse($oThis->{_FILE});
return undef unless($rhInfo);
#1. Get Data
my $oPps = _getNthPps($iNo, $rhInfo, $bData);
close IN;
return $oPps;
}
#------------------------------------------------------------------------------
# _initParse: OLE::Storage_Lite
#------------------------------------------------------------------------------
sub _initParse($) {
my($sFile)=@_;
my $oIo;
#1. $sFile is Ref of scalar
if(ref($sFile) eq 'SCALAR') {
require IO::Scalar;
$oIo = new IO::Scalar;
$oIo->open($sFile);
}
#2. $sFile is a IO::Handle object
elsif(UNIVERSAL::isa($sFile, 'IO::Handle')) {
$oIo = $sFile;
binmode($oIo);
}
#3. $sFile is a simple filename string
elsif(!ref($sFile)) {
$oIo = new IO::File;
$oIo->open("<$sFile") || return undef;
binmode($oIo);
}
#4 Assume that if $sFile is a ref then it is a valid filehandle
else {
$oIo = $sFile;
# Not all filehandles support binmode() so try it in an eval.
eval{ binmode $oIo };
}
return _getHeaderInfo($oIo);
}
#------------------------------------------------------------------------------
# _getPpsTree: OLE::Storage_Lite
#------------------------------------------------------------------------------
sub _getPpsTree($$$;$) {
my($iNo, $rhInfo, $bData, $raDone) = @_;
if(defined($raDone)) {
return () if(grep {$_ ==$iNo} @$raDone);
}
else {
$raDone=[];
}
push @$raDone, $iNo;
my $iRootBlock = $rhInfo->{_ROOT_START} ;
#1. Get Information about itself
my $oPps = _getNthPps($iNo, $rhInfo, $bData);
#2. Child
if($oPps->{DirPps} != 0xFFFFFFFF) {
my @aChildL = _getPpsTree($oPps->{DirPps}, $rhInfo, $bData, $raDone);
$oPps->{Child} = \@aChildL;
}
else {
$oPps->{Child} = undef;
}
#3. Previous,Next PPSs
my @aList = ();
push @aList, _getPpsTree($oPps->{PrevPps}, $rhInfo, $bData, $raDone)
if($oPps->{PrevPps} != 0xFFFFFFFF);
push @aList, $oPps;
push @aList, _getPpsTree($oPps->{NextPps}, $rhInfo, $bData, $raDone)
if($oPps->{NextPps} != 0xFFFFFFFF);
return @aList;
}
#------------------------------------------------------------------------------
# _getPpsSearch: OLE::Storage_Lite
#------------------------------------------------------------------------------
sub _getPpsSearch($$$$$;$) {
my($iNo, $rhInfo, $raName, $bData, $iCase, $raDone) = @_;
my $iRootBlock = $rhInfo->{_ROOT_START} ;
my @aRes;
#1. Check it self
if(defined($raDone)) {
return () if(grep {$_==$iNo} @$raDone);
}
else {
$raDone=[];
}
push @$raDone, $iNo;
my $oPps = _getNthPps($iNo, $rhInfo, undef);
# if(grep($_ eq $oPps->{Name}, @$raName)) {
if(($iCase && (grep(/^\Q$oPps->{Name}\E$/i, @$raName))) ||
(grep($_ eq $oPps->{Name}, @$raName))) {
$oPps = _getNthPps($iNo, $rhInfo, $bData) if ($bData);
@aRes = ($oPps);
}
else {
@aRes = ();
}
#2. Check Child, Previous, Next PPSs
push @aRes, _getPpsSearch($oPps->{DirPps}, $rhInfo, $raName, $bData, $iCase, $raDone)
if($oPps->{DirPps} != 0xFFFFFFFF) ;
push @aRes, _getPpsSearch($oPps->{PrevPps}, $rhInfo, $raName, $bData, $iCase, $raDone)
if($oPps->{PrevPps} != 0xFFFFFFFF );
push @aRes, _getPpsSearch($oPps->{NextPps}, $rhInfo, $raName, $bData, $iCase, $raDone)
if($oPps->{NextPps} != 0xFFFFFFFF);
return @aRes;
}
#===================================================================
# Get Header Info (BASE Informain about that file)
#===================================================================
sub _getHeaderInfo($){
my($FILE) = @_;
my($iWk);
my $rhInfo = {};
$rhInfo->{_FILEH_} = $FILE;
my $sWk;
#0. Check ID
$rhInfo->{_FILEH_}->seek(0, 0);
$rhInfo->{_FILEH_}->read($sWk, 8);
return undef unless($sWk eq "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1");
#BIG BLOCK SIZE
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x1E, 2, "v");
return undef unless(defined($iWk));
$rhInfo->{_BIG_BLOCK_SIZE} = 2 ** $iWk;
#SMALL BLOCK SIZE
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x20, 2, "v");
return undef unless(defined($iWk));
$rhInfo->{_SMALL_BLOCK_SIZE} = 2 ** $iWk;
#BDB Count
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x2C, 4, "V");
return undef unless(defined($iWk));
$rhInfo->{_BDB_COUNT} = $iWk;
#START BLOCK
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x30, 4, "V");
return undef unless(defined($iWk));
$rhInfo->{_ROOT_START} = $iWk;
#MIN SIZE OF BB
# $iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x38, 4, "V");
# return undef unless(defined($iWk));
# $rhInfo->{_MIN_SIZE_BB} = $iWk;
#SMALL BD START
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x3C, 4, "V");
return undef unless(defined($iWk));
$rhInfo->{_SBD_START} = $iWk;
#SMALL BD COUNT
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x40, 4, "V");
return undef unless(defined($iWk));
$rhInfo->{_SBD_COUNT} = $iWk;
#EXTRA BBD START
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x44, 4, "V");
return undef unless(defined($iWk));
$rhInfo->{_EXTRA_BBD_START} = $iWk;
#EXTRA BD COUNT
$iWk = _getInfoFromFile($rhInfo->{_FILEH_}, 0x48, 4, "V");
return undef unless(defined($iWk));
$rhInfo->{_EXTRA_BBD_COUNT} = $iWk;
#GET BBD INFO
$rhInfo->{_BBD_INFO}= _getBbdInfo($rhInfo);
#GET ROOT PPS
my $oRoot = _getNthPps(0, $rhInfo, undef);
$rhInfo->{_SB_START} = $oRoot->{StartBlock};
$rhInfo->{_SB_SIZE} = $oRoot->{Size};
return $rhInfo;
}
#------------------------------------------------------------------------------
# _getInfoFromFile
#------------------------------------------------------------------------------
sub _getInfoFromFile($$$$) {
my($FILE, $iPos, $iLen, $sFmt) =@_;
my($sWk);
return undef unless($FILE);
return undef if($FILE->seek($iPos, 0)==0);
return undef if($FILE->read($sWk, $iLen)!=$iLen);
return unpack($sFmt, $sWk);
}
#------------------------------------------------------------------------------
# _getBbdInfo
#------------------------------------------------------------------------------
sub _getBbdInfo($) {
my($rhInfo) =@_;
my @aBdList = ();
my $iBdbCnt = $rhInfo->{_BDB_COUNT};
my $iGetCnt;
my $sWk;
my $i1stCnt = int(($rhInfo->{_BIG_BLOCK_SIZE} - 0x4C) / OLE::Storage_Lite::LongIntSize());
my $iBdlCnt = int($rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::LongIntSize()) - 1;
#1. 1st BDlist
$rhInfo->{_FILEH_}->seek(0x4C, 0);
$iGetCnt = ($iBdbCnt < $i1stCnt)? $iBdbCnt: $i1stCnt;
$rhInfo->{_FILEH_}->read($sWk, OLE::Storage_Lite::LongIntSize()*$iGetCnt);
push @aBdList, unpack("V$iGetCnt", $sWk);
$iBdbCnt -= $iGetCnt;
#2. Extra BDList
my $iBlock = $rhInfo->{_EXTRA_BBD_START};
while(($iBdbCnt> 0) && _isNormalBlock($iBlock)){
_setFilePos($iBlock, 0, $rhInfo);
$iGetCnt= ($iBdbCnt < $iBdlCnt)? $iBdbCnt: $iBdlCnt;
$rhInfo->{_FILEH_}->read($sWk, OLE::Storage_Lite::LongIntSize()*$iGetCnt);
push @aBdList, unpack("V$iGetCnt", $sWk);
$iBdbCnt -= $iGetCnt;
$rhInfo->{_FILEH_}->read($sWk, OLE::Storage_Lite::LongIntSize());
$iBlock = unpack("V", $sWk);
}
#3.Get BDs
my @aWk;
my %hBd;
my $iBlkNo = 0;
my $iBdL;
my $i;
my $iBdCnt = int($rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::LongIntSize());
foreach $iBdL (@aBdList) {
_setFilePos($iBdL, 0, $rhInfo);
$rhInfo->{_FILEH_}->read($sWk, $rhInfo->{_BIG_BLOCK_SIZE});
@aWk = unpack("V$iBdCnt", $sWk);
for($i=0;$i<$iBdCnt;$i++, $iBlkNo++) {
if($aWk[$i] != ($iBlkNo+1)){
$hBd{$iBlkNo} = $aWk[$i];
}
}
}
return \%hBd;
}
#------------------------------------------------------------------------------
# getNthPps (OLE::Storage_Lite)
#------------------------------------------------------------------------------
sub _getNthPps($$$){
my($iPos, $rhInfo, $bData) = @_;
my($iPpsStart) = ($rhInfo->{_ROOT_START});
my($iPpsBlock, $iPpsPos);
my $sWk;
my $iBlock;
my $iBaseCnt = $rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::PpsSize();
$iPpsBlock = int($iPos / $iBaseCnt);
$iPpsPos = $iPos % $iBaseCnt;
$iBlock = _getNthBlockNo($iPpsStart, $iPpsBlock, $rhInfo);
return undef unless(defined($iBlock));
_setFilePos($iBlock, OLE::Storage_Lite::PpsSize()*$iPpsPos, $rhInfo);
$rhInfo->{_FILEH_}->read($sWk, OLE::Storage_Lite::PpsSize());
return undef unless($sWk);
my $iNmSize = unpack("v", substr($sWk, 0x40, 2));
$iNmSize = ($iNmSize > 2)? $iNmSize - 2 : $iNmSize;
my $sNm= substr($sWk, 0, $iNmSize);
my $iType = unpack("C", substr($sWk, 0x42, 2));
my $lPpsPrev = unpack("V", substr($sWk, 0x44, OLE::Storage_Lite::LongIntSize()));
my $lPpsNext = unpack("V", substr($sWk, 0x48, OLE::Storage_Lite::LongIntSize()));
my $lDirPps = unpack("V", substr($sWk, 0x4C, OLE::Storage_Lite::LongIntSize()));
my @raTime1st =
(($iType == OLE::Storage_Lite::PpsType_Root()) or ($iType == OLE::Storage_Lite::PpsType_Dir()))?
OLEDate2Local(substr($sWk, 0x64, 8)) : undef ,
my @raTime2nd =
(($iType == OLE::Storage_Lite::PpsType_Root()) or ($iType == OLE::Storage_Lite::PpsType_Dir()))?
OLEDate2Local(substr($sWk, 0x6C, 8)) : undef,
my($iStart, $iSize) = unpack("VV", substr($sWk, 0x74, 8));
if($bData) {
my $sData = _getData($iType, $iStart, $iSize, $rhInfo);
return OLE::Storage_Lite::PPS->new(
$iPos, $sNm, $iType, $lPpsPrev, $lPpsNext, $lDirPps,
\@raTime1st, \@raTime2nd, $iStart, $iSize, $sData, undef);
}
else {
return OLE::Storage_Lite::PPS->new(
$iPos, $sNm, $iType, $lPpsPrev, $lPpsNext, $lDirPps,
\@raTime1st, \@raTime2nd, $iStart, $iSize, undef, undef);
}
}
#------------------------------------------------------------------------------
# _setFilePos (OLE::Storage_Lite)
#------------------------------------------------------------------------------
sub _setFilePos($$$){
my($iBlock, $iPos, $rhInfo) = @_;
$rhInfo->{_FILEH_}->seek(($iBlock+1)*$rhInfo->{_BIG_BLOCK_SIZE}+$iPos, 0);
}
#------------------------------------------------------------------------------
# _getNthBlockNo (OLE::Storage_Lite)
#------------------------------------------------------------------------------
sub _getNthBlockNo($$$){
my($iStBlock, $iNth, $rhInfo) = @_;
my $iSv;
my $iNext = $iStBlock;
for(my $i =0; $i<$iNth; $i++) {
$iSv = $iNext;
$iNext = _getNextBlockNo($iSv, $rhInfo);
return undef unless _isNormalBlock($iNext);
}
return $iNext;
}
#------------------------------------------------------------------------------
# _getData (OLE::Storage_Lite)
#------------------------------------------------------------------------------
sub _getData($$$$)
{
my($iType, $iBlock, $iSize, $rhInfo) = @_;
if ($iType == OLE::Storage_Lite::PpsType_File()) {
if($iSize < OLE::Storage_Lite::DataSizeSmall()) {
return _getSmallData($iBlock, $iSize, $rhInfo);
}
else {
return _getBigData($iBlock, $iSize, $rhInfo);
}
}
elsif($iType == OLE::Storage_Lite::PpsType_Root()) { #Root
return _getBigData($iBlock, $iSize, $rhInfo);
}
elsif($iType == OLE::Storage_Lite::PpsType_Dir()) { # Directory
return undef;
}
}
#------------------------------------------------------------------------------
# _getBigData (OLE::Storage_Lite)
#------------------------------------------------------------------------------
sub _getBigData($$$)
{
my($iBlock, $iSize, $rhInfo) = @_;
my($iRest, $sWk, $sRes);
return '' unless(_isNormalBlock($iBlock));
$iRest = $iSize;
my($i, $iGetSize, $iNext);
$sRes = '';
my @aKeys= sort({$a<=>$b} keys(%{$rhInfo->{_BBD_INFO}}));
while ($iRest > 0) {
my @aRes = grep($_ >= $iBlock, @aKeys);
my $iNKey = $aRes[0];
$i = $iNKey - $iBlock;
$iNext = $rhInfo->{_BBD_INFO}{$iNKey};
_setFilePos($iBlock, 0, $rhInfo);
my $iGetSize = ($rhInfo->{_BIG_BLOCK_SIZE} * ($i+1));
$iGetSize = $iRest if($iRest < $iGetSize);
$rhInfo->{_FILEH_}->read( $sWk, $iGetSize);
$sRes .= $sWk;
$iRest -= $iGetSize;
$iBlock= $iNext;
}
return $sRes;
}
#------------------------------------------------------------------------------
# _getNextBlockNo (OLE::Storage_Lite)
#------------------------------------------------------------------------------
sub _getNextBlockNo($$){
my($iBlockNo, $rhInfo) = @_;
my $iRes = $rhInfo->{_BBD_INFO}->{$iBlockNo};
return defined($iRes)? $iRes: $iBlockNo+1;
}
#------------------------------------------------------------------------------
# _isNormalBlock (OLE::Storage_Lite)
# 0xFFFFFFFC : BDList, 0xFFFFFFFD : BBD,
# 0xFFFFFFFE: End of Chain 0xFFFFFFFF : unused
#------------------------------------------------------------------------------
sub _isNormalBlock($){
my($iBlock) = @_;
return ($iBlock < 0xFFFFFFFC)? 1: undef;
}
#------------------------------------------------------------------------------
# _getSmallData (OLE::Storage_Lite)
#------------------------------------------------------------------------------
sub _getSmallData($$$)
{
my($iSmBlock, $iSize, $rhInfo) = @_;
my($sRes, $sWk);
my $iRest = $iSize;
$sRes = '';
while ($iRest > 0) {
_setFilePosSmall($iSmBlock, $rhInfo);
$rhInfo->{_FILEH_}->read($sWk,
($iRest >= $rhInfo->{_SMALL_BLOCK_SIZE})?
$rhInfo->{_SMALL_BLOCK_SIZE}: $iRest);
$sRes .= $sWk;
$iRest -= $rhInfo->{_SMALL_BLOCK_SIZE};
$iSmBlock= _getNextSmallBlockNo($iSmBlock, $rhInfo);
}
return $sRes;
}
#------------------------------------------------------------------------------
# _setFilePosSmall(OLE::Storage_Lite)
#------------------------------------------------------------------------------
sub _setFilePosSmall($$)
{
my($iSmBlock, $rhInfo) = @_;
my $iSmStart = $rhInfo->{_SB_START};
my $iBaseCnt = $rhInfo->{_BIG_BLOCK_SIZE} / $rhInfo->{_SMALL_BLOCK_SIZE};
my $iNth = int($iSmBlock/$iBaseCnt);
my $iPos = $iSmBlock % $iBaseCnt;
my $iBlk = _getNthBlockNo($iSmStart, $iNth, $rhInfo);
_setFilePos($iBlk, $iPos * $rhInfo->{_SMALL_BLOCK_SIZE}, $rhInfo);
}
#------------------------------------------------------------------------------
# _getNextSmallBlockNo (OLE::Storage_Lite)
#------------------------------------------------------------------------------
sub _getNextSmallBlockNo($$)
{
my($iSmBlock, $rhInfo) = @_;
my($sWk);
my $iBaseCnt = $rhInfo->{_BIG_BLOCK_SIZE} / OLE::Storage_Lite::LongIntSize();
my $iNth = int($iSmBlock/$iBaseCnt);
my $iPos = $iSmBlock % $iBaseCnt;
my $iBlk = _getNthBlockNo($rhInfo->{_SBD_START}, $iNth, $rhInfo);
_setFilePos($iBlk, $iPos * OLE::Storage_Lite::LongIntSize(), $rhInfo);
$rhInfo->{_FILEH_}->read($sWk, OLE::Storage_Lite::LongIntSize());
return unpack("V", $sWk);
}
#------------------------------------------------------------------------------
# Asc2Ucs: OLE::Storage_Lite
#------------------------------------------------------------------------------
sub Asc2Ucs($)
{
my($sAsc) = @_;
return join("\x00", split //, $sAsc) . "\x00";
}
#------------------------------------------------------------------------------
# Ucs2Asc: OLE::Storage_Lite
#------------------------------------------------------------------------------
sub Ucs2Asc($)
{
my($sUcs) = @_;
return join('', map(pack('c', $_), unpack('v*', $sUcs)));
}
#------------------------------------------------------------------------------
# OLEDate2Local()
#
# Convert from a Window FILETIME structure to a localtime array. FILETIME is
# a 64-bit value representing the number of 100-nanosecond intervals since
# January 1 1601.
#
# We first convert the FILETIME to seconds and then subtract the difference
# between the 1601 epoch and the 1970 Unix epoch.
#
sub OLEDate2Local {
my $oletime = shift;
# Unpack the FILETIME into high and low longs.
my ( $lo, $hi ) = unpack 'V2', $oletime;
# Convert the longs to a double.
my $nanoseconds = $hi * 2**32 + $lo;
# Convert the 100 nanosecond units into seconds.
my $time = $nanoseconds / 1e7;
# Subtract the number of seconds between the 1601 and 1970 epochs.
$time -= 11644473600;
# Convert to a localtime (actually gmtime) structure.
my @localtime = gmtime($time);
return @localtime;
}
#------------------------------------------------------------------------------
# LocalDate2OLE()
#
# Convert from a a localtime array to a Window FILETIME structure. FILETIME is
# a 64-bit value representing the number of 100-nanosecond intervals since
# January 1 1601.
#
# We first convert the localtime (actually gmtime) to seconds and then add the
# difference between the 1601 epoch and the 1970 Unix epoch. We convert that to
# 100 nanosecond units, divide it into high and low longs and return it as a
# packed 64bit structure.
#
sub LocalDate2OLE {
my $localtime = shift;
return "\x00" x 8 unless $localtime;
# Convert from localtime (actually gmtime) to seconds.
my @localtimecopy = @{$localtime};
$localtimecopy[5] += 1900 unless $localtimecopy[5] > 99;
my $time = timegm( @localtimecopy );
# Add the number of seconds between the 1601 and 1970 epochs.
$time += 11644473600;
# The FILETIME seconds are in units of 100 nanoseconds.
my $nanoseconds = $time * 1E7;
use POSIX 'fmod';
# Pack the total nanoseconds into 64 bits...
my $hi = int( $nanoseconds / 2**32 );
my $lo = fmod($nanoseconds, 2**32);
my $oletime = pack "VV", $lo, $hi;
return $oletime;
}
1;
__END__
=head1 NAME
OLE::Storage_Lite - Simple Class for OLE document interface.
=head1 SYNOPSIS
use OLE::Storage_Lite;
# Initialize.
# From a file
my $oOl = OLE::Storage_Lite->new("some.xls");
# From a filehandle object
use IO::File;
my $oIo = new IO::File;
$oIo->open("<iofile.xls");
binmode($oIo);
my $oOl = OLE::Storage_Lite->new($oFile);
# Read data
my $oPps = $oOl->getPpsTree(1);
# Save Data
# To a File
$oPps->save("kaba.xls"); #kaba.xls
$oPps->save('-'); #STDOUT
# To a filehandle object
my $oIo = new IO::File;
$oIo->open(">iofile.xls");
bimode($oIo);
$oPps->save($oIo);
=head1 DESCRIPTION
OLE::Storage_Lite allows you to read and write an OLE structured file.
OLE::Storage_Lite::PPS is a class representing PPS. OLE::Storage_Lite::PPS::Root, OLE::Storage_Lite::PPS::File and OLE::Storage_Lite::PPS::Dir
are subclasses of OLE::Storage_Lite::PPS.
=head2 new()
Constructor.
$oOle = OLE::Storage_Lite->new($sFile);
Creates a OLE::Storage_Lite object for C<$sFile>. C<$sFile> must be a correct file name.
The C<new()> constructor also accepts a valid filehandle. Remember to C<binmode()> the filehandle first.
=head2 getPpsTree()
$oPpsRoot = $oOle->getPpsTree([$bData]);
Returns PPS as an OLE::Storage_Lite::PPS::Root object.
Other PPS objects will be included as its children.
If C<$bData> is true, the objects will have data in the file.
=head2 getPpsSearch()
$oPpsRoot = $oOle->getPpsTree($raName [, $bData][, $iCase] );
Returns PPSs as OLE::Storage_Lite::PPS objects that has the name specified in C<$raName> array.
If C<$bData> is true, the objects will have data in the file.
If C<$iCase> is true, search is case insensitive.
=head2 getNthPps()
$oPpsRoot = $oOle->getNthPps($iNth [, $bData]);
Returns PPS as C<OLE::Storage_Lite::PPS> object specified number C<$iNth>.
If C<$bData> is true, the objects will have data in the file.
=head2 Asc2Ucs()
$sUcs2 = OLE::Storage_Lite::Asc2Ucs($sAsc>);
Utility function. Just adds 0x00 after every characters in C<$sAsc>.
=head2 Ucs2Asc()
$sAsc = OLE::Storage_Lite::Ucs2Asc($sUcs2);
Utility function. Just deletes 0x00 after words in C<$sUcs>.
=head1 OLE::Storage_Lite::PPS
OLE::Storage_Lite::PPS has these properties:
=over 4
=item No
Order number in saving.
=item Name
Its name in UCS2 (a.k.a Unicode).
=item Type
Its type (1:Dir, 2:File (Data), 5: Root)
=item PrevPps
Previous pps (as No)
=item NextPps
Next pps (as No)
=item DirPps
Dir pps (as No).
=item Time1st
Timestamp 1st in array ref as similar fomat of localtime.
=item Time2nd
Timestamp 2nd in array ref as similar fomat of localtime.
=item StartBlock
Start block number
=item Size
Size of the pps
=item Data
Its data
=item Child
Its child PPSs in array ref
=back
=head1 OLE::Storage_Lite::PPS::Root
OLE::Storage_Lite::PPS::Root has 2 methods.
=head2 new()
$oRoot = OLE::Storage_Lite::PPS::Root->new(
$raTime1st,
$raTime2nd,
$raChild);
Constructor.
C<$raTime1st>, C<$raTime2nd> are array refs with ($iSec, $iMin, $iHour, $iDay, $iMon, $iYear).
$iSec means seconds, $iMin means minutes. $iHour means hours.
$iDay means day. $iMon is month -1. $iYear is year - 1900.
C<$raChild> is a array ref of children PPSs.
=head2 save()
$oRoot = $oRoot>->save(
$sFile,
$bNoAs);
Saves information into C<$sFile>. If C<$sFile> is '-', this will use STDOUT.
The C<new()> constructor also accepts a valid filehandle. Remember to C<binmode()> the filehandle first.
If C<$bNoAs> is defined, this function will use the No of PPSs for saving order.
If C<$bNoAs> is undefined, this will calculate PPS saving order.
=head1 OLE::Storage_Lite::PPS::Dir
OLE::Storage_Lite::PPS::Dir has 1 method.
=head2 new()
$oRoot = OLE::Storage_Lite::PPS::Dir->new(
$sName,
[, $raTime1st]
[, $raTime2nd]
[, $raChild>]);
Constructor.
C<$sName> is a name of the PPS.
C<$raTime1st>, C<$raTime2nd> is a array ref as
($iSec, $iMin, $iHour, $iDay, $iMon, $iYear).
$iSec means seconds, $iMin means minutes. $iHour means hours.
$iDay means day. $iMon is month -1. $iYear is year - 1900.
C<$raChild> is a array ref of children PPSs.
=head1 OLE::Storage_Lite::PPS::File
OLE::Storage_Lite::PPS::File has 3 method.
=head2 new
$oRoot = OLE::Storage_Lite::PPS::File->new($sName, $sData);
C<$sName> is name of the PPS.
C<$sData> is data of the PPS.
=head2 newFile()
$oRoot = OLE::Storage_Lite::PPS::File->newFile($sName, $sFile);
This function makes to use file handle for geting and storing data.
C<$sName> is name of the PPS.
If C<$sFile> is scalar, it assumes that is a filename.
If C<$sFile> is an IO::Handle object, it uses that specified handle.
If C<$sFile> is undef or '', it uses temporary file.
CAUTION: Take care C<$sFile> will be updated by C<append> method.
So if you want to use IO::Handle and append a data to it,
you should open the handle with "r+".
=head2 append()
$oRoot = $oPps->append($sData);
appends specified data to that PPS.
C<$sData> is appending data for that PPS.
=head1 CAUTION
A saved file with VBA (a.k.a Macros) by this module will not work correctly.
However modules can get the same information from the file,
the file occurs a error in application(Word, Excel ...).
=head1 DEPRECATED FEATURES
Older version of C<OLE::Storage_Lite> autovivified a scalar ref in the C<new()> constructors into a scalar filehandle. This functionality is still there for backwards compatibility but it is highly recommended that you do not use it. Instead create a filehandle (scalar or otherwise) and pass that in.
=head1 COPYRIGHT
The OLE::Storage_Lite module is Copyright (c) 2000,2001 Kawai Takanori. Japan.
All rights reserved.
You may distribute under the terms of either the GNU General Public
License or the Artistic License, as specified in the Perl README file.
=head1 ACKNOWLEDGEMENTS
First of all, I would like to acknowledge to Martin Schwartz and his module OLE::Storage.
=head1 AUTHOR
Kawai Takanori kwitknr@cpan.org
This module is currently maintained by John McNamara jmcnamara@cpan.org
=head1 SEE ALSO
OLE::Storage
Documentation for the OLE Compound document has been released by Microsoft under the I<Open Specification Promise>. See http://www.microsoft.com/interop/docs/supportingtechnologies.mspx
The Digital Imaging Group have also detailed the OLE format in the JPEG2000 specification: see Appendix A of http://www.i3a.org/pdf/wg1n1017.pdf
=cut
OLE_STORAGE_LITE
$fatpacked{"Spreadsheet/ParseExcel.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL';
package Spreadsheet::ParseExcel;
##############################################################################
#
# Spreadsheet::ParseExcel - Extract information from an Excel file.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2008 Takanori Kawai
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use 5.008;
use OLE::Storage_Lite;
use File::Basename qw(fileparse);
use IO::File;
use Config;
use Crypt::RC4;
use Digest::Perl::MD5;
our $VERSION = '0.65';
use Spreadsheet::ParseExcel::Workbook;
use Spreadsheet::ParseExcel::Worksheet;
use Spreadsheet::ParseExcel::Font;
use Spreadsheet::ParseExcel::Format;
use Spreadsheet::ParseExcel::Cell;
use Spreadsheet::ParseExcel::FmtDefault;
my $currentbook;
my @aColor = (
'000000', # 0x00
'FFFFFF', 'FFFFFF', 'FFFFFF', 'FFFFFF',
'FFFFFF', 'FFFFFF', 'FFFFFF', '000000', # 0x08
'FFFFFF', 'FF0000', '00FF00', '0000FF',
'FFFF00', 'FF00FF', '00FFFF', '800000', # 0x10
'008000', '000080', '808000', '800080',
'008080', 'C0C0C0', '808080', '9999FF', # 0x18
'993366', 'FFFFCC', 'CCFFFF', '660066',
'FF8080', '0066CC', 'CCCCFF', '000080', # 0x20
'FF00FF', 'FFFF00', '00FFFF', '800080',
'800000', '008080', '0000FF', '00CCFF', # 0x28
'CCFFFF', 'CCFFCC', 'FFFF99', '99CCFF',
'FF99CC', 'CC99FF', 'FFCC99', '3366FF', # 0x30
'33CCCC', '99CC00', 'FFCC00', 'FF9900',
'FF6600', '666699', '969696', '003366', # 0x38
'339966', '003300', '333300', '993300',
'993366', '333399', '333333', '000000' # 0x40
);
use constant verExcel95 => 0x500;
use constant verExcel97 => 0x600;
use constant verBIFF2 => 0x00;
use constant verBIFF3 => 0x02;
use constant verBIFF4 => 0x04;
use constant verBIFF5 => 0x08;
use constant verBIFF8 => 0x18;
use constant MS_BIFF_CRYPTO_NONE => 0;
use constant MS_BIFF_CRYPTO_XOR => 1;
use constant MS_BIFF_CRYPTO_RC4 => 2;
use constant sizeof_BIFF_8_FILEPASS => ( 6 + 3 * 16 );
use constant REKEY_BLOCK => 0x400;
# Error code for some of the common parsing errors.
use constant ErrorNone => 0;
use constant ErrorNoFile => 1;
use constant ErrorNoExcelData => 2;
use constant ErrorFileEncrypted => 3;
# Color index for the 'auto' color
use constant AutoColor => 64;
our %error_strings = (
ErrorNone, '', # 0
ErrorNoFile, 'File not found', # 1
ErrorNoExcelData, 'No Excel data found in file', # 2
ErrorFileEncrypted, 'File is encrypted', # 3
);
our %ProcTbl = (
#Develpers' Kit P291
0x14 => \&_subHeader, # Header
0x15 => \&_subFooter, # Footer
0x18 => \&_subName, # NAME(?)
0x1A => \&_subVPageBreak, # Vertical Page Break
0x1B => \&_subHPageBreak, # Horizontal Page Break
0x22 => \&_subFlg1904, # 1904 Flag
0x26 => \&_subMargin, # Left Margin
0x27 => \&_subMargin, # Right Margin
0x28 => \&_subMargin, # Top Margin
0x29 => \&_subMargin, # Bottom Margin
0x2A => \&_subPrintHeaders, # Print Headers
0x2B => \&_subPrintGridlines, # Print Gridlines
0x3C => \&_subContinue, # Continue
0x3D => \&_subWindow1, # Window1
0x43 => \&_subXF, # XF for Excel < 4.
0x0443 => \&_subXF, # XF for Excel = 4.
0x862 => \&_subSheetLayout, # Sheet Layout
0x1B8 => \&_subHyperlink, # HYPERLINK
#Develpers' Kit P292
0x55 => \&_subDefColWidth, # Consider
0x5C => \&_subWriteAccess, # WRITEACCESS
0x7D => \&_subColInfo, # Colinfo
0x7E => \&_subRK, # RK
0x81 => \&_subWSBOOL, # WSBOOL
0x83 => \&_subHcenter, # HCENTER
0x84 => \&_subVcenter, # VCENTER
0x85 => \&_subBoundSheet, # BoundSheet
0x92 => \&_subPalette, # Palette, fgp
0x99 => \&_subStandardWidth, # Standard Col
#Develpers' Kit P293
0xA1 => \&_subSETUP, # SETUP
0xBD => \&_subMulRK, # MULRK
0xBE => \&_subMulBlank, # MULBLANK
0xD6 => \&_subRString, # RString
#Develpers' Kit P294
0xE0 => \&_subXF, # ExTended Format
0xE5 => \&_subMergeArea, # MergeArea (Not Documented)
0xFC => \&_subSST, # Shared String Table
0xFD => \&_subLabelSST, # Label SST
#Develpers' Kit P295
0x201 => \&_subBlank, # Blank
0x202 => \&_subInteger, # Integer(Not Documented)
0x203 => \&_subNumber, # Number
0x204 => \&_subLabel, # Label
0x205 => \&_subBoolErr, # BoolErr
0x207 => \&_subString, # STRING
0x208 => \&_subRow, # RowData
0x221 => \&_subArray, # Array (Consider)
0x225 => \&_subDefaultRowHeight, # Consider
0x31 => \&_subFont, # Font
0x231 => \&_subFont, # Font
0x27E => \&_subRK, # RK
0x41E => \&_subFormat, # Format
0x06 => \&_subFormula, # Formula
0x406 => \&_subFormula, # Formula
0x009 => \&_subBOF, # BOF(BIFF2)
0x209 => \&_subBOF, # BOF(BIFF3)
0x409 => \&_subBOF, # BOF(BIFF4)
0x809 => \&_subBOF, # BOF(BIFF5-8)
);
our $BIGENDIAN;
our $PREFUNC;
our $_use_perlio;
#------------------------------------------------------------------------------
# Spreadsheet::ParseExcel->new
#------------------------------------------------------------------------------
sub new {
my ( $class, %hParam ) = @_;
if ( not defined $_use_perlio ) {
if ( exists $Config{useperlio}
&& defined $Config{useperlio}
&& $Config{useperlio} eq "define" )
{
$_use_perlio = 1;
}
else {
$_use_perlio = 0;
require IO::Scalar;
import IO::Scalar;
}
}
# Check ENDIAN(Little: Intel etc. BIG: Sparc etc)
$BIGENDIAN =
( defined $hParam{Endian} ) ? $hParam{Endian}
: ( unpack( "H08", pack( "L", 2 ) ) eq '02000000' ) ? 0
: 1;
my $self = {};
bless $self, $class;
$self->{GetContent} = \&_subGetContent;
if ( $hParam{EventHandlers} ) {
$self->SetEventHandlers( $hParam{EventHandlers} );
}
else {
$self->SetEventHandlers( \%ProcTbl );
}
if ( $hParam{AddHandlers} ) {
foreach my $sKey ( keys( %{ $hParam{AddHandlers} } ) ) {
$self->SetEventHandler( $sKey, $hParam{AddHandlers}->{$sKey} );
}
}
$self->{CellHandler} = $hParam{CellHandler};
$self->{NotSetCell} = $hParam{NotSetCell};
$self->{Object} = $hParam{Object};
if ( defined $hParam{Password} ) {
$self->{Password} = $hParam{Password};
}
else {
$self->{Password} = 'VelvetSweatshop';
}
$self->{_error_status} = ErrorNone;
return $self;
}
#------------------------------------------------------------------------------
# Spreadsheet::ParseExcel->SetEventHandler
#------------------------------------------------------------------------------
sub SetEventHandler {
my ( $self, $key, $sub_ref ) = @_;
$self->{FuncTbl}->{$key} = $sub_ref;
}
#------------------------------------------------------------------------------
# Spreadsheet::ParseExcel->SetEventHandlers
#------------------------------------------------------------------------------
sub SetEventHandlers {
my ( $self, $rhTbl ) = @_;
$self->{FuncTbl} = undef;
foreach my $sKey ( keys %$rhTbl ) {
$self->{FuncTbl}->{$sKey} = $rhTbl->{$sKey};
}
}
#------------------------------------------------------------------------------
# Decryption routines
# based on sources of gnumeric (ms-biff.c ms-excel-read.c)
#------------------------------------------------------------------------------
sub md5state {
my ( $md5 ) = @_;
my $s = '';
for ( my $i = 0 ; $i < 4 ; $i++ ) {
my $v = $md5->{_state}[$i];
$s .= chr( $v & 0xff );
$s .= chr( ( $v >> 8 ) & 0xff );
$s .= chr( ( $v >> 16 ) & 0xff );
$s .= chr( ( $v >> 24 ) & 0xff );
}
return $s;
}
sub MakeKey {
my ( $block, $key, $valContext ) = @_;
my $pwarray = "\0" x 64;
substr( $pwarray, 0, 5 ) = substr( $valContext, 0, 5 );
substr( $pwarray, 5, 1 ) = chr( $block & 0xff );
substr( $pwarray, 6, 1 ) = chr( ( $block >> 8 ) & 0xff );
substr( $pwarray, 7, 1 ) = chr( ( $block >> 16 ) & 0xff );
substr( $pwarray, 8, 1 ) = chr( ( $block >> 24 ) & 0xff );
substr( $pwarray, 9, 1 ) = "\x80";
substr( $pwarray, 56, 1 ) = "\x48";
my $md5 = Digest::Perl::MD5->new();
$md5->add( $pwarray );
my $s = md5state( $md5 );
${$key} = Crypt::RC4->new( $s );
}
sub VerifyPassword {
my ( $password, $docid, $salt_data, $hashedsalt_data, $valContext ) = @_;
my $pwarray = "\0" x 64;
my $i;
my $md5 = Digest::Perl::MD5->new();
for ( $i = 0 ; $i < length( $password ) ; $i++ ) {
my $o = ord( substr( $password, $i, 1 ) );
substr( $pwarray, 2 * $i, 1 ) = chr( $o & 0xff );
substr( $pwarray, 2 * $i + 1, 1 ) = chr( ( $o >> 8 ) & 0xff );
}
substr( $pwarray, 2 * $i, 1 ) = chr( 0x80 );
substr( $pwarray, 56, 1 ) = chr( ( $i << 4 ) & 0xff );
$md5->add( $pwarray );
my $mdContext1 = md5state( $md5 );
my $offset = 0;
my $keyoffset = 0;
my $tocopy = 5;
$md5->reset;
while ( $offset != 16 ) {
if ( ( 64 - $offset ) < 5 ) {
$tocopy = 64 - $offset;
}
substr( $pwarray, $offset, $tocopy ) =
substr( $mdContext1, $keyoffset, $tocopy );
$offset += $tocopy;
if ( $offset == 64 ) {
$md5->add( $pwarray );
$keyoffset = $tocopy;
$tocopy = 5 - $tocopy;
$offset = 0;
next;
}
$keyoffset = 0;
$tocopy = 5;
substr( $pwarray, $offset, 16 ) = $docid;
$offset += 16;
}
substr( $pwarray, 16, 1 ) = "\x80";
substr( $pwarray, 17, 47 ) = "\0" x 47;
substr( $pwarray, 56, 1 ) = "\x80";
substr( $pwarray, 57, 1 ) = "\x0a";
$md5->add( $pwarray );
${$valContext} = md5state( $md5 );
my $key;
MakeKey( 0, \$key, ${$valContext} );
my $salt = $key->RC4( $salt_data );
my $hashedsalt = $key->RC4( $hashedsalt_data );
$salt .= "\x80" . "\0" x 47;
substr( $salt, 56, 1 ) = "\x80";
$md5->reset;
$md5->add( $salt );
my $mdContext2 = md5state( $md5 );
return ( $mdContext2 eq $hashedsalt );
}
sub SkipBytes {
my ( $q, $start, $count ) = @_;
my $scratch = "\0" x REKEY_BLOCK;
my $block;
$block = int( ( $start + $count ) / REKEY_BLOCK );
if ( $block != $q->{block} ) {
MakeKey( $q->{block} = $block, \$q->{rc4_key}, $q->{md5_ctxt} );
$count = ( $start + $count ) % REKEY_BLOCK;
}
$q->{rc4_key}->RC4( substr( $scratch, 0, $count ) );
return 1;
}
sub SetDecrypt {
my ( $q, $version, $password ) = @_;
if ( $q->{opcode} != 0x2f ) {
return 0;
}
if ( $password eq '' ) {
return 0;
}
# TODO old versions decryption
#if (version < MS_BIFF_V8 || q->data[0] == 0)
# return ms_biff_pre_biff8_query_set_decrypt (q, password);
if ( $q->{length} != sizeof_BIFF_8_FILEPASS ) {
return 0;
}
unless (
VerifyPassword(
$password,
substr( $q->{data}, 6, 16 ),
substr( $q->{data}, 22, 16 ),
substr( $q->{data}, 38, 16 ),
\$q->{md5_ctxt}
)
)
{
return 0;
}
$q->{encryption} = MS_BIFF_CRYPTO_RC4;
$q->{block} = -1;
# The first record after FILEPASS seems to be unencrypted
$q->{dont_decrypt_next_record} = 1;
# Pretend to decrypt the entire stream up till this point, it was
# encrypted, but do it anyway to keep the rc4 state in sync
SkipBytes( $q, 0, $q->{streamPos} );
return 1;
}
sub InitStream {
my ( $stream_data ) = @_;
my %q;
$q{opcode} = 0;
$q{length} = 0;
$q{data} = '';
$q{stream} = $stream_data; # data stream
$q{streamLen} = length( $stream_data ); # stream length
$q{streamPos} = 0; # stream position
$q{encryption} = 0;
$q{xor_key} = '';
$q{rc4_key} = '';
$q{md5_ctxt} = '';
$q{block} = 0;
$q{dont_decrypt_next_record} = 0;
return \%q;
}
sub QueryNext {
my ( $q ) = @_;
if ( $q->{streamPos} + 4 >= $q->{streamLen} ) {
return 0;
}
my $data = substr( $q->{stream}, $q->{streamPos}, 4 );
( $q->{opcode}, $q->{length} ) = unpack( 'v2', $data );
# No biff record should be larger than around 20,000.
if ( $q->{length} >= 20000 ) {
return 0;
}
if ( $q->{length} > 0 ) {
$q->{data} = substr( $q->{stream}, $q->{streamPos} + 4, $q->{length} );
}
else {
$q->{data} = undef;
$q->{dont_decrypt_next_record} = 1;
}
if ( $q->{encryption} == MS_BIFF_CRYPTO_RC4 ) {
if ( $q->{dont_decrypt_next_record} ) {
SkipBytes( $q, $q->{streamPos}, 4 + $q->{length} );
$q->{dont_decrypt_next_record} = 0;
}
else {
my $pos = $q->{streamPos};
my $data = $q->{data};
my $len = $q->{length};
my $res = '';
# Pretend to decrypt header.
SkipBytes( $q, $pos, 4 );
$pos += 4;
while ( $q->{block} != int( ( $pos + $len ) / REKEY_BLOCK ) ) {
my $step = REKEY_BLOCK - ( $pos % REKEY_BLOCK );
$res .= $q->{rc4_key}->RC4( substr( $data, 0, $step ) );
$data = substr( $data, $step );
$pos += $step;
$len -= $step;
MakeKey( ++$q->{block}, \$q->{rc4_key}, $q->{md5_ctxt} );
}
$res .= $q->{rc4_key}->RC4( substr( $data, 0, $len ) );
$q->{data} = $res;
}
}
elsif ( $q->{encryption} == MS_BIFF_CRYPTO_XOR ) {
# not implemented
return 0;
}
elsif ( $q->{encryption} == MS_BIFF_CRYPTO_NONE ) {
}
$q->{streamPos} += 4 + $q->{length};
return 1;
}
###############################################################################
#
# Parse()
#
# Parse the Excel file and convert it into a tree of objects..
#
sub parse {
my ( $self, $source, $formatter ) = @_;
my $workbook = Spreadsheet::ParseExcel::Workbook->new();
$currentbook = $workbook;
$workbook->{SheetCount} = 0;
$workbook->{CellHandler} = $self->{CellHandler};
$workbook->{NotSetCell} = $self->{NotSetCell};
$workbook->{Object} = $self->{Object};
$workbook->{aColor} = [ @aColor ];
my ( $biff_data, $data_length ) = $self->_get_content( $source, $workbook );
return undef if not $biff_data;
if ( $formatter ) {
$workbook->{FmtClass} = $formatter;
}
else {
$workbook->{FmtClass} = Spreadsheet::ParseExcel::FmtDefault->new();
}
# Parse the BIFF data.
my $stream = InitStream( $biff_data );
while ( QueryNext( $stream ) ) {
my $record = $stream->{opcode};
my $record_length = $stream->{length};
my $record_header = $stream->{data};
# If the file contains a FILEPASS record we assume that it is encrypted
# and cannot be parsed.
if ( $record == 0x002F ) {
unless ( SetDecrypt( $stream, '', $self->{Password} ) ) {
$self->{_error_status} = ErrorFileEncrypted;
return undef;
}
}
# Special case of a formula String with no string.
if ( $workbook->{_PrevPos}
&& ( defined $self->{FuncTbl}->{$record} )
&& ( $record != 0x207 ) )
{
my $iPos = $workbook->{_PrevPos};
$workbook->{_PrevPos} = undef;
my ( $row, $col, $format_index ) = @$iPos;
_NewCell(
$workbook, $row, $col,
Kind => 'Formula String',
Val => '',
FormatNo => $format_index,
Format => $workbook->{Format}[$format_index],
Numeric => 0,
Code => undef,
Book => $workbook,
);
}
# If the BIFF record matches 0x0*09 then it is a BOF record.
# We reset the _skip_chart flag to ensure we check the sheet type.
if ( ( $record & 0xF0FF ) == 0x09 ) {
$workbook->{_skip_chart} = 0;
}
if ( defined $self->{FuncTbl}->{$record} && !$workbook->{_skip_chart} )
{
$self->{FuncTbl}->{$record}
->( $workbook, $record, $record_length, $record_header );
}
$PREFUNC = $record if ( $record != 0x3C ); #Not Continue
last if defined $workbook->{_ParseAbort};
}
foreach my $worksheet (@{$workbook->{Worksheet}} ) {
# Install hyperlinks into each cell
# Range is undocumented for user; allows reuse of data
if ($worksheet->{HyperLinks}) {
foreach my $link (@{$worksheet->{HyperLinks}}) {
for( my $row = $link->[3]; $row <= $link->[4]; $row++ ) {
for( my $col = $link->[5]; $col <= $link->[6]; $col++ ) {
$worksheet->{Cells}[$row][$col]{Hyperlink} = $link;
}
}
}
}
}
return $workbook;
}
###############################################################################
#
# _get_content()
#
# Get the Excel BIFF content from the file or filehandle.
#
sub _get_content {
my ( $self, $source, $workbook ) = @_;
my ( $biff_data, $data_length );
# Reset the error status in case method is called more than once.
$self->{_error_status} = ErrorNone;
my $ref = ref($source);
if ( $ref ) {
if ( $ref eq 'SCALAR' ) {
# Specified by a scalar buffer.
( $biff_data, $data_length ) = $self->{GetContent}->( $source );
}
elsif ( $ref eq 'ARRAY' ) {
# Specified by file content
$workbook->{File} = undef;
my $sData = join( '', @$source );
( $biff_data, $data_length ) = $self->{GetContent}->( \$sData );
}
else {
# Assume filehandle
# For CGI.pm (Light FileHandle)
my $sBuff = '';
if ( eval { binmode( $source ) } ) {
my $sWk;
while ( read( $source, $sWk, 4096 ) ) {
$sBuff .= $sWk;
}
}
else {
# Assume IO::Wrap or some other filehandle-like OO-only object
my $sWk;
# IO::Wrap does not implement binmode
eval { $source->binmode() };
while ( $source->read( $sWk, 4096 ) ) {
$sBuff .= $sWk;
}
}
( $biff_data, $data_length ) = $self->{GetContent}->( \$sBuff );
}
}
else {
# Specified by filename .
$workbook->{File} = $source;
if ( !-e $source ) {
$self->{_error_status} = ErrorNoFile;
return undef;
}
( $biff_data, $data_length ) = $self->{GetContent}->( $source );
}
# If the read was successful return the data.
if ( $data_length ) {
return ( $biff_data, $data_length );
}
else {
$self->{_error_status} = ErrorNoExcelData;
return undef;
}
}
#------------------------------------------------------------------------------
# _subGetContent (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _subGetContent {
my ( $sFile ) = @_;
my $oOl = OLE::Storage_Lite->new( $sFile );
return ( undef, undef ) unless ( $oOl );
my @aRes = $oOl->getPpsSearch(
[
OLE::Storage_Lite::Asc2Ucs( 'Book' ),
OLE::Storage_Lite::Asc2Ucs( 'Workbook' )
],
1, 1
);
return ( undef, undef ) if ( $#aRes < 0 );
#Hack from Herbert
if ( $aRes[0]->{Data} ) {
return ( $aRes[0]->{Data}, length( $aRes[0]->{Data} ) );
}
#Same as OLE::Storage_Lite
my $oIo;
#1. $sFile is Ref of scalar
if ( ref( $sFile ) eq 'SCALAR' ) {
if ( $_use_perlio ) {
open $oIo, "<", \$sFile;
}
else {
$oIo = IO::Scalar->new;
$oIo->open( $sFile );
}
}
#2. $sFile is a IO::Handle object
elsif ( UNIVERSAL::isa( $sFile, 'IO::Handle' ) ) {
$oIo = $sFile;
binmode( $oIo );
}
#3. $sFile is a simple filename string
elsif ( !ref( $sFile ) ) {
$oIo = IO::File->new;
$oIo->open( "<$sFile" ) || return undef;
binmode( $oIo );
}
my $sWk;
my $sBuff = '';
while ( $oIo->read( $sWk, 4096 ) ) { #4_096 has no special meanings
$sBuff .= $sWk;
}
$oIo->close();
#Not Excel file (simple method)
return ( undef, undef ) if ( substr( $sBuff, 0, 1 ) ne "\x09" );
return ( $sBuff, length( $sBuff ) );
}
#------------------------------------------------------------------------------
# _subBOF (for Spreadsheet::ParseExcel) Developers' Kit : P303
#------------------------------------------------------------------------------
sub _subBOF {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iVer, $iDt ) = unpack( "v2", $sWk );
#Workbook Global
if ( $iDt == 0x0005 ) {
$oBook->{Version} = unpack( "v", $sWk );
$oBook->{BIFFVersion} =
( $oBook->{Version} == verExcel95 ) ? verBIFF5 : verBIFF8;
$oBook->{_CurSheet} = undef;
$oBook->{_CurSheet_} = -1;
}
#Worksheet or Dialogsheet
elsif ( $iDt != 0x0020 ) { #if($iDt == 0x0010)
if ( defined $oBook->{_CurSheet_} ) {
$oBook->{_CurSheet} = $oBook->{_CurSheet_} + 1;
$oBook->{_CurSheet_}++;
(
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{SheetVersion},
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{SheetType},
)
= unpack( "v2", $sWk )
if ( length( $sWk ) > 4 );
}
else {
$oBook->{BIFFVersion} = int( $bOp / 0x100 );
if ( ( $oBook->{BIFFVersion} == verBIFF2 )
|| ( $oBook->{BIFFVersion} == verBIFF3 )
|| ( $oBook->{BIFFVersion} == verBIFF4 ) )
{
$oBook->{Version} = $oBook->{BIFFVersion};
$oBook->{_CurSheet} = 0;
$oBook->{Worksheet}[ $oBook->{SheetCount} ] =
Spreadsheet::ParseExcel::Worksheet->new(
_Name => '',
Name => '',
_Book => $oBook,
_SheetNo => $oBook->{SheetCount},
);
$oBook->{SheetCount}++;
}
}
}
else {
# Set flag to ignore all chart records until we reach another BOF.
$oBook->{_skip_chart} = 1;
}
}
#------------------------------------------------------------------------------
# _subBlank (for Spreadsheet::ParseExcel) DK:P303
#------------------------------------------------------------------------------
sub _subBlank {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk );
_NewCell(
$oBook, $iR, $iC,
Kind => 'BLANK',
Val => '',
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => undef,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subInteger (for Spreadsheet::ParseExcel) Not in DK
#------------------------------------------------------------------------------
sub _subInteger {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF, $sTxt, $sDum );
( $iR, $iC, $iF, $sDum, $sTxt ) = unpack( "v3cv", $sWk );
_NewCell(
$oBook, $iR, $iC,
Kind => 'INTEGER',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => undef,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subNumber (for Spreadsheet::ParseExcel) : DK: P354
#------------------------------------------------------------------------------
sub _subNumber {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk );
my $dVal = _convDval( substr( $sWk, 6, 8 ) );
_NewCell(
$oBook, $iR, $iC,
Kind => 'Number',
Val => $dVal,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 1,
Code => undef,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _convDval (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _convDval {
my ( $sWk ) = @_;
return
unpack( "d",
( $BIGENDIAN ) ? pack( "c8", reverse( unpack( "c8", $sWk ) ) ) : $sWk );
}
#------------------------------------------------------------------------------
# _subRString (for Spreadsheet::ParseExcel) DK:P405
#------------------------------------------------------------------------------
sub _subRString {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF, $iL, $sTxt );
( $iR, $iC, $iF, $iL ) = unpack( "v4", $sWk );
$sTxt = substr( $sWk, 8, $iL );
#Has STRUN
if ( length( $sWk ) > ( 8 + $iL ) ) {
_NewCell(
$oBook, $iR, $iC,
Kind => 'RString',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => '_native_', #undef,
Book => $oBook,
Rich => substr( $sWk, ( 8 + $iL ) + 1 ),
);
}
else {
_NewCell(
$oBook, $iR, $iC,
Kind => 'RString',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => '_native_',
Book => $oBook,
);
}
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subBoolErr (for Spreadsheet::ParseExcel) DK:P306
#------------------------------------------------------------------------------
sub _subBoolErr {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk );
my ( $iVal, $iFlg ) = unpack( "cc", substr( $sWk, 6, 2 ) );
my $sTxt = DecodeBoolErr( $iVal, $iFlg );
_NewCell(
$oBook, $iR, $iC,
Kind => 'BoolError',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => undef,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
###############################################################################
#
# _subRK()
#
# Decode the RK BIFF record.
#
sub _subRK {
my ( $workbook, $biff_number, $length, $data ) = @_;
my ( $row, $col, $format_index, $rk_number ) = unpack( 'vvvV', $data );
my $number = _decode_rk_number( $rk_number );
_NewCell(
$workbook, $row, $col,
Kind => 'RK',
Val => $number,
FormatNo => $format_index,
Format => $workbook->{Format}->[$format_index],
Numeric => 1,
Code => undef,
Book => $workbook,
);
# Store the max and min row/col values.
_SetDimension( $workbook, $row, $col, $col );
}
#------------------------------------------------------------------------------
# _subArray (for Spreadsheet::ParseExcel) DK:P297
#------------------------------------------------------------------------------
sub _subArray {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iBR, $iER, $iBC, $iEC ) = unpack( "v2c2", $sWk );
}
#------------------------------------------------------------------------------
# _subFormula (for Spreadsheet::ParseExcel) DK:P336
#------------------------------------------------------------------------------
sub _subFormula {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk );
my ( $iFlg ) = unpack( "v", substr( $sWk, 12, 2 ) );
if ( $iFlg == 0xFFFF ) {
my ( $iKind ) = unpack( "c", substr( $sWk, 6, 1 ) );
my ( $iVal ) = unpack( "c", substr( $sWk, 8, 1 ) );
if ( ( $iKind == 1 ) or ( $iKind == 2 ) ) {
my $sTxt =
( $iKind == 1 )
? DecodeBoolErr( $iVal, 0 )
: DecodeBoolErr( $iVal, 1 );
_NewCell(
$oBook, $iR, $iC,
Kind => 'Formula Bool',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => undef,
Book => $oBook,
);
}
else { # Result (Reserve Only)
$oBook->{_PrevPos} = [ $iR, $iC, $iF ];
}
}
else {
my $dVal = _convDval( substr( $sWk, 6, 8 ) );
_NewCell(
$oBook, $iR, $iC,
Kind => 'Formula Number',
Val => $dVal,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 1,
Code => undef,
Book => $oBook,
);
}
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subString (for Spreadsheet::ParseExcel) DK:P414
#------------------------------------------------------------------------------
sub _subString {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
#Position (not enough for ARRAY)
my $iPos = $oBook->{_PrevPos};
return undef unless ( $iPos );
$oBook->{_PrevPos} = undef;
my ( $iR, $iC, $iF ) = @$iPos;
my ( $iLen, $sTxt, $sCode );
if ( $oBook->{BIFFVersion} == verBIFF8 ) {
my ( $raBuff, $iLen ) = _convBIFF8String( $oBook, $sWk, 1 );
$sTxt = $raBuff->[0];
$sCode = ( $raBuff->[1] ) ? 'ucs2' : undef;
}
elsif ( $oBook->{BIFFVersion} == verBIFF5 ) {
$sCode = '_native_';
$iLen = unpack( "v", $sWk );
$sTxt = substr( $sWk, 2, $iLen );
}
else {
$sCode = '_native_';
$iLen = unpack( "c", $sWk );
$sTxt = substr( $sWk, 1, $iLen );
}
_NewCell(
$oBook, $iR, $iC,
Kind => 'String',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => $sCode,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subLabel (for Spreadsheet::ParseExcel) DK:P344
#------------------------------------------------------------------------------
sub _subLabel {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk );
my ( $sLbl, $sCode );
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
my ( $raBuff, $iLen, $iStPos, $iLenS ) =
_convBIFF8String( $oBook, substr( $sWk, 6 ), 1 );
$sLbl = $raBuff->[0];
$sCode = ( $raBuff->[1] ) ? 'ucs2' : undef;
}
#Before BIFF8
else {
$sLbl = substr( $sWk, 8 );
$sCode = '_native_';
}
_NewCell(
$oBook, $iR, $iC,
Kind => 'Label',
Val => $sLbl,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => $sCode,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
###############################################################################
#
# _subMulRK()
#
# Decode the Multiple RK BIFF record.
#
sub _subMulRK {
my ( $workbook, $biff_number, $length, $data ) = @_;
# JMN: I don't know why this is here.
return if $workbook->{SheetCount} <= 0;
my ( $row, $first_col ) = unpack( "v2", $data );
my $last_col = unpack( "v", substr( $data, length( $data ) - 2, 2 ) );
# Iterate over the RK array and decode the data.
my $pos = 4;
for my $col ( $first_col .. $last_col ) {
my $data = substr( $data, $pos, 6 );
my ( $format_index, $rk_number ) = unpack 'vV', $data;
my $number = _decode_rk_number( $rk_number );
_NewCell(
$workbook, $row, $col,
Kind => 'MulRK',
Val => $number,
FormatNo => $format_index,
Format => $workbook->{Format}->[$format_index],
Numeric => 1,
Code => undef,
Book => $workbook,
);
$pos += 6;
}
# Store the max and min row/col values.
_SetDimension( $workbook, $row, $first_col, $last_col );
}
#------------------------------------------------------------------------------
# _subMulBlank (for Spreadsheet::ParseExcel) DK:P349
#------------------------------------------------------------------------------
sub _subMulBlank {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iSc ) = unpack( "v2", $sWk );
my $iEc = unpack( "v", substr( $sWk, length( $sWk ) - 2, 2 ) );
my $iPos = 4;
for ( my $iC = $iSc ; $iC <= $iEc ; $iC++ ) {
my $iF = unpack( 'v', substr( $sWk, $iPos, 2 ) );
_NewCell(
$oBook, $iR, $iC,
Kind => 'MulBlank',
Val => '',
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => undef,
Book => $oBook,
);
$iPos += 2;
}
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iSc, $iEc );
}
#------------------------------------------------------------------------------
# _subLabelSST (for Spreadsheet::ParseExcel) DK: P345
#------------------------------------------------------------------------------
sub _subLabelSST {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF, $iIdx ) = unpack( 'v3V', $sWk );
_NewCell(
$oBook, $iR, $iC,
Kind => 'PackedIdx',
Val => $oBook->{PkgStr}[$iIdx]->{Text},
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => ( $oBook->{PkgStr}[$iIdx]->{Unicode} ) ? 'ucs2' : undef,
Book => $oBook,
Rich => $oBook->{PkgStr}[$iIdx]->{Rich},
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subFlg1904 (for Spreadsheet::ParseExcel) DK:P296
#------------------------------------------------------------------------------
sub _subFlg1904 {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
$oBook->{Flg1904} = unpack( "v", $sWk );
}
#------------------------------------------------------------------------------
# _subRow (for Spreadsheet::ParseExcel) DK:P403
#------------------------------------------------------------------------------
sub _subRow {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
#0. Get Worksheet info (MaxRow, MaxCol, MinRow, MinCol)
my ( $iR, $iSc, $iEc, $iHght, $undef1, $undef2, $iGr, $iXf ) =
unpack( "v8", $sWk );
$iEc--;
if ( $iGr & 0x20 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RowHidden}[$iR] = 1;
}
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RowHeight}[$iR] = $iHght / 20;
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iSc, $iEc );
}
#------------------------------------------------------------------------------
# _SetDimension (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _SetDimension {
my ( $oBook, $iR, $iSc, $iEc ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
#2.MaxRow, MaxCol, MinRow, MinCol
#2.1 MinRow
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} = $iR
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} )
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} <= $iR );
#2.2 MaxRow
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} = $iR
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} )
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} > $iR );
#2.3 MinCol
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} = $iSc
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} )
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} <= $iSc );
#2.4 MaxCol
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} = $iEc
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} )
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} > $iEc );
}
#------------------------------------------------------------------------------
# _subDefaultRowHeight (for Spreadsheet::ParseExcel) DK: P318
#------------------------------------------------------------------------------
sub _subDefaultRowHeight {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
#1. RowHeight
my ( $iDum, $iHght ) = unpack( "v2", $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{DefRowHeight} = $iHght / 20;
}
#------------------------------------------------------------------------------
# _subStandardWidth(for Spreadsheet::ParseExcel) DK:P413
#------------------------------------------------------------------------------
sub _subStandardWidth {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my $iW = unpack( "v", $sWk );
$oBook->{StandardWidth} = _convert_col_width( $oBook, $iW );
}
###############################################################################
#
# _subDefColWidth()
#
# Read the DEFCOLWIDTH Biff record. This gives the width in terms of chars
# and is different from the width in the COLINFO record.
#
sub _subDefColWidth {
my ( $self, $record, $length, $data ) = @_;
my $width = unpack 'v', $data;
# Adjustment for default Arial 10 width.
$width = 8.43 if $width == 8;
$self->{Worksheet}->[ $self->{_CurSheet} ]->{DefColWidth} = $width;
}
###############################################################################
#
# _convert_col_width()
#
# Converts from the internal Excel column width units to user units seen in the
# interface. It is first necessary to convert the internal width to pixels and
# then to user units. The conversion is specific to a default font of Arial 10.
# TODO, the conversion should be extended to other fonts and sizes.
#
sub _convert_col_width {
my $self = shift;
my $excel_width = shift;
# Convert from Excel units to pixels (rounded up).
my $pixels = int( 0.5 + $excel_width * 7 / 256 );
# Convert from pixels to user units.
# The conversion is different for columns <= 1 user unit (12 pixels).
my $user_width;
if ( $pixels <= 12 ) {
$user_width = $pixels / 12;
}
else {
$user_width = ( $pixels - 5 ) / 7;
}
# Round up to 2 decimal places.
$user_width = int( $user_width * 100 + 0.5 ) / 100;
return $user_width;
}
#------------------------------------------------------------------------------
# _subColInfo (for Spreadsheet::ParseExcel) DK:P309
#------------------------------------------------------------------------------
sub _subColInfo {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless defined $oBook->{_CurSheet};
my ( $iSc, $iEc, $iW, $iXF, $iGr ) = unpack( "v5", $sWk );
for ( my $i = $iSc ; $i <= $iEc ; $i++ ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColWidth}[$i] =
_convert_col_width( $oBook, $iW );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColFmtNo}[$i] = $iXF;
if ( $iGr & 0x01 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColHidden}[$i] = 1;
}
}
}
#------------------------------------------------------------------------------
# _subWindow1 Window information P 273
#------------------------------------------------------------------------------
sub _subWindow1 {
my ( $workbook, $op, $len, $wk ) = @_;
return if ( $workbook->{BIFFVersion} <= verBIFF4() );
my (
$hpos, $vpos, $width,
$height, $options, $active,
$firsttab, $numselected, $tabbarwidth
) = unpack( "v9", $wk );
$workbook->{ActiveSheet} = $active;
}
#------------------------------------------------------------------------------
# _subSheetLayout OpenOffice 5.96 (P207)
#------------------------------------------------------------------------------
sub _subSheetLayout {
my ( $workbook, $op, $len, $wk ) = @_;
my @unused;
(
my $rc,
@unused[ 1 .. 10 ],
@unused[ 11 .. 14 ],
my $color, @unused[ 15, 16 ]
) = unpack( "vC10C4vC2", $wk );
return unless ( $rc == 0x0862 );
$workbook->{Worksheet}[ $workbook->{_CurSheet} ]->{TabColor} = $color;
}
#------------------------------------------------------------------------------
# _subHyperlink OpenOffice 5.96 (P182)
#
# Also see: http://msdn.microsoft.com/en-us/library/gg615407(v=office.14).aspx
#------------------------------------------------------------------------------
# Helper: Extract a GID, returns as text string
sub _getguid {
my( $wk ) = @_;
my( $text, $guidl, $guids1, $guids2, @guidb );
( $guidl, $guids1, $guids2, @guidb[0..7] ) = unpack( 'Vv2C8', $wk );
$text = sprintf( '%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X', $guidl, $guids1, $guids2, @guidb);
return $text;
}
# Helper: Extract a counted (16-bit) unicode string, returns string,
# updates $offset
# $zterm == 1 if string is null-terminated.
# $bc if length is in bytes (not chars)
sub _getustr {
my( $wk, $offset, $zterm, $bc ) = @_;
my $len = unpack( 'V', substr( $wk, $offset ) );
$offset += 4;
if( $bc ) {
$len /= 2;
}
$len -= $zterm;
my $text = join( '', map { chr $_ } unpack( "v$len", substr( $wk, $offset ) ) );
$text =~ s/\0.*\z// if( $zterm );
$_[1] = ( $offset += ($len + $zterm) *2 );
return $text;
}
# HYPERLINK record
sub _subHyperlink {
my ( $workbook, $op, $len, $wk ) = @_;
# REF
my( $srow, $erow, $scol, $ecol ) = unpack( 'v4', $wk );
my $guid = _getguid( substr( $wk, 8 ) );
return unless( $guid eq '79EAC9D0-BAF9-11CE-8C82-00AA004BA90B' );
my( $stmvers, $flags ) = unpack( 'VV', substr( $wk, 24 ) );
return if( $flags & 0x60 || $stmvers != 2 );
my $offset = 32;
my( $desc,$frame, $link, $mark );
if( ($flags & 0x14) == 0x14 ) {
$desc = _getustr( $wk, $offset, 1, 0 );
}
if( $flags & 0x80 ) {
$frame = _getustr( $wk, $offset, 1, 0 );
}
$link = '';
if( $flags & 0x100 ) {
# UNC path
$link = 'file:///' . _getustr( $wk, $offset, 1, 0 );
} elsif( $flags & 0x1 ) {
# Has link (URI)
$guid = _getguid( substr( $wk, $offset ) );
$offset += 16;
if( $guid eq '79EAC9E0-BAF9-11CE-8C82-00AA004BA90B' ) {
# URI
$link = _getustr( $wk, $offset, 1, 1 );
} elsif( $guid eq '00000303-0000-0000-C000-000000000046' ) {
# Local file
$link = 'file:///';
# !($flags & 2) = 'relative path'
if( !($flags & 0x2) ) {
my $file = $workbook->{File};
if( defined $file && length $file ) {
$link .= (fileparse($file))[1];
}
else {
$link .= '%REL%'
}
}
my $dirn = unpack( 'v', substr( $wk, $offset ) );
$offset += 2;
$link .= '..\\' x $dirn;
my $namelen = unpack( 'V', substr( $wk, $offset ) );
$offset += 4;
my $name = unpack( 'Z*', substr( $wk, $offset ) );
$offset += $namelen;
$offset += 24;
my $size = unpack( 'V', substr( $wk, $offset ) );
$offset += 4;
if( $size ) {
my $xlen = unpack( 'V', substr( $wk, $offset ) ) / 2;
$name = join( '', map { chr $_} unpack( "v$xlen", substr( $wk, $offset+4+2) ) );
$offset += $size;
}
$link .= $name;
} else {
return;
}
}
# Text mark (Fragment identifier)
if( $flags & 0x8 ) {
# Cellrefs contain reserved characters, so url-encode
my $fragment = _getustr( $wk, $offset, 1 );
$fragment =~ s/([^\w.~-])/sprintf( '%%%02X', ord( $1 ) )/gems;
$link .= '#' . $fragment;
}
# Update loop at end of parse() if this changes
push @{ $workbook->{Worksheet}[ $workbook->{_CurSheet} ]->{HyperLinks} }, [
$desc, $link, $frame, $srow, $erow, $scol, $ecol ];
}
#------------------------------------------------------------------------------
# _subSST (for Spreadsheet::ParseExcel) DK:P413
#------------------------------------------------------------------------------
sub _subSST {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
_subStrWk( $oBook, substr( $sWk, 8 ) );
}
#------------------------------------------------------------------------------
# _subContinue (for Spreadsheet::ParseExcel) DK:P311
#------------------------------------------------------------------------------
sub _subContinue {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
#if(defined $self->{FuncTbl}->{$bOp}) {
# $self->{FuncTbl}->{$PREFUNC}->($oBook, $bOp, $bLen, $sWk);
#}
_subStrWk( $oBook, $sWk, 1 ) if ( $PREFUNC == 0xFC );
}
#------------------------------------------------------------------------------
# _subWriteAccess (for Spreadsheet::ParseExcel) DK:P451
#------------------------------------------------------------------------------
sub _subWriteAccess {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return if ( defined $oBook->{_Author} );
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
$oBook->{Author} = _convBIFF8String( $oBook, $sWk );
}
#Before BIFF8
else {
my ( $iLen ) = unpack( "c", $sWk );
$oBook->{Author} =
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' );
}
}
#------------------------------------------------------------------------------
# _convBIFF8String (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _convBIFF8String {
my ( $oBook, $sWk, $iCnvFlg ) = @_;
my ( $iLen, $iFlg ) = unpack( "vc", $sWk );
my ( $iHigh, $iExt, $iRich ) = ( $iFlg & 0x01, $iFlg & 0x04, $iFlg & 0x08 );
my ( $iStPos, $iExtCnt, $iRichCnt, $sStr );
#2. Rich and Ext
if ( $iRich && $iExt ) {
$iStPos = 9;
( $iRichCnt, $iExtCnt ) = unpack( 'vV', substr( $sWk, 3, 6 ) );
}
elsif ( $iRich ) { #Only Rich
$iStPos = 5;
$iRichCnt = unpack( 'v', substr( $sWk, 3, 2 ) );
$iExtCnt = 0;
}
elsif ( $iExt ) { #Only Ext
$iStPos = 7;
$iRichCnt = 0;
$iExtCnt = unpack( 'V', substr( $sWk, 3, 4 ) );
}
else { #Nothing Special
$iStPos = 3;
$iExtCnt = 0;
$iRichCnt = 0;
}
#3.Get String
if ( $iHigh ) { #Compressed
$iLen *= 2;
$sStr = substr( $sWk, $iStPos, $iLen );
_SwapForUnicode( \$sStr );
$sStr = $oBook->{FmtClass}->TextFmt( $sStr, 'ucs2' )
unless ( $iCnvFlg );
}
else { #Not Compressed
$sStr = substr( $sWk, $iStPos, $iLen );
$sStr = $oBook->{FmtClass}->TextFmt( $sStr, undef ) unless ( $iCnvFlg );
}
#4. return
if ( wantarray ) {
#4.1 Get Rich and Ext
if ( length( $sWk ) < $iStPos + $iLen + $iRichCnt * 4 + $iExtCnt ) {
return (
[ undef, $iHigh, undef, undef ],
$iStPos + $iLen + $iRichCnt * 4 + $iExtCnt,
$iStPos, $iLen
);
}
else {
return (
[
$sStr,
$iHigh,
substr( $sWk, $iStPos + $iLen, $iRichCnt * 4 ),
substr( $sWk, $iStPos + $iLen + $iRichCnt * 4, $iExtCnt )
],
$iStPos + $iLen + $iRichCnt * 4 + $iExtCnt,
$iStPos, $iLen
);
}
}
else {
return $sStr;
}
}
#------------------------------------------------------------------------------
# _subXF (for Spreadsheet::ParseExcel) DK:P453
#------------------------------------------------------------------------------
sub _subXF {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iFnt, $iIdx );
my (
$iLock, $iHidden, $iStyle, $i123, $iAlH, $iWrap,
$iAlV, $iJustL, $iRotate, $iInd, $iShrink, $iMerge,
$iReadDir, $iBdrD, $iBdrSL, $iBdrSR, $iBdrST, $iBdrSB,
$iBdrSD, $iBdrCL, $iBdrCR, $iBdrCT, $iBdrCB, $iBdrCD,
$iFillP, $iFillCF, $iFillCB
);
if ( $oBook->{BIFFVersion} == verBIFF4 ) {
# Minimal support for Excel 4. We just get the font and format indices
# so that the cell data value can be formatted.
( $iFnt, $iIdx, ) = unpack( "CC", $sWk );
}
elsif ( $oBook->{BIFFVersion} == verBIFF8 ) {
my ( $iGen, $iAlign, $iGen2, $iBdr1, $iBdr2, $iBdr3, $iPtn );
( $iFnt, $iIdx, $iGen, $iAlign, $iGen2, $iBdr1, $iBdr2, $iBdr3, $iPtn )
= unpack( "v7Vv", $sWk );
$iLock = ( $iGen & 0x01 ) ? 1 : 0;
$iHidden = ( $iGen & 0x02 ) ? 1 : 0;
$iStyle = ( $iGen & 0x04 ) ? 1 : 0;
$i123 = ( $iGen & 0x08 ) ? 1 : 0;
$iAlH = ( $iAlign & 0x07 );
$iWrap = ( $iAlign & 0x08 ) ? 1 : 0;
$iAlV = ( $iAlign & 0x70 ) / 0x10;
$iJustL = ( $iAlign & 0x80 ) ? 1 : 0;
$iRotate = ( ( $iAlign & 0xFF00 ) / 0x100 ) & 0x00FF;
$iRotate = 90 if ( $iRotate == 255 );
$iRotate = 90 - $iRotate if ( $iRotate > 90 );
$iInd = ( $iGen2 & 0x0F );
$iShrink = ( $iGen2 & 0x10 ) ? 1 : 0;
$iMerge = ( $iGen2 & 0x20 ) ? 1 : 0;
$iReadDir = ( ( $iGen2 & 0xC0 ) / 0x40 ) & 0x03;
$iBdrSL = $iBdr1 & 0x0F;
$iBdrSR = ( ( $iBdr1 & 0xF0 ) / 0x10 ) & 0x0F;
$iBdrST = ( ( $iBdr1 & 0xF00 ) / 0x100 ) & 0x0F;
$iBdrSB = ( ( $iBdr1 & 0xF000 ) / 0x1000 ) & 0x0F;
$iBdrCL = ( ( $iBdr2 & 0x7F ) ) & 0x7F;
$iBdrCR = ( ( $iBdr2 & 0x3F80 ) / 0x80 ) & 0x7F;
$iBdrD = ( ( $iBdr2 & 0xC000 ) / 0x4000 ) & 0x3;
$iBdrCT = ( ( $iBdr3 & 0x7F ) ) & 0x7F;
$iBdrCB = ( ( $iBdr3 & 0x3F80 ) / 0x80 ) & 0x7F;
$iBdrCD = ( ( $iBdr3 & 0x1FC000 ) / 0x4000 ) & 0x7F;
$iBdrSD = ( ( $iBdr3 & 0x1E00000 ) / 0x200000 ) & 0xF;
$iFillP = ( ( $iBdr3 & 0xFC000000 ) / 0x4000000 ) & 0x3F;
$iFillCF = ( $iPtn & 0x7F );
$iFillCB = ( ( $iPtn & 0x3F80 ) / 0x80 ) & 0x7F;
}
else {
my ( $iGen, $iAlign, $iPtn, $iPtn2, $iBdr1, $iBdr2 );
( $iFnt, $iIdx, $iGen, $iAlign, $iPtn, $iPtn2, $iBdr1, $iBdr2 ) =
unpack( "v8", $sWk );
$iLock = ( $iGen & 0x01 ) ? 1 : 0;
$iHidden = ( $iGen & 0x02 ) ? 1 : 0;
$iStyle = ( $iGen & 0x04 ) ? 1 : 0;
$i123 = ( $iGen & 0x08 ) ? 1 : 0;
$iAlH = ( $iAlign & 0x07 );
$iWrap = ( $iAlign & 0x08 ) ? 1 : 0;
$iAlV = ( $iAlign & 0x70 ) / 0x10;
$iJustL = ( $iAlign & 0x80 ) ? 1 : 0;
$iRotate = ( ( $iAlign & 0x300 ) / 0x100 ) & 0x3;
$iFillCF = ( $iPtn & 0x7F );
$iFillCB = ( ( $iPtn & 0x1F80 ) / 0x80 ) & 0x7F;
$iFillP = ( $iPtn2 & 0x3F );
$iBdrSB = ( ( $iPtn2 & 0x1C0 ) / 0x40 ) & 0x7;
$iBdrCB = ( ( $iPtn2 & 0xFE00 ) / 0x200 ) & 0x7F;
$iBdrST = ( $iBdr1 & 0x07 );
$iBdrSL = ( ( $iBdr1 & 0x38 ) / 0x8 ) & 0x07;
$iBdrSR = ( ( $iBdr1 & 0x1C0 ) / 0x40 ) & 0x07;
$iBdrCT = ( ( $iBdr1 & 0xFE00 ) / 0x200 ) & 0x7F;
$iBdrCL = ( $iBdr2 & 0x7F ) & 0x7F;
$iBdrCR = ( ( $iBdr2 & 0x3F80 ) / 0x80 ) & 0x7F;
}
push @{ $oBook->{Format} }, Spreadsheet::ParseExcel::Format->new(
FontNo => $iFnt,
Font => $oBook->{Font}[$iFnt],
FmtIdx => $iIdx,
Lock => $iLock,
Hidden => $iHidden,
Style => $iStyle,
Key123 => $i123,
AlignH => $iAlH,
Wrap => $iWrap,
AlignV => $iAlV,
JustLast => $iJustL,
Rotate => $iRotate,
Indent => $iInd,
Shrink => $iShrink,
Merge => $iMerge,
ReadDir => $iReadDir,
BdrStyle => [ $iBdrSL, $iBdrSR, $iBdrST, $iBdrSB ],
BdrColor => [ $iBdrCL, $iBdrCR, $iBdrCT, $iBdrCB ],
BdrDiag => [ $iBdrD, $iBdrSD, $iBdrCD ],
Fill => [ $iFillP, $iFillCF, $iFillCB ],
);
}
#------------------------------------------------------------------------------
# _subFormat (for Spreadsheet::ParseExcel) DK: P336
#------------------------------------------------------------------------------
sub _subFormat {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my $sFmt;
if ( $oBook->{BIFFVersion} <= verBIFF5 ) {
$sFmt = substr( $sWk, 3, unpack( 'c', substr( $sWk, 2, 1 ) ) );
$sFmt = $oBook->{FmtClass}->TextFmt( $sFmt, '_native_' );
}
else {
$sFmt = _convBIFF8String( $oBook, substr( $sWk, 2 ) );
}
my $format_index = unpack( 'v', substr( $sWk, 0, 2 ) );
# Excel 4 and earlier used an index of 0 to indicate that a built-in format
# that was stored implicitly.
if ( $oBook->{BIFFVersion} <= verBIFF4 && $format_index == 0 ) {
$format_index = keys %{ $oBook->{FormatStr} };
}
$oBook->{FormatStr}->{$format_index} = $sFmt;
}
#------------------------------------------------------------------------------
# _subPalette (for Spreadsheet::ParseExcel) DK: P393
#------------------------------------------------------------------------------
sub _subPalette {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
for ( my $i = 0 ; $i < unpack( 'v', $sWk ) ; $i++ ) {
# push @aColor, unpack('H6', substr($sWk, $i*4+2));
$oBook->{aColor}[ $i + 8 ] = unpack( 'H6', substr( $sWk, $i * 4 + 2 ) );
}
}
#------------------------------------------------------------------------------
# _subFont (for Spreadsheet::ParseExcel) DK:P333
#------------------------------------------------------------------------------
sub _subFont {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline, $sFntName );
my ( $bBold, $bItalic, $bUnderline, $bStrikeout );
if ( $oBook->{BIFFVersion} == verBIFF8 ) {
( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline ) =
unpack( "v5c", $sWk );
my ( $iSize, $iHigh ) = unpack( 'cc', substr( $sWk, 14, 2 ) );
if ( $iHigh ) {
$sFntName = substr( $sWk, 16, $iSize * 2 );
_SwapForUnicode( \$sFntName );
$sFntName = $oBook->{FmtClass}->TextFmt( $sFntName, 'ucs2' );
}
else {
$sFntName = substr( $sWk, 16, $iSize );
$sFntName = $oBook->{FmtClass}->TextFmt( $sFntName, '_native_' );
}
$bBold = ( $iBold >= 0x2BC ) ? 1 : 0;
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0;
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0;
$bUnderline = ( $iUnderline ) ? 1 : 0;
}
elsif ( $oBook->{BIFFVersion} == verBIFF5 ) {
( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline ) =
unpack( "v5c", $sWk );
$sFntName =
$oBook->{FmtClass}
->TextFmt( substr( $sWk, 15, unpack( "c", substr( $sWk, 14, 1 ) ) ),
'_native_' );
$bBold = ( $iBold >= 0x2BC ) ? 1 : 0;
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0;
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0;
$bUnderline = ( $iUnderline ) ? 1 : 0;
}
else {
( $iHeight, $iAttr ) = unpack( "v2", $sWk );
$iCIdx = undef;
$iSuper = 0;
$bBold = ( $iAttr & 0x01 ) ? 1 : 0;
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0;
$bUnderline = ( $iAttr & 0x04 ) ? 1 : 0;
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0;
$sFntName = substr( $sWk, 5, unpack( "c", substr( $sWk, 4, 1 ) ) );
}
push @{ $oBook->{Font} }, Spreadsheet::ParseExcel::Font->new(
Height => $iHeight / 20.0,
Attr => $iAttr,
Color => $iCIdx,
Super => $iSuper,
UnderlineStyle => $iUnderline,
Name => $sFntName,
Bold => $bBold,
Italic => $bItalic,
Underline => $bUnderline,
Strikeout => $bStrikeout,
);
#Skip Font[4]
push @{ $oBook->{Font} }, {} if ( scalar( @{ $oBook->{Font} } ) == 4 );
}
#------------------------------------------------------------------------------
# _subBoundSheet (for Spreadsheet::ParseExcel): DK: P307
#------------------------------------------------------------------------------
sub _subBoundSheet {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iPos, $iGr, $iKind ) = unpack( "Lc2", $sWk );
$iKind &= 0x0F;
return if ( ( $iKind != 0x00 ) && ( $iKind != 0x01 ) );
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
my ( $iSize, $iUni ) = unpack( "cc", substr( $sWk, 6, 2 ) );
my $sWsName = substr( $sWk, 8 );
if ( $iUni & 0x01 ) {
_SwapForUnicode( \$sWsName );
$sWsName = $oBook->{FmtClass}->TextFmt( $sWsName, 'ucs2' );
}
$oBook->{Worksheet}[ $oBook->{SheetCount} ] =
Spreadsheet::ParseExcel::Worksheet->new(
Name => $sWsName,
Kind => $iKind,
_Pos => $iPos,
_Book => $oBook,
_SheetNo => $oBook->{SheetCount},
SheetHidden => $iGr & 0x03
);
}
else {
$oBook->{Worksheet}[ $oBook->{SheetCount} ] =
Spreadsheet::ParseExcel::Worksheet->new(
Name =>
$oBook->{FmtClass}->TextFmt( substr( $sWk, 7 ), '_native_' ),
Kind => $iKind,
_Pos => $iPos,
_Book => $oBook,
_SheetNo => $oBook->{SheetCount},
SheetHidden => $iGr & 0x03
);
}
$oBook->{SheetCount}++;
}
#------------------------------------------------------------------------------
# _subHeader (for Spreadsheet::ParseExcel) DK: P340
#------------------------------------------------------------------------------
sub _subHeader {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $sW;
if ( !defined $sWk ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} = undef;
return;
}
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
$sW = _convBIFF8String( $oBook, $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} =
( $sW eq "\x00" ) ? undef : $sW;
}
#Before BIFF8
else {
my ( $iLen ) = unpack( "c", $sWk );
$sW =
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} =
( $sW eq "\x00\x00\x00" ) ? undef : $sW;
}
}
#------------------------------------------------------------------------------
# _subFooter (for Spreadsheet::ParseExcel) DK: P335
#------------------------------------------------------------------------------
sub _subFooter {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $sW;
if ( !defined $sWk ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} = undef;
return;
}
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
$sW = _convBIFF8String( $oBook, $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} =
( $sW eq "\x00" ) ? undef : $sW;
}
#Before BIFF8
else {
my ( $iLen ) = unpack( "c", $sWk );
$sW =
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} =
( $sW eq "\x00\x00\x00" ) ? undef : $sW;
}
}
#------------------------------------------------------------------------------
# _subHPageBreak (for Spreadsheet::ParseExcel) DK: P341
#------------------------------------------------------------------------------
sub _subHPageBreak {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my @aBreak;
my $iCnt = unpack( "v", $sWk );
return undef unless ( defined $oBook->{_CurSheet} );
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
for ( my $i = 0 ; $i < $iCnt ; $i++ ) {
my ( $iRow, $iColB, $iColE ) =
unpack( 'v3', substr( $sWk, 2 + $i * 6, 6 ) );
# push @aBreak, [$iRow, $iColB, $iColE];
push @aBreak, $iRow;
}
}
#Before BIFF8
else {
for ( my $i = 0 ; $i < $iCnt ; $i++ ) {
my ( $iRow ) = unpack( 'v', substr( $sWk, 2 + $i * 2, 2 ) );
push @aBreak, $iRow;
# push @aBreak, [$iRow, 0, 255];
}
}
@aBreak = sort { $a <=> $b } @aBreak;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{HPageBreak} = \@aBreak;
}
#------------------------------------------------------------------------------
# _subVPageBreak (for Spreadsheet::ParseExcel) DK: P447
#------------------------------------------------------------------------------
sub _subVPageBreak {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my @aBreak;
my $iCnt = unpack( "v", $sWk );
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
for ( my $i = 0 ; $i < $iCnt ; $i++ ) {
my ( $iCol, $iRowB, $iRowE ) =
unpack( 'v3', substr( $sWk, 2 + $i * 6, 6 ) );
push @aBreak, $iCol;
# push @aBreak, [$iCol, $iRowB, $iRowE];
}
}
#Before BIFF8
else {
for ( my $i = 0 ; $i < $iCnt ; $i++ ) {
my ( $iCol ) = unpack( 'v', substr( $sWk, 2 + $i * 2, 2 ) );
push @aBreak, $iCol;
# push @aBreak, [$iCol, 0, 65535];
}
}
@aBreak = sort { $a <=> $b } @aBreak;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{VPageBreak} = \@aBreak;
}
#------------------------------------------------------------------------------
# _subMargin (for Spreadsheet::ParseExcel) DK: P306, 345, 400, 440
#------------------------------------------------------------------------------
sub _subMargin {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
# The "Mergin" options are a workaround for a backward compatible typo.
my $dWk = _convDval( substr( $sWk, 0, 8 ) );
if ( $bOp == 0x26 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{LeftMergin} = $dWk;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{LeftMargin} = $dWk;
}
elsif ( $bOp == 0x27 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RightMergin} = $dWk;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RightMargin} = $dWk;
}
elsif ( $bOp == 0x28 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{TopMergin} = $dWk;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{TopMargin} = $dWk;
}
elsif ( $bOp == 0x29 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{BottomMergin} = $dWk;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{BottomMargin} = $dWk;
}
}
#------------------------------------------------------------------------------
# _subHcenter (for Spreadsheet::ParseExcel) DK: P340
#------------------------------------------------------------------------------
sub _subHcenter {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $iWk = unpack( "v", $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{HCenter} = $iWk;
}
#------------------------------------------------------------------------------
# _subVcenter (for Spreadsheet::ParseExcel) DK: P447
#------------------------------------------------------------------------------
sub _subVcenter {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $iWk = unpack( "v", $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{VCenter} = $iWk;
}
#------------------------------------------------------------------------------
# _subPrintGridlines (for Spreadsheet::ParseExcel) DK: P397
#------------------------------------------------------------------------------
sub _subPrintGridlines {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $iWk = unpack( "v", $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PrintGrid} = $iWk;
}
#------------------------------------------------------------------------------
# _subPrintHeaders (for Spreadsheet::ParseExcel) DK: P397
#------------------------------------------------------------------------------
sub _subPrintHeaders {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $iWk = unpack( "v", $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PrintHeaders} = $iWk;
}
#------------------------------------------------------------------------------
# _subSETUP (for Spreadsheet::ParseExcel) DK: P409
#------------------------------------------------------------------------------
sub _subSETUP {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
# Workaround for some apps and older Excels that don't write a
# complete SETUP record.
return undef if $bLen != 34;
my $oWkS = $oBook->{Worksheet}[ $oBook->{_CurSheet} ];
my $iGrBit;
(
$oWkS->{PaperSize}, $oWkS->{Scale}, $oWkS->{PageStart},
$oWkS->{FitWidth}, $oWkS->{FitHeight}, $iGrBit,
$oWkS->{Res}, $oWkS->{VRes},
) = unpack( 'v8', $sWk );
$oWkS->{HeaderMargin} = _convDval( substr( $sWk, 16, 8 ) );
$oWkS->{FooterMargin} = _convDval( substr( $sWk, 24, 8 ) );
$oWkS->{Copis} = unpack( 'v2', substr( $sWk, 32, 2 ) );
$oWkS->{LeftToRight} = ( ( $iGrBit & 0x01 ) ? 1 : 0 );
$oWkS->{Landscape} = ( ( $iGrBit & 0x02 ) ? 1 : 0 );
$oWkS->{NoPls} = ( ( $iGrBit & 0x04 ) ? 1 : 0 );
$oWkS->{NoColor} = ( ( $iGrBit & 0x08 ) ? 1 : 0 );
$oWkS->{Draft} = ( ( $iGrBit & 0x10 ) ? 1 : 0 );
$oWkS->{Notes} = ( ( $iGrBit & 0x20 ) ? 1 : 0 );
$oWkS->{NoOrient} = ( ( $iGrBit & 0x40 ) ? 1 : 0 );
$oWkS->{UsePage} = ( ( $iGrBit & 0x80 ) ? 1 : 0 );
# The NoPls flag indicates that the values have not been taken from an
# actual printer and thus may not be accurate.
# Set default scale if NoPls otherwise it may be an invalid value of 0XFF.
$oWkS->{Scale} = 100 if $oWkS->{NoPls};
# Workaround for a backward compatible typo.
$oWkS->{HeaderMergin} = $oWkS->{HeaderMargin};
$oWkS->{FooterMergin} = $oWkS->{FooterMargin};
}
#------------------------------------------------------------------------------
# _subName (for Spreadsheet::ParseExcel) DK: P350
#------------------------------------------------------------------------------
sub _subName {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my (
$iGrBit, $cKey, $cCh, $iCce, $ixAls,
$iTab, $cchCust, $cchDsc, $cchHep, $cchStatus
) = unpack( 'vc2v3c4', $sWk );
#Builtin Name + Length == 1
if ( ( $iGrBit & 0x20 ) && ( $cCh == 1 ) ) {
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
my $iName = unpack( 'n', substr( $sWk, 14 ) );
my $iSheet = unpack( 'v', substr( $sWk, 8 ) ) - 1;
# Workaround for mal-formed Excel workbooks where Print_Title is
# set as Global (i.e. itab = 0). Note, this will have to be
# treated differently when we get around to handling global names.
return undef if $iSheet == -1;
if ( $iName == 6 ) { #PrintArea
my ( $iSheetW, $raArea ) = _ParseNameArea( substr( $sWk, 16 ) );
$oBook->{PrintArea}[$iSheet] = $raArea;
}
elsif ( $iName == 7 ) { #Title
my ( $iSheetW, $raArea ) = _ParseNameArea( substr( $sWk, 16 ) );
my @aTtlR = ();
my @aTtlC = ();
foreach my $raI ( @$raArea ) {
if ( $raI->[3] == 0xFF ) { #Row Title
push @aTtlR, [ $raI->[0], $raI->[2] ];
}
else { #Col Title
push @aTtlC, [ $raI->[1], $raI->[3] ];
}
}
$oBook->{PrintTitle}[$iSheet] =
{ Row => \@aTtlR, Column => \@aTtlC };
}
}
else {
my $iName = unpack( 'c', substr( $sWk, 14 ) );
if ( $iName == 6 ) { #PrintArea
my ( $iSheet, $raArea ) =
_ParseNameArea95( substr( $sWk, 15 ) );
$oBook->{PrintArea}[$iSheet] = $raArea;
}
elsif ( $iName == 7 ) { #Title
my ( $iSheet, $raArea ) =
_ParseNameArea95( substr( $sWk, 15 ) );
my @aTtlR = ();
my @aTtlC = ();
foreach my $raI ( @$raArea ) {
if ( $raI->[3] == 0xFF ) { #Row Title
push @aTtlR, [ $raI->[0], $raI->[2] ];
}
else { #Col Title
push @aTtlC, [ $raI->[1], $raI->[3] ];
}
}
$oBook->{PrintTitle}[$iSheet] =
{ Row => \@aTtlR, Column => \@aTtlC };
}
}
}
}
#------------------------------------------------------------------------------
# ParseNameArea (for Spreadsheet::ParseExcel) DK: 494 (ptgAread3d)
#------------------------------------------------------------------------------
sub _ParseNameArea {
my ( $sObj ) = @_;
my ( $iOp );
my @aRes = ();
$iOp = unpack( 'C', $sObj );
my $iSheet;
if ( $iOp == 0x3b ) {
my ( $iWkS, $iRs, $iRe, $iCs, $iCe ) =
unpack( 'v5', substr( $sObj, 1 ) );
$iSheet = $iWkS;
push @aRes, [ $iRs, $iCs, $iRe, $iCe ];
}
elsif ( $iOp == 0x29 ) {
my $iLen = unpack( 'v', substr( $sObj, 1, 2 ) );
my $iSt = 0;
while ( $iSt < $iLen ) {
my ( $iOpW, $iWkS, $iRs, $iRe, $iCs, $iCe ) =
unpack( 'cv5', substr( $sObj, $iSt + 3, 11 ) );
if ( $iOpW == 0x3b ) {
$iSheet = $iWkS;
push @aRes, [ $iRs, $iCs, $iRe, $iCe ];
}
if ( $iSt == 0 ) {
$iSt += 11;
}
else {
$iSt += 12; #Skip 1 byte;
}
}
}
return ( $iSheet, \@aRes );
}
#------------------------------------------------------------------------------
# ParseNameArea95 (for Spreadsheet::ParseExcel) DK: 494 (ptgAread3d)
#------------------------------------------------------------------------------
sub _ParseNameArea95 {
my ( $sObj ) = @_;
my ( $iOp );
my @aRes = ();
$iOp = unpack( 'C', $sObj );
my $iSheet;
if ( $iOp == 0x3b ) {
$iSheet = unpack( 'v', substr( $sObj, 11, 2 ) );
my ( $iRs, $iRe, $iCs, $iCe ) =
unpack( 'v2C2', substr( $sObj, 15, 6 ) );
push @aRes, [ $iRs, $iCs, $iRe, $iCe ];
}
elsif ( $iOp == 0x29 ) {
my $iLen = unpack( 'v', substr( $sObj, 1, 2 ) );
my $iSt = 0;
while ( $iSt < $iLen ) {
my $iOpW = unpack( 'c', substr( $sObj, $iSt + 3, 6 ) );
$iSheet = unpack( 'v', substr( $sObj, $iSt + 14, 2 ) );
my ( $iRs, $iRe, $iCs, $iCe ) =
unpack( 'v2C2', substr( $sObj, $iSt + 18, 6 ) );
push @aRes, [ $iRs, $iCs, $iRe, $iCe ] if ( $iOpW == 0x3b );
if ( $iSt == 0 ) {
$iSt += 21;
}
else {
$iSt += 22; #Skip 1 byte;
}
}
}
return ( $iSheet, \@aRes );
}
#------------------------------------------------------------------------------
# _subBOOL (for Spreadsheet::ParseExcel) DK: P452
#------------------------------------------------------------------------------
sub _subWSBOOL {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PageFit} =
( ( unpack( 'v', $sWk ) & 0x100 ) ? 1 : 0 );
}
#------------------------------------------------------------------------------
# _subMergeArea (for Spreadsheet::ParseExcel) DK: (Not)
#------------------------------------------------------------------------------
sub _subMergeArea {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $iCnt = unpack( "v", $sWk );
my $oWkS = $oBook->{Worksheet}[ $oBook->{_CurSheet} ];
$oWkS->{MergedArea} = [] unless ( defined $oWkS->{MergedArea} );
for ( my $i = 0 ; $i < $iCnt ; $i++ ) {
my ( $iRs, $iRe, $iCs, $iCe ) =
unpack( 'v4', substr( $sWk, $i * 8 + 2, 8 ) );
for ( my $iR = $iRs ; $iR <= $iRe ; $iR++ ) {
for ( my $iC = $iCs ; $iC <= $iCe ; $iC++ ) {
$oWkS->{Cells}[$iR][$iC]->{Merged} = 1
if ( defined $oWkS->{Cells}[$iR][$iC] );
}
}
push @{ $oWkS->{MergedArea} }, [ $iRs, $iCs, $iRe, $iCe ];
}
}
#------------------------------------------------------------------------------
# DecodeBoolErr (for Spreadsheet::ParseExcel) DK: P306
#------------------------------------------------------------------------------
sub DecodeBoolErr {
my ( $iVal, $iFlg ) = @_;
if ( $iFlg ) { # ERROR
if ( $iVal == 0x00 ) {
return "#NULL!";
}
elsif ( $iVal == 0x07 ) {
return "#DIV/0!";
}
elsif ( $iVal == 0x0F ) {
return "#VALUE!";
}
elsif ( $iVal == 0x17 ) {
return "#REF!";
}
elsif ( $iVal == 0x1D ) {
return "#NAME?";
}
elsif ( $iVal == 0x24 ) {
return "#NUM!";
}
elsif ( $iVal == 0x2A ) {
return "#N/A!";
}
else {
return "#ERR";
}
}
else {
return ( $iVal ) ? "TRUE" : "FALSE";
}
}
###############################################################################
#
# _decode_rk_number()
#
# Convert an encoded RK number into a real number. The RK encoding is
# explained in some detail in the MS docs. It is a way of storing applicable
# ints and doubles in 32bits (30 data + 2 info bits) in order to save space.
#
sub _decode_rk_number {
my $rk_number = shift;
my $number;
# Check the main RK type.
if ( $rk_number & 0x02 ) {
# RK Type 2 and 4, a packed integer.
# Shift off the info bits.
$number = $rk_number >> 2;
# Convert from unsigned to signed if required.
$number -= 0x40000000 if $number & 0x20000000;
}
else {
# RK Type 1 and 3, a truncated IEEE Double.
# Pack the RK number into the high 30 bits of an IEEE double.
$number = pack "VV", 0x0000, $rk_number & 0xFFFFFFFC;
# Reverse the packed IEEE double on big-endian machines.
$number = reverse $number if $BIGENDIAN;
# Unpack the number.
$number = unpack "d", $number;
}
# RK Types 3 and 4 were multiplied by 100 prior to encoding.
$number /= 100 if $rk_number & 0x01;
return $number;
}
###############################################################################
#
# _subStrWk()
#
# Extract the workbook strings from the SST (Shared String Table) record and
# any following CONTINUE records.
#
# The workbook strings are initially contained in the SST block but may also
# occupy one or more CONTINUE blocks. Reading the CONTINUE blocks is made a
# little tricky by the fact that they can contain an additional initial byte
# if a string is continued from a previous block.
#
# Parsing is further complicated by the fact that the continued section of the
# string may have a different encoding (ASCII or UTF-8) from the previous
# section. Excel does this to save space.
#
sub _subStrWk {
my ( $self, $biff_data, $is_continue ) = @_;
if ( $is_continue ) {
# We are reading a CONTINUE record.
if ( $self->{_buffer} eq '' ) {
# A CONTINUE block with no previous SST.
$self->{_buffer} .= $biff_data;
}
elsif ( !defined $self->{_string_continued} ) {
# The CONTINUE block starts with a new (non-continued) string.
# Strip the Grbit byte and store the string data.
$self->{_buffer} .= substr $biff_data, 1;
}
else {
# A CONTINUE block that starts with a continued string.
# The first byte (Grbit) of the CONTINUE record indicates if (0)
# the continued string section is single bytes or (1) double bytes.
my $grbit = ord $biff_data;
my ( $str_position, $str_length ) = @{ $self->{_previous_info} };
my $buff_length = length $self->{_buffer};
if ( $buff_length >= ( $str_position + $str_length ) ) {
# Not in a string.
$self->{_buffer} .= $biff_data;
}
elsif ( ( $self->{_string_continued} & 0x01 ) == ( $grbit & 0x01 ) )
{
# Same encoding as the previous block of the string.
$self->{_buffer} .= substr( $biff_data, 1 );
}
else {
# Different encoding to the previous block of the string.
if ( $grbit & 0x01 ) {
# Current block is UTF-16, previous was ASCII.
my ( undef, $cch ) = unpack 'vc', $self->{_buffer};
substr( $self->{_buffer}, 2, 1 ) = pack( 'C', $cch | 0x01 );
# Convert the previous ASCII, single character, portion of
# the string into a double character UTF-16 string by
# inserting zero bytes.
for (
my $i = ( $buff_length - $str_position ) ;
$i >= 1 ;
$i--
)
{
substr( $self->{_buffer}, $str_position + $i, 0 ) =
"\x00";
}
}
else {
# Current block is ASCII, previous was UTF-16.
# Convert the current ASCII, single character, portion of
# the string into a double character UTF-16 string by
# inserting null bytes.
my $change_length =
( $str_position + $str_length ) - $buff_length;
# Length of the current CONTINUE record data.
my $biff_length = length $biff_data;
# Restrict the portion to be changed to the current block
# if the string extends over more than one block.
if ( $change_length > ( $biff_length - 1 ) * 2 ) {
$change_length = ( $biff_length - 1 ) * 2;
}
# Insert the null bytes.
for ( my $i = ( $change_length / 2 ) ; $i >= 1 ; $i-- ) {
substr( $biff_data, $i + 1, 0 ) = "\x00";
}
}
# Strip the Grbit byte and store the string data.
$self->{_buffer} .= substr $biff_data, 1;
}
}
}
else {
# Not a CONTINUE block therefore an SST block.
$self->{_buffer} .= $biff_data;
}
# Reset the state variables.
$self->{_string_continued} = undef;
$self->{_previous_info} = undef;
# Extract out any full strings from the current buffer leaving behind a
# partial string that is continued into the next block, or an empty
# buffer is no string is continued.
while ( length $self->{_buffer} >= 4 ) {
my ( $str_info, $length, $str_position, $str_length ) =
_convBIFF8String( $self, $self->{_buffer}, 1 );
if ( defined $str_info->[0] ) {
push @{ $self->{PkgStr} },
{
Text => $str_info->[0],
Unicode => $str_info->[1],
Rich => $str_info->[2],
Ext => $str_info->[3],
};
$self->{_buffer} = substr( $self->{_buffer}, $length );
}
else {
$self->{_string_continued} = $str_info->[1];
$self->{_previous_info} = [ $str_position, $str_length ];
last;
}
}
}
#------------------------------------------------------------------------------
# _SwapForUnicode (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _SwapForUnicode {
my ( $sObj ) = @_;
# for(my $i = 0; $i<length($$sObj); $i+=2){
for ( my $i = 0 ; $i < ( int( length( $$sObj ) / 2 ) * 2 ) ; $i += 2 ) {
my $sIt = substr( $$sObj, $i, 1 );
substr( $$sObj, $i, 1 ) = substr( $$sObj, $i + 1, 1 );
substr( $$sObj, $i + 1, 1 ) = $sIt;
}
}
#------------------------------------------------------------------------------
# _NewCell (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _NewCell {
my ( $oBook, $iR, $iC, %rhKey ) = @_;
my ( $sWk, $iLen );
return undef unless ( defined $oBook->{_CurSheet} );
my $FmtClass = $oBook->{FmtClass};
$rhKey{Type} =
$FmtClass->ChkType( $rhKey{Numeric}, $rhKey{Format}{FmtIdx} );
my $FmtStr = $oBook->{FormatStr}{ $rhKey{Format}{FmtIdx} };
# Set "Date" type if required for numbers in a MulRK BIFF block.
if ( defined $FmtStr && $rhKey{Type} eq "Numeric" ) {
# Match a range of possible date formats. Note: this isn't important
# except for reporting. The number will still be converted to a date
# by ExcelFmt() even if 'Type' isn't set to 'Date'.
if ( $FmtStr =~ m{^[dmy][-\\/dmy]*$}i ) {
$rhKey{Type} = "Date";
}
}
my $oCell = Spreadsheet::ParseExcel::Cell->new(
Val => $rhKey{Val},
FormatNo => $rhKey{FormatNo},
Format => $rhKey{Format},
Code => $rhKey{Code},
Type => $rhKey{Type},
);
$oCell->{_Kind} = $rhKey{Kind};
$oCell->{_Value} = $FmtClass->ValFmt( $oCell, $oBook );
if ( $rhKey{Rich} ) {
my @aRich = ();
my $sRich = $rhKey{Rich};
for ( my $iWk = 0 ; $iWk < length( $sRich ) ; $iWk += 4 ) {
my ( $iPos, $iFnt ) = unpack( 'v2', substr( $sRich, $iWk ) );
push @aRich, [ $iPos, $oBook->{Font}[$iFnt] ];
}
$oCell->{Rich} = \@aRich;
}
if ( defined $oBook->{CellHandler} ) {
if ( defined $oBook->{Object} ) {
no strict;
ref( $oBook->{CellHandler} ) eq "CODE"
? $oBook->{CellHandler}->(
$_Object, $oBook, $oBook->{_CurSheet}, $iR, $iC, $oCell
)
: $oBook->{CellHandler}->callback( $_Object, $oBook, $oBook->{_CurSheet},
$iR, $iC, $oCell );
}
else {
$oBook->{CellHandler}->( $oBook, $oBook->{_CurSheet}, $iR, $iC, $oCell );
}
}
unless ( $oBook->{NotSetCell} ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Cells}[$iR][$iC] = $oCell;
}
return $oCell;
}
#------------------------------------------------------------------------------
# ColorIdxToRGB (for Spreadsheet::ParseExcel)
#
# Returns for most recently opened book for compatibility, use
# Workbook::color_idx_to_rgb instead
#
#------------------------------------------------------------------------------
sub ColorIdxToRGB {
my ( $sPkg, $iIdx ) = @_;
unless( defined $currentbook ) {
return ( ( defined $aColor[$iIdx] ) ? $aColor[$iIdx] : $aColor[0] );
}
return $currentbook->color_idx_to_rgb( $iIdx );
}
###############################################################################
#
# error().
#
# Return an error string for a failed parse().
#
sub error {
my $self = shift;
my $parse_error = $self->{_error_status};
if ( exists $error_strings{$parse_error} ) {
return $error_strings{$parse_error};
}
else {
return 'Unknown parse error';
}
}
###############################################################################
#
# error_code().
#
# Return an error code for a failed parse().
#
sub error_code {
my $self = shift;
return $self->{_error_status};
}
###############################################################################
#
# Mapping between legacy method names and new names.
#
{
no warnings; # Ignore warnings about variables used only once.
*Parse = *parse;
}
1;
__END__
=head1 NAME
Spreadsheet::ParseExcel - Read information from an Excel file.
=head1 SYNOPSIS
#!/usr/bin/perl -w
use strict;
use Spreadsheet::ParseExcel;
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
if ( !defined $workbook ) {
die $parser->error(), ".\n";
}
for my $worksheet ( $workbook->worksheets() ) {
my ( $row_min, $row_max ) = $worksheet->row_range();
my ( $col_min, $col_max ) = $worksheet->col_range();
for my $row ( $row_min .. $row_max ) {
for my $col ( $col_min .. $col_max ) {
my $cell = $worksheet->get_cell( $row, $col );
next unless $cell;
print "Row, Col = ($row, $col)\n";
print "Value = ", $cell->value(), "\n";
print "Unformatted = ", $cell->unformatted(), "\n";
print "\n";
}
}
}
=head1 DESCRIPTION
The Spreadsheet::ParseExcel module can be used to read information from Excel 95-2003 binary files.
The module cannot read files in the Excel 2007 Open XML XLSX format. See the L<Spreadsheet::XLSX> module instead.
=head1 Parser
=head2 new()
The C<new()> method is used to create a new C<Spreadsheet::ParseExcel> parser object.
my $parser = Spreadsheet::ParseExcel->new();
It is possible to pass a password to decrypt an encrypted file:
$parser = Spreadsheet::ParseExcel->new( Password => 'secret' );
Only the default Excel encryption scheme is currently supported. See L</Decryption>.
As an advanced feature it is also possible to pass a call-back handler to the parser to control the parsing of the spreadsheet.
$parser = Spreadsheet::ParseExcel->new(
CellHandler => \&cell_handler,
NotSetCell => 1,
);
The call-back can be used to ignore certain cells or to reduce memory usage. See the section L<Reducing the memory usage of Spreadsheet::ParseExcel> for more information.
=head2 parse($filename, $formatter)
The Parser C<parse()> method returns a L</Workbook> object.
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
If an error occurs C<parse()> returns C<undef>. In general, programs should contain a test for failed parsing as follows:
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
if ( !defined $workbook ) {
die $parser->error(), ".\n";
}
The C<$filename> parameter is generally the file to be parsed. However, it can also be a filehandle or a scalar reference.
The optional C<$formatter> parameter can be an reference to a L</Formatter Class> to format the value of cells. This is useful for parsing workbooks with Unicode or Asian characters:
my $parser = Spreadsheet::ParseExcel->new();
my $formatter = Spreadsheet::ParseExcel::FmtJapan->new();
my $workbook = $parser->parse( 'Book1.xls', $formatter );
The L<Spreadsheet::ParseExcel::FmtJapan> formatter also supports Unicode. If you encounter any encoding problems with the default formatter try that instead.
=head2 error()
The Parser C<error()> method returns an error string if a C<parse()> fails:
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
if ( !defined $workbook ) {
die $parser->error(), ".\n";
}
If you wish to generate you own error string you can use the C<error_code()> method instead (see below). The C<error()> and C<error_code()> values are as follows:
error() error_code()
======= ============
'' 0
'File not found' 1
'No Excel data found in file' 2
'File is encrypted' 3
The C<error_code()> method is explained below.
Spreadsheet::ParseExcel will try to decrypt an encrypted Excel file using the default password or a user supplied password passed to C<new()>, see above. If these fail the module will return the C<'File is encrypted'> error. Only the default Excel encryption scheme is currently supported, see L</Decryption>.
=head2 error_code()
The Parser C<error_code()> method returns an error code if a C<parse()> fails:
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
if ( !defined $workbook ) {
die "Got error code ", $parser->error_code, ".\n";
}
This can be useful if you wish to employ you own error strings or error handling methods.
=head1 Workbook
A C<Spreadsheet::ParseExcel::Workbook> is created via the C<Spreadsheet::ParseExcel> C<parse()> method:
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
The main methods of the Workbook class are:
$workbook->worksheets()
$workbook->worksheet()
$workbook->worksheet_count()
$workbook->get_filename()
These more commonly used methods of the Workbook class are outlined below. The other, less commonly used, methods are documented in L<Spreadsheet::ParseExcel::Worksheet>.
=head2 worksheets()
Returns an array of L</Worksheet> objects. This was most commonly used to iterate over the worksheets in a workbook:
for my $worksheet ( $workbook->worksheets() ) {
...
}
=head2 worksheet()
The C<worksheet()> method returns a single C<Worksheet> object using either its name or index:
$worksheet = $workbook->worksheet('Sheet1');
$worksheet = $workbook->worksheet(0);
Returns C<undef> if the sheet name or index doesn't exist.
=head2 worksheet_count()
The C<worksheet_count()> method returns the number of Worksheet objects in the Workbook.
my $worksheet_count = $workbook->worksheet_count();
=head2 get_filename()
The C<get_filename()> method returns the name of the Excel file of C<undef> if the data was read from a filehandle rather than a file.
my $filename = $workbook->get_filename();
=head2 Other Workbook Methods
For full documentation of the methods available via a Workbook object see L<Spreadsheet::ParseExcel::Workbook>.
=head1 Worksheet
The C<Spreadsheet::ParseExcel::Worksheet> class encapsulates the properties of an Excel worksheet.
A Worksheet object is obtained via the L</worksheets()> or L</worksheet()> methods.
for my $worksheet ( $workbook->worksheets() ) {
...
}
# Or:
$worksheet = $workbook->worksheet('Sheet1');
$worksheet = $workbook->worksheet(0);
The most commonly used methods of the Worksheet class are:
$worksheet->get_cell()
$worksheet->row_range()
$worksheet->col_range()
$worksheet->get_name()
The Spreadsheet::ParseExcel::Worksheet class exposes a lot of methods but in general very few are required unless you are writing an advanced filter.
The most commonly used methods are detailed below. The others are documented in L<Spreadsheet::ParseExcel::Worksheet>.
=head2 get_cell($row, $col)
Return the L</Cell> object at row C<$row> and column C<$col> if it is defined. Otherwise returns undef.
my $cell = $worksheet->get_cell($row, $col);
=head2 row_range()
Returns a two-element list C<($min, $max)> containing the minimum and maximum defined rows in the worksheet. If there is no row defined C<$max> is smaller than C<$min>.
my ( $row_min, $row_max ) = $worksheet->row_range();
=head2 col_range()
Returns a two-element list C<($min, $max)> containing the minimum and maximum of defined columns in the worksheet. If there is no column defined C<$max> is smaller than C<$min>.
my ( $col_min, $col_max ) = $worksheet->col_range();
=head2 get_name()
The C<get_name()> method returns the name of the worksheet, such as 'Sheet1'.
my $name = $worksheet->get_name();
=head2 Other Worksheet Methods
For other, less commonly used, Worksheet methods see L<Spreadsheet::ParseExcel::Worksheet>.
=head1 Cell
The C<Spreadsheet::ParseExcel::Cell> class has the following main methods.
$cell->value()
$cell->unformatted()
=head2 value()
The C<value()> method returns the formatted value of the cell.
my $value = $cell->value();
Formatted in this sense refers to the numeric format of the cell value. For example a number such as 40177 might be formatted as 40,117, 40117.000 or even as the date 2009/12/30.
If the cell doesn't contain a numeric format then the formatted and unformatted cell values are the same, see the C<unformatted()> method below.
For a defined C<$cell> the C<value()> method will always return a value.
In the case of a cell with formatting but no numeric or string contents the method will return the empty string C<''>.
=head2 unformatted()
The C<unformatted()> method returns the unformatted value of the cell.
my $unformatted = $cell->unformatted();
Returns the cell value without a numeric format. See the C<value()> method above.
=head2 Other Cell Methods
For other, less commonly used, Worksheet methods see L<Spreadsheet::ParseExcel::Cell>.
=head1 Format
The C<Spreadsheet::ParseExcel::Format> class has the following properties:
=head2 Format properties
$format->{Font}
$format->{AlignH}
$format->{AlignV}
$format->{Indent}
$format->{Wrap}
$format->{Shrink}
$format->{Rotate}
$format->{JustLast}
$format->{ReadDir}
$format->{BdrStyle}
$format->{BdrColor}
$format->{BdrDiag}
$format->{Fill}
$format->{Lock}
$format->{Hidden}
$format->{Style}
These properties are generally only of interest to advanced users. Casual users can skip this section.
=head2 $format->{Font}
Returns the L</Font> object for the Format.
=head2 $format->{AlignH}
Returns the horizontal alignment of the format where the value has the following meaning:
0 => No alignment
1 => Left
2 => Center
3 => Right
4 => Fill
5 => Justify
6 => Center across
7 => Distributed/Equal spaced
=head2 $format->{AlignV}
Returns the vertical alignment of the format where the value has the following meaning:
0 => Top
1 => Center
2 => Bottom
3 => Justify
4 => Distributed/Equal spaced
=head2 $format->{Indent}
Returns the indent level of the C<Left> horizontal alignment.
=head2 $format->{Wrap}
Returns true if textwrap is on.
=head2 $format->{Shrink}
Returns true if "Shrink to fit" is set for the format.
=head2 $format->{Rotate}
Returns the text rotation. In Excel97+, it returns the angle in degrees of the text rotation.
In Excel95 or earlier it returns a value as follows:
0 => No rotation
1 => Top down
2 => 90 degrees anti-clockwise,
3 => 90 clockwise
=head2 $format->{JustLast}
Return true if the "justify last" property is set for the format.
=head2 $format->{ReadDir}
Returns the direction that the text is read from.
=head2 $format->{BdrStyle}
Returns an array ref of border styles as follows:
[ $left, $right, $top, $bottom ]
=head2 $format->{BdrColor}
Returns an array ref of border color indexes as follows:
[ $left, $right, $top, $bottom ]
=head2 $format->{BdrDiag}
Returns an array ref of diagonal border kind, style and color index as follows:
[$kind, $style, $color ]
Where kind is:
0 => None
1 => Right-Down
2 => Right-Up
3 => Both
=head2 $format->{Fill}
Returns an array ref of fill pattern and color indexes as follows:
[ $pattern, $front_color, $back_color ]
=head2 $format->{Lock}
Returns true if the cell is locked.
=head2 $format->{Hidden}
Returns true if the cell is Hidden.
=head2 $format->{Style}
Returns true if the format is a Style format.
=head1 Font
I<Spreadsheet::ParseExcel::Font>
Format class has these properties:
=head1 Font Properties
$font->{Name}
$font->{Bold}
$font->{Italic}
$font->{Height}
$font->{Underline}
$font->{UnderlineStyle}
$font->{Color}
$font->{Strikeout}
$font->{Super}
=head2 $font->{Name}
Returns the name of the font, for example 'Arial'.
=head2 $font->{Bold}
Returns true if the font is bold.
=head2 $font->{Italic}
Returns true if the font is italic.
=head2 $font->{Height}
Returns the size (height) of the font.
=head2 $font->{Underline}
Returns true if the font in underlined.
=head2 $font->{UnderlineStyle}
Returns the style of an underlined font where the value has the following meaning:
0 => None
1 => Single
2 => Double
33 => Single accounting
34 => Double accounting
=head2 $font->{Color}
Returns the color index for the font. The mapping to an RGB color is defined by each workbook.
The index can be converted to a RGB string using the C<$workbook->ColorIdxToRGB()> Parser method.
(Older versions of C<Spreadsheet::ParseExcel> provided the C<ColorIdxToRGB> class method, which is deprecated.)
=head2 $font->{Strikeout}
Returns true if the font has the strikeout property set.
=head2 $font->{Super}
Returns one of the following values if the superscript or subscript property of the font is set:
0 => None
1 => Superscript
2 => Subscript
=head1 Formatter Class
Formatters can be passed to the C<parse()> method to deal with Unicode or Asian formatting.
Spreadsheet::ParseExcel includes 2 formatter classes. C<FmtDefault> and C<FmtJapanese>. It is also possible to create a user defined formatting class.
The formatter class C<Spreadsheet::ParseExcel::Fmt*> should provide the following functions:
=head2 ChkType($self, $is_numeric, $format_index)
Method to check the type of data in the cell. Should return C<Date>, C<Numeric> or C<Text>. It is passed the following parameters:
=over
=item $self
A scalar reference to the Formatter object.
=item $is_numeric
If true, the value seems to be number.
=item $format_index
The index number for the cell Format object.
=back
=head2 TextFmt($self, $string_data, $string_encoding)
Converts the string data in the cell into the correct encoding. It is passed the following parameters:
=over
=item $self
A scalar reference to the Formatter object.
=item $string_data
The original string/text data.
=item $string_encoding
The character encoding of original string/text.
=back
=head2 ValFmt($self, $cell, $workbook)
Convert the original unformatted cell value into the appropriate formatted value. For instance turn a number into a formatted date. It is passed the following parameters:
=over
=item $self
A scalar reference to the Formatter object.
=item $cell
A scalar reference to the Cell object.
=item $workbook
A scalar reference to the Workbook object.
=back
=head2 FmtString($self, $cell, $workbook)
Get the format string for the Cell. It is passed the following parameters:
=over
=item $self
A scalar reference to the Formatter object.
=item $cell
A scalar reference to the Cell object.
=item $workbook
A scalar reference to the Workbook object.
=back
=head1 Reducing the memory usage of Spreadsheet::ParseExcel
In some cases a C<Spreadsheet::ParseExcel> application may consume a lot of memory when processing a large Excel file and, as a result, may fail to complete. The following explains why this can occur and how to resolve it.
C<Spreadsheet::ParseExcel> processes an Excel file in two stages. In the first stage it extracts the Excel binary stream from the OLE container file using C<OLE::Storage_Lite>. In the second stage it parses the binary stream to read workbook, worksheet and cell data which it then stores in memory. The majority of the memory usage is required for storing cell data.
The reason for this is that as the Excel file is parsed and each cell is encountered a cell handling function creates a relatively large nested cell object that contains the cell value and all of the data that relates to the cell formatting. For large files (a 10MB Excel file on a 256MB system) this overhead can cause the system to grind to a halt.
However, in a lot of cases when an Excel file is being processed the only information that is required are the cell values. In these cases it is possible to avoid most of the memory overhead by specifying your own cell handling function and by telling Spreadsheet::ParseExcel not to store the parsed cell data. This is achieved by passing a cell handler function to C<new()> when creating the parse object. Here is an example.
#!/usr/bin/perl -w
use strict;
use Spreadsheet::ParseExcel;
my $parser = Spreadsheet::ParseExcel->new(
CellHandler => \&cell_handler,
NotSetCell => 1
);
my $workbook = $parser->parse('file.xls');
sub cell_handler {
my $workbook = $_[0];
my $sheet_index = $_[1];
my $row = $_[2];
my $col = $_[3];
my $cell = $_[4];
# Do something useful with the formatted cell value
print $cell->value(), "\n";
}
The user specified cell handler is passed as a code reference to C<new()> along with the parameter C<NotSetCell> which tells Spreadsheet::ParseExcel not to store the parsed cell. Note, you don't have to iterate over the rows and columns, this happens automatically as part of the parsing.
The cell handler is passed 5 arguments. The first, C<$workbook>, is a reference to the C<Spreadsheet::ParseExcel::Workbook> object that represent the parsed workbook. This can be used to access any of the C<Spreadsheet::ParseExcel::Workbook> methods, see L</Workbook>. The second C<$sheet_index> is the zero-based index of the worksheet being parsed. The third and fourth, C<$row> and C<$col>, are the zero-based row and column number of the cell. The fifth, C<$cell>, is a reference to the C<Spreadsheet::ParseExcel::Cell> object. This is used to extract the data from the cell. See L</Cell> for more information.
This technique can be useful if you are writing an Excel to database filter since you can put your DB calls in the cell handler.
If you don't want all of the data in the spreadsheet you can add some control logic to the cell handler. For example we can extend the previous example so that it only prints the first 10 rows of the first two worksheets in the parsed workbook by adding some C<if()> statements to the cell handler:
#!/usr/bin/perl -w
use strict;
use Spreadsheet::ParseExcel;
my $parser = Spreadsheet::ParseExcel->new(
CellHandler => \&cell_handler,
NotSetCell => 1
);
my $workbook = $parser->parse('file.xls');
sub cell_handler {
my $workbook = $_[0];
my $sheet_index = $_[1];
my $row = $_[2];
my $col = $_[3];
my $cell = $_[4];
# Skip some worksheets and rows (inefficiently).
return if $sheet_index >= 3;
return if $row >= 10;
# Do something with the formatted cell value
print $cell->value(), "\n";
}
However, this still processes the entire workbook. If you wish to save some additional processing time you can abort the parsing after you have read the data that you want, using the workbook C<ParseAbort> method:
#!/usr/bin/perl -w
use strict;
use Spreadsheet::ParseExcel;
my $parser = Spreadsheet::ParseExcel->new(
CellHandler => \&cell_handler,
NotSetCell => 1
);
my $workbook = $parser->parse('file.xls');
sub cell_handler {
my $workbook = $_[0];
my $sheet_index = $_[1];
my $row = $_[2];
my $col = $_[3];
my $cell = $_[4];
# Skip some worksheets and rows (more efficiently).
if ( $sheet_index >= 1 and $row >= 10 ) {
$workbook->ParseAbort(1);
return;
}
# Do something with the formatted cell value
print $cell->value(), "\n";
}
=head1 Decryption
If a workbook is "protected" then Excel will encrypt the file whether a password is supplied or not. As of version 0.59 Spreadsheet::ParseExcel supports decrypting Excel workbooks using a default or user supplied password. However, only the following encryption scheme is supported:
Office 97/2000 Compatible encryption
The following encryption methods are not supported:
Weak Encryption (XOR)
RC4, Microsoft Base Cryptographic Provider v1.0
RC4, Microsoft Base DSS and Diffie-Hellman Cryptographic Provider
RC4, Microsoft DH SChannel Cryptographic Provider
RC4, Microsoft Enhanced Cryptographic Provider v1.0
RC4, Microsoft Enhanced DSS and Diffie-Hellman Cryptographic Provider
RC4, Microsoft Enhanced RSA and AES Cryptographic Provider
RC4, Microsoft RSA SChannel Cryptographic Provider
RC4, Microsoft Strong Cryptographic Provider
See the following for more information on Excel encryption: L<http://office.microsoft.com/en-us/office-2003-resource-kit/important-aspects-of-password-and-encryption-protection-HA001140311.aspx>.
=head1 KNOWN PROBLEMS
=over
=item * Issues reported by users: L<http://rt.cpan.org/Public/Dist/Display.html?Name=Spreadsheet-ParseExcel>
=item * This module cannot read the values of formulas from files created with Spreadsheet::WriteExcel unless the user specified the values when creating the file (which is generally not the case). The reason for this is that Spreadsheet::WriteExcel writes the formula but not the formula result since it isn't in a position to calculate arbitrary Excel formulas without access to Excel's formula engine.
=item * If Excel has date fields where the specified format is equal to the system-default for the short-date locale, Excel does not store the format, but defaults to an internal format which is system dependent. In these cases ParseExcel uses the date format 'yyyy-mm-dd'.
=back
=head1 REPORTING A BUG
Bugs can be reported via rt.cpan.org. See the following for instructions on bug reporting for Spreadsheet::ParseExcel
L<http://rt.cpan.org/Public/Dist/Display.html?Name=Spreadsheet-ParseExcel>
=head1 SEE ALSO
=over
=item * xls2csv by Ken Prows L<http://search.cpan.org/~ken/xls2csv-1.06/script/xls2csv>.
=item * xls2csv and xlscat by H.Merijn Brand (these utilities are part of Spreadsheet::Read, see below).
=item * excel2txt by Ken Youens-Clark, L<http://search.cpan.org/~kclark/excel2txt/excel2txt>. This is an excellent example of an Excel filter using Spreadsheet::ParseExcel. It can produce CSV, Tab delimited, Html, XML and Yaml.
=item * XLSperl by Jon Allen L<http://search.cpan.org/~jonallen/XLSperl/bin/XLSperl>. This application allows you to use Perl "one-liners" with Microsoft Excel files.
=item * Spreadsheet::XLSX L<http://search.cpan.org/~dmow/Spreadsheet-XLSX/lib/Spreadsheet/XLSX.pm> by Dmitry Ovsyanko. A module with a similar interface to Spreadsheet::ParseExcel for parsing Excel 2007 XLSX OpenXML files.
=item * Spreadsheet::Read L<http://search.cpan.org/~hmbrand/Spreadsheet-Read/Read.pm> by H.Merijn Brand. A single interface for reading several different spreadsheet formats.
=item * Spreadsheet::WriteExcel L<http://search.cpan.org/~jmcnamara/Spreadsheet-WriteExcel/lib/Spreadsheet/WriteExcel.pm>. A perl module for creating new Excel files.
=item * Spreadsheet::ParseExcel::SaveParser L<http://search.cpan.org/~jmcnamara/Spreadsheet-ParseExcel/lib/Spreadsheet/ParseExcel/SaveParser.pm>. This is a combination of Spreadsheet::ParseExcel and Spreadsheet::WriteExcel and it allows you to "rewrite" an Excel file. See the following example L<http://search.cpan.org/~jmcnamara/Spreadsheet-WriteExcel/lib/Spreadsheet/WriteExcel.pm#MODIFYING_AND_REWRITING_EXCEL_FILES>. It is part of the Spreadsheet::ParseExcel distro.
=item * Text::CSV_XS L<http://search.cpan.org/~hmbrand/Text-CSV_XS/CSV_XS.pm> by H.Merijn Brand. A fast and rigorous module for reading and writing CSV data. Don't consider rolling your own CSV handling, use this module instead.
=back
=head1 MAILING LIST
There is a Google group for discussing and asking questions about Spreadsheet::ParseExcel. This is a good place to search to see if your question has been asked before: L<http://groups-beta.google.com/group/spreadsheet-parseexcel/>
=head1 DONATIONS
If you'd care to donate to the Spreadsheet::ParseExcel project, you can do so via PayPal: L<http://tinyurl.com/7ayes>
=head1 TODO
=over
=item * The current maintenance work is directed towards making the documentation more useful, improving and simplifying the API, and improving the maintainability of the code base. After that new features will be added.
=item * Fix open bugs and documentation for SaveParser.
=item * Add Formula support, Hyperlink support, Named Range support.
=item * Improve Spreadsheet::ParseExcel::SaveParser compatibility with Spreadsheet::WriteExcel.
=item * Improve Unicode and other encoding support. This will probably require dropping support for perls prior to 5.8+.
=back
=head1 ACKNOWLEDGEMENTS
From Kawai Takanori:
First of all, I would like to acknowledge the following valuable programs and modules:
XHTML, OLE::Storage and Spreadsheet::WriteExcel.
In no particular order: Yamaji Haruna, Simamoto Takesi, Noguchi Harumi, Ikezawa Kazuhiro, Suwazono Shugo, Hirofumi Morisada, Michael Edwards, Kim Namusk, Slaven Rezic, Grant Stevens, H.Merijn Brand and many many people + Kawai Mikako.
Alexey Mazurin added the decryption facility.
=head1 DISCLAIMER OF WARRANTY
Because this software is licensed free of charge, there is no warranty for the software, to the extent permitted by applicable law. Except when otherwise stated in writing the copyright holders and/or other parties provide the software "as is" without warranty of any kind, either expressed or implied, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. The entire risk as to the quality and performance of the software is with you. Should the software prove defective, you assume the cost of all necessary servicing, repair, or correction.
In no event unless required by applicable law or agreed to in writing will any copyright holder, or any other party who may modify and/or redistribute the software as permitted by the above licence, be liable to you for damages, including any general, special, incidental, or consequential damages arising out of the use or inability to use the software (including but not limited to loss of data or data being rendered inaccurate or losses sustained by you or third parties or a failure of the software to operate with any other software), even if such holder or other party has been advised of the possibility of such damages.
=head1 LICENSE
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php>
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori (Hippo2000) kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved. This is free software. You may distribute under the terms of either the GNU General Public License or the Artistic License.
=cut
SPREADSHEET_PARSEEXCEL
$fatpacked{"Spreadsheet/ParseExcel/Cell.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_CELL';
package Spreadsheet::ParseExcel::Cell;
###############################################################################
#
# Spreadsheet::ParseExcel::Cell - A class for Cell data and formatting.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
our $VERSION = '0.65';
###############################################################################
#
# new()
#
# Constructor.
#
sub new {
my ( $package, %properties ) = @_;
my $self = \%properties;
bless $self, $package;
}
###############################################################################
#
# value()
#
# Returns the formatted value of the cell.
#
sub value {
my $self = shift;
return $self->{_Value};
}
###############################################################################
#
# unformatted()
#
# Returns the unformatted value of the cell.
#
sub unformatted {
my $self = shift;
return $self->{Val};
}
###############################################################################
#
# get_format()
#
# Returns the Format object for the cell.
#
sub get_format {
my $self = shift;
return $self->{Format};
}
###############################################################################
#
# type()
#
# Returns the type of cell such as Text, Numeric or Date.
#
sub type {
my $self = shift;
return $self->{Type};
}
###############################################################################
#
# encoding()
#
# Returns the character encoding of the cell.
#
sub encoding {
my $self = shift;
if ( !defined $self->{Code} ) {
return 1;
}
elsif ( $self->{Code} eq 'ucs2' ) {
return 2;
}
elsif ( $self->{Code} eq '_native_' ) {
return 3;
}
else {
return 0;
}
return $self->{Code};
}
###############################################################################
#
# is_merged()
#
# Returns true if the cell is merged.
#
sub is_merged {
my $self = shift;
return $self->{Merged};
}
###############################################################################
#
# get_rich_text()
#
# Returns an array ref of font information about each string block in a "rich",
# i.e. multi-format, string.
#
sub get_rich_text {
my $self = shift;
return $self->{Rich};
}
###############################################################################
#
# get_hyperlink {
#
# Returns an array ref of hyperlink information if the cell contains a hyperlink.
# Returns undef otherwise
#
# [0] : Description of link (You may want $cell->value, as it will have rich text)
# [1] : URL - the link expressed as a URL. N.B. relative URLs will be defaulted to
# the directory of the input file, if the input file name is known. Otherwise
# %REL% will be inserted as a place-holder. Depending on your application,
# you should either remove %REL% or replace it with the appropriate path.
# [2] : Target frame (or undef if none)
sub get_hyperlink {
my $self = shift;
return $self->{Hyperlink} if exists $self->{Hyperlink};
return undef;
}
#
###############################################################################
#
# Mapping between legacy method names and new names.
#
{
no warnings; # Ignore warnings about variables used only once.
*Value = \&value;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::Cell - A class for Cell data and formatting.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 Methods
The following Cell methods are available:
$cell->value()
$cell->unformatted()
$cell->get_format()
$cell->type()
$cell->encoding()
$cell->is_merged()
$cell->get_rich_text()
$cell->get_hyperlink()
=head2 value()
The C<value()> method returns the formatted value of the cell.
my $value = $cell->value();
Formatted in this sense refers to the numeric format of the cell value. For example a number such as 40177 might be formatted as 40,117, 40117.000 or even as the date 2009/12/30.
If the cell doesn't contain a numeric format then the formatted and unformatted cell values are the same, see the C<unformatted()> method below.
For a defined C<$cell> the C<value()> method will always return a value.
In the case of a cell with formatting but no numeric or string contents the method will return the empty string C<''>.
=head2 unformatted()
The C<unformatted()> method returns the unformatted value of the cell.
my $unformatted = $cell->unformatted();
Returns the cell value without a numeric format. See the C<value()> method above.
=head2 get_format()
The C<get_format()> method returns the L<Spreadsheet::ParseExcel::Format> object for the cell.
my $format = $cell->get_format();
If a user defined format hasn't been applied to the cell then the default cell format is returned.
=head2 type()
The C<type()> method returns the type of cell such as Text, Numeric or Date. If the type was detected as Numeric, and the Cell Format matches C<m{^[dmy][-\\/dmy]*$}i>, it will be treated as a Date type.
my $type = $cell->type();
See also L<Dates and Time in Excel>.
=head2 encoding()
The C<encoding()> method returns the character encoding of the cell.
my $encoding = $cell->encoding();
This method is only of interest to developers. In general Spreadsheet::ParseExcel will return all character strings in UTF-8 regardless of the encoding used by Excel.
The C<encoding()> method returns one of the following values:
=over
=item * 0: Unknown format. This shouldn't happen. In the default case the format should be 1.
=item * 1: 8bit ASCII or single byte UTF-16. This indicates that the characters are encoded in a single byte. In Excel 95 and earlier This usually meant ASCII or an international variant. In Excel 97 it refers to a compressed UTF-16 character string where all of the high order bytes are 0 and are omitted to save space.
=item * 2: UTF-16BE.
=item * 3: Native encoding. In Excel 95 and earlier this encoding was used to represent multi-byte character encodings such as SJIS.
=back
=head2 is_merged()
The C<is_merged()> method returns true if the cell is merged.
my $is_merged = $cell->is_merged();
Returns C<undef> if the property isn't set.
=head2 get_rich_text()
The C<get_rich_text()> method returns an array ref of font information about each string block in a "rich", i.e. multi-format, string.
my $rich_text = $cell->get_rich_text();
The return value is an arrayref of arrayrefs in the form:
[
[ $start_position, $font_object ],
...,
]
Returns undef if the property isn't set.
=head2 get_hyperlink()
If a cell contains a hyperlink, the C<get_hyperlink()> method returns an array ref of information about it.
A cell can contain at most one hyperlink. If it does, it contains no other value.
Otherwise, it returns undef;
The array contains:
=over
=item * 0: Description (what's displayed); undef if not present
=item * 1: Link, converted to an appropriate URL - Note: Relative links are based on the input file. %REL% is used if the input file is unknown (e.g. a file handle or scalar)
=item * 2: Target - target frame (or undef if none)
=back
=head1 Dates and Time in Excel
Dates and times in Excel are represented by real numbers, for example "Jan 1 2001 12:30 PM" is represented by the number 36892.521.
The integer part of the number stores the number of days since the epoch and the fractional part stores the percentage of the day.
A date or time in Excel is just like any other number. The way in which it is displayed is controlled by the number format:
Number format $cell->value() $cell->unformatted()
============= ============== ==============
'dd/mm/yy' '28/02/08' 39506.5
'mm/dd/yy' '02/28/08' 39506.5
'd-m-yyyy' '28-2-2008' 39506.5
'dd/mm/yy hh:mm' '28/02/08 12:00' 39506.5
'd mmm yyyy' '28 Feb 2008' 39506.5
'mmm d yyyy hh:mm AM/PM' 'Feb 28 2008 12:00 PM' 39506.5
The L<Spreadsheet::ParseExcel::Utility> module contains a function called C<ExcelLocaltime> which will convert between an unformatted Excel date/time number and a C<localtime()> like array.
For date conversions using the CPAN C<DateTime> framework see L<DateTime::Format::Excel> http://search.cpan.org/search?dist=DateTime-Format-Excel
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_CELL
$fatpacked{"Spreadsheet/ParseExcel/Dump.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_DUMP';
package Spreadsheet::ParseExcel::Dump;
###############################################################################
#
# Spreadsheet::ParseExcel::Dump - A class for dumping Excel records.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
our $VERSION = '0.65';
my %NameTbl = (
#P291
0x0A => 'EOF',
0x0C => 'CALCCOUNT',
0x0D => 'CALCMODE',
0x0E => 'PRECISION',
0x0F => 'REFMODE',
0x10 => 'DELTA',
0x11 => 'ITERATION',
0x12 => 'PROTECT',
0x13 => 'PASSWORD',
0x14 => 'HEADER',
0x15 => 'FOOTER',
0x16 => 'EXTERNCOUNT',
0x17 => 'EXTERNSHEET',
0x19 => 'WINDOWPROTECT',
0x1A => 'VERTICALPAGEBREAKS',
0x1B => 'HORIZONTALPAGEBREAKS',
0x1C => 'NOTE',
0x1D => 'SELECTION',
0x22 => '1904',
0x26 => 'LEFTMARGIN',
0x27 => 'RIGHTMARGIN',
0x28 => 'TOPMARGIN',
0x29 => 'BOTTOMMARGIN',
0x2A => 'PRINTHEADERS',
0x2B => 'PRINTGRIDLINES',
0x2F => 'FILEPASS',
0x3C => 'COUNTINUE',
0x3D => 'WINDOW1',
0x40 => 'BACKUP',
0x41 => 'PANE',
0x42 => 'CODEPAGE',
0x4D => 'PLS',
0x50 => 'DCON',
0x51 => 'DCONREF',
#P292
0x52 => 'DCONNAME',
0x55 => 'DEFCOLWIDTH',
0x59 => 'XCT',
0x5A => 'CRN',
0x5B => 'FILESHARING',
0x5C => 'WRITEACCES',
0x5D => 'OBJ',
0x5E => 'UNCALCED',
0x5F => 'SAVERECALC',
0x60 => 'TEMPLATE',
0x63 => 'OBJPROTECT',
0x7D => 'COLINFO',
0x7E => 'RK',
0x7F => 'IMDATA',
0x80 => 'GUTS',
0x81 => 'WSBOOL',
0x82 => 'GRIDSET',
0x83 => 'HCENTER',
0x84 => 'VCENTER',
0x85 => 'BOUNDSHEET',
0x86 => 'WRITEPROT',
0x87 => 'ADDIN',
0x88 => 'EDG',
0x89 => 'PUB',
0x8C => 'COUNTRY',
0x8D => 'HIDEOBJ',
0x90 => 'SORT',
0x91 => 'SUB',
0x92 => 'PALETTE',
0x94 => 'LHRECORD',
0x95 => 'LHNGRAPH',
0x96 => 'SOUND',
0x98 => 'LPR',
0x99 => 'STANDARDWIDTH',
0x9A => 'FNGROUPNAME',
0x9B => 'FILTERMODE',
0x9C => 'FNGROUPCOUNT',
#P293
0x9D => 'AUTOFILTERINFO',
0x9E => 'AUTOFILTER',
0xA0 => 'SCL',
0xA1 => 'SETUP',
0xA9 => 'COORDLIST',
0xAB => 'GCW',
0xAE => 'SCENMAN',
0xAF => 'SCENARIO',
0xB0 => 'SXVIEW',
0xB1 => 'SXVD',
0xB2 => 'SXV',
0xB4 => 'SXIVD',
0xB5 => 'SXLI',
0xB6 => 'SXPI',
0xB8 => 'DOCROUTE',
0xB9 => 'RECIPNAME',
0xBC => 'SHRFMLA',
0xBD => 'MULRK',
0xBE => 'MULBLANK',
0xBF => 'TOOLBARHDR',
0xC0 => 'TOOLBAREND',
0xC1 => 'MMS',
0xC2 => 'ADDMENU',
0xC3 => 'DELMENU',
0xC5 => 'SXDI',
0xC6 => 'SXDB',
0xCD => 'SXSTRING',
0xD0 => 'SXTBL',
0xD1 => 'SXTBRGIITM',
0xD2 => 'SXTBPG',
0xD3 => 'OBPROJ',
0xD5 => 'SXISDTM',
0xD6 => 'RSTRING',
0xD7 => 'DBCELL',
0xDA => 'BOOKBOOL',
0xDC => 'PARAMQRY',
0xDC => 'SXEXT',
0xDD => 'SCENPROTECT',
0xDE => 'OLESIZE',
#P294
0xDF => 'UDDESC',
0xE0 => 'XF',
0xE1 => 'INTERFACEHDR',
0xE2 => 'INTERFACEEND',
0xE3 => 'SXVS',
0xEA => 'TABIDCONF',
0xEB => 'MSODRAWINGGROUP',
0xEC => 'MSODRAWING',
0xED => 'MSODRAWINGSELECTION',
0xEF => 'PHONETICINFO',
0xF0 => 'SXRULE',
0xF1 => 'SXEXT',
0xF2 => 'SXFILT',
0xF6 => 'SXNAME',
0xF7 => 'SXSELECT',
0xF8 => 'SXPAIR',
0xF9 => 'SXFMLA',
0xFB => 'SXFORMAT',
0xFC => 'SST',
0xFD => 'LABELSST',
0xFF => 'EXTSST',
0x100 => 'SXVDEX',
0x103 => 'SXFORMULA',
0x122 => 'SXDBEX',
0x13D => 'TABID',
0x160 => 'USESELFS',
0x161 => 'DSF',
0x162 => 'XL5MODIFY',
0x1A5 => 'FILESHARING2',
0x1A9 => 'USERBVIEW',
0x1AA => 'USERVIEWBEGIN',
0x1AB => 'USERSVIEWEND',
0x1AD => 'QSI',
0x1AE => 'SUPBOOK',
0x1AF => 'PROT4REV',
0x1B0 => 'CONDFMT',
0x1B1 => 'CF',
0x1B2 => 'DVAL',
#P295
0x1B5 => 'DCONBIN',
0x1B6 => 'TXO',
0x1B7 => 'REFRESHALL',
0x1B8 => 'HLINK',
0x1BA => 'CODENAME',
0x1BB => 'SXFDBTYPE',
0x1BC => 'PROT4REVPASS',
0x1BE => 'DV',
0x200 => 'DIMENSIONS',
0x201 => 'BLANK',
0x202 => 'Integer', #Not Documented
0x203 => 'NUMBER',
0x204 => 'LABEL',
0x205 => 'BOOLERR',
0x207 => 'STRING',
0x208 => 'ROW',
0x20B => 'INDEX',
0x218 => 'NAME',
0x221 => 'ARRAY',
0x223 => 'EXTERNNAME',
0x225 => 'DEFAULTROWHEIGHT',
0x231 => 'FONT',
0x236 => 'TABLE',
0x23E => 'WINDOW2',
0x293 => 'STYLE',
0x406 => 'FORMULA',
0x41E => 'FORMAT',
0x18 => 'NAME',
0x06 => 'FORMULA',
0x09 => 'BOF(BIFF2)',
0x209 => 'BOF(BIFF3)',
0x409 => 'BOF(BIFF4)',
0x809 => 'BOF(BIFF5-7)',
0x31 => 'FONT', 0x27E => 'RK',
#Chart/Graph
0x1001 => 'UNITS',
0x1002 => 'CHART',
0x1003 => 'SERISES',
0x1006 => 'DATAFORMAT',
0x1007 => 'LINEFORMAT',
0x1009 => 'MAKERFORMAT',
0x100A => 'AREAFORMAT',
0x100B => 'PIEFORMAT',
0x100C => 'ATTACHEDLABEL',
0x100D => 'SERIESTEXT',
0x1014 => 'CHARTFORMAT',
0x1015 => 'LEGEND',
0x1016 => 'SERIESLIST',
0x1017 => 'BAR',
0x1018 => 'LINE',
0x1019 => 'PIE',
0x101A => 'AREA',
0x101B => 'SCATTER',
0x101C => 'CHARTLINE',
0x101D => 'AXIS',
0x101E => 'TICK',
0x101F => 'VALUERANGE',
0x1020 => 'CATSERRANGE',
0x1021 => 'AXISLINEFORMAT',
0x1022 => 'CHARTFORMATLINK',
0x1024 => 'DEFAULTTEXT',
0x1025 => 'TEXT',
0x1026 => 'FONTX',
0x1027 => 'OBJECTLINK',
0x1032 => 'FRAME',
0x1033 => 'BEGIN',
0x1034 => 'END',
0x1035 => 'PLOTAREA',
0x103A => '3D',
0x103C => 'PICF',
0x103D => 'DROPBAR',
0x103E => 'RADAR',
0x103F => 'SURFACE',
0x1040 => 'RADARAREA',
0x1041 => 'AXISPARENT',
0x1043 => 'LEGENDXN',
0x1044 => 'SHTPROPS',
0x1045 => 'SERTOCRT',
0x1046 => 'AXESUSED',
0x1048 => 'SBASEREF',
0x104A => 'SERPARENT',
0x104B => 'SERAUXTREND',
0x104E => 'IFMT',
0x104F => 'POS',
0x1050 => 'ALRUNS',
0x1051 => 'AI',
0x105B => 'SERAUXERRBAR',
0x105D => 'SERFMT',
0x1060 => 'FBI',
0x1061 => 'BOPPOP',
0x1062 => 'AXCEXT',
0x1063 => 'DAT',
0x1064 => 'PLOTGROWTH',
0x1065 => 'SINDEX',
0x1066 => 'GELFRAME',
0x1067 => 'BPOPPOPCUSTOM',
);
#------------------------------------------------------------------------------
# subDUMP (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub subDUMP {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
printf "%04X:%-23s (Len:%3d) : %s\n",
$bOp, OpName($bOp), $bLen, unpack( "H40", $sWk );
}
#------------------------------------------------------------------------------
# Spreadsheet::ParseExcel->OpName
#------------------------------------------------------------------------------
sub OpName {
my ($bOp) = @_;
return ( defined $NameTbl{$bOp} ) ? $NameTbl{$bOp} : 'undef';
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::Dump - A class for dumping Excel records.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_DUMP
$fatpacked{"Spreadsheet/ParseExcel/FmtDefault.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FMTDEFAULT';
package Spreadsheet::ParseExcel::FmtDefault;
###############################################################################
#
# Spreadsheet::ParseExcel::FmtDefault - A class for Cell formats.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use Spreadsheet::ParseExcel::Utility qw(ExcelFmt);
our $VERSION = '0.65';
my %hFmtDefault = (
0x00 => 'General',
0x01 => '0',
0x02 => '0.00',
0x03 => '#,##0',
0x04 => '#,##0.00',
0x05 => '($#,##0_);($#,##0)',
0x06 => '($#,##0_);[Red]($#,##0)',
0x07 => '($#,##0.00_);($#,##0.00_)',
0x08 => '($#,##0.00_);[Red]($#,##0.00_)',
0x09 => '0%',
0x0A => '0.00%',
0x0B => '0.00E+00',
0x0C => '# ?/?',
0x0D => '# ??/??',
0x0E => 'yyyy-mm-dd', # Was 'm-d-yy', which is bad as system default
0x0F => 'd-mmm-yy',
0x10 => 'd-mmm',
0x11 => 'mmm-yy',
0x12 => 'h:mm AM/PM',
0x13 => 'h:mm:ss AM/PM',
0x14 => 'h:mm',
0x15 => 'h:mm:ss',
0x16 => 'm-d-yy h:mm',
#0x17-0x24 -- Differs in Natinal
0x25 => '(#,##0_);(#,##0)',
0x26 => '(#,##0_);[Red](#,##0)',
0x27 => '(#,##0.00);(#,##0.00)',
0x28 => '(#,##0.00);[Red](#,##0.00)',
0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)',
0x2A => '_($*#,##0_);_($*(#,##0);_(*"-"_);_(@_)',
0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)',
0x2C => '_($*#,##0.00_);_($*(#,##0.00);_(*"-"??_);_(@_)',
0x2D => 'mm:ss',
0x2E => '[h]:mm:ss',
0x2F => 'mm:ss.0',
0x30 => '##0.0E+0',
0x31 => '@',
);
#------------------------------------------------------------------------------
# new (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub new {
my ( $sPkg, %hKey ) = @_;
my $oThis = {};
bless $oThis;
return $oThis;
}
#------------------------------------------------------------------------------
# TextFmt (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub TextFmt {
my ( $oThis, $sTxt, $sCode ) = @_;
return $sTxt if ( ( !defined($sCode) ) || ( $sCode eq '_native_' ) );
return pack( 'U*', unpack( 'n*', $sTxt ) );
}
#------------------------------------------------------------------------------
# FmtStringDef (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub FmtStringDef {
my ( $oThis, $iFmtIdx, $oBook, $rhFmt ) = @_;
my $sFmtStr = $oBook->{FormatStr}->{$iFmtIdx};
if ( !( defined($sFmtStr) ) && defined($rhFmt) ) {
$sFmtStr = $rhFmt->{$iFmtIdx};
}
$sFmtStr = $hFmtDefault{$iFmtIdx} unless ($sFmtStr);
return $sFmtStr;
}
#------------------------------------------------------------------------------
# FmtString (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub FmtString {
my ( $oThis, $oCell, $oBook ) = @_;
my $sFmtStr =
$oThis->FmtStringDef( $oBook->{Format}[ $oCell->{FormatNo} ]->{FmtIdx},
$oBook );
# Special case for cells that use Lotus123 style leading
# apostrophe to designate text formatting.
if ( $oBook->{Format}[ $oCell->{FormatNo} ]->{Key123} ) {
$sFmtStr = '@';
}
unless ( defined($sFmtStr) ) {
if ( $oCell->{Type} eq 'Numeric' ) {
if ( int( $oCell->{Val} ) != $oCell->{Val} ) {
$sFmtStr = '0.00';
}
else {
$sFmtStr = '0';
}
}
elsif ( $oCell->{Type} eq 'Date' ) {
if ( int( $oCell->{Val} ) <= 0 ) {
$sFmtStr = 'h:mm:ss';
}
else {
$sFmtStr = 'yyyy-mm-dd';
}
}
else {
$sFmtStr = '@';
}
}
return $sFmtStr;
}
#------------------------------------------------------------------------------
# ValFmt (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub ValFmt {
my ( $oThis, $oCell, $oBook ) = @_;
my ( $Dt, $iFmtIdx, $iNumeric, $Flg1904 );
if ( $oCell->{Type} eq 'Text' ) {
$Dt =
( ( defined $oCell->{Val} ) && ( $oCell->{Val} ne '' ) )
? $oThis->TextFmt( $oCell->{Val}, $oCell->{Code} )
: '';
return $Dt;
}
else {
$Dt = $oCell->{Val};
$Flg1904 = $oBook->{Flg1904};
my $sFmtStr = $oThis->FmtString( $oCell, $oBook );
return ExcelFmt( $sFmtStr, $Dt, $Flg1904, $oCell->{Type} );
}
}
#------------------------------------------------------------------------------
# ChkType (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub ChkType {
my ( $oPkg, $iNumeric, $iFmtIdx ) = @_;
if ($iNumeric) {
if ( ( ( $iFmtIdx >= 0x0E ) && ( $iFmtIdx <= 0x16 ) )
|| ( ( $iFmtIdx >= 0x2D ) && ( $iFmtIdx <= 0x2F ) ) )
{
return "Date";
}
else {
return "Numeric";
}
}
else {
return "Text";
}
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::FmtDefault - A class for Cell formats.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_FMTDEFAULT
$fatpacked{"Spreadsheet/ParseExcel/FmtJapan.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FMTJAPAN';
package Spreadsheet::ParseExcel::FmtJapan;
use utf8;
###############################################################################
#
# Spreadsheet::ParseExcel::FmtJapan - A class for Cell formats.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use Encode qw(find_encoding decode);
use base 'Spreadsheet::ParseExcel::FmtDefault';
our $VERSION = '0.65';
my %FormatTable = (
0x00 => 'General',
0x01 => '0',
0x02 => '0.00',
0x03 => '#,##0',
0x04 => '#,##0.00',
0x05 => '(\\#,##0_);(\\#,##0)',
0x06 => '(\\#,##0_);[Red](\\#,##0)',
0x07 => '(\\#,##0.00_);(\\#,##0.00_)',
0x08 => '(\\#,##0.00_);[Red](\\#,##0.00_)',
0x09 => '0%',
0x0A => '0.00%',
0x0B => '0.00E+00',
0x0C => '# ?/?',
0x0D => '# ??/??',
# 0x0E => 'm/d/yy',
0x0E => 'yyyy/m/d',
0x0F => 'd-mmm-yy',
0x10 => 'd-mmm',
0x11 => 'mmm-yy',
0x12 => 'h:mm AM/PM',
0x13 => 'h:mm:ss AM/PM',
0x14 => 'h:mm',
0x15 => 'h:mm:ss',
# 0x16 => 'm/d/yy h:mm',
0x16 => 'yyyy/m/d h:mm',
#0x17-0x24 -- Differs in Natinal
0x1E => 'm/d/yy',
0x1F => 'yyyy"年"m"月"d"日"',
0x20 => 'h"時"mm"分"',
0x21 => 'h"時"mm"分"ss"秒"',
#0x17-0x24 -- Differs in Natinal
0x25 => '(#,##0_);(#,##0)',
0x26 => '(#,##0_);[Red](#,##0)',
0x27 => '(#,##0.00);(#,##0.00)',
0x28 => '(#,##0.00);[Red](#,##0.00)',
0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)',
0x2A => '_(\\*#,##0_);_(\\*(#,##0);_(*"-"_);_(@_)',
0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)',
0x2C => '_(\\*#,##0.00_);_(\\*(#,##0.00);_(*"-"??_);_(@_)',
0x2D => 'mm:ss',
0x2E => '[h]:mm:ss',
0x2F => 'mm:ss.0',
0x30 => '##0.0E+0',
0x31 => '@',
0x37 => 'yyyy"年"m"月"',
0x38 => 'm"月"d"日"',
0x39 => 'ge.m.d',
0x3A => 'ggge"年"m"月"d"日"',
);
#------------------------------------------------------------------------------
# new (for Spreadsheet::ParseExcel::FmtJapan)
#------------------------------------------------------------------------------
sub new {
my ( $class, %args ) = @_;
my $encoding = $args{Code} || $args{encoding};
my $self = { Code => $encoding };
if($encoding){
$self->{encoding} = find_encoding($encoding eq 'sjis' ? 'cp932' : $encoding)
or do{
require Carp;
Carp::croak(qq{Unknown encoding '$encoding'});
};
}
return bless $self, $class;
}
#------------------------------------------------------------------------------
# TextFmt (for Spreadsheet::ParseExcel::FmtJapan)
#------------------------------------------------------------------------------
sub TextFmt {
my ( $self, $text, $input_encoding ) = @_;
if(!defined $input_encoding){
$input_encoding = 'utf8';
}
elsif($input_encoding eq '_native_'){
$input_encoding = 'cp932'; # Shift_JIS in Microsoft products
}
$text = decode($input_encoding, $text);
return $self->{Code} ? $self->{encoding}->encode($text) : $text;
}
#------------------------------------------------------------------------------
# FmtStringDef (for Spreadsheet::ParseExcel::FmtJapan)
#------------------------------------------------------------------------------
sub FmtStringDef {
my ( $self, $format_index, $book ) = @_;
return $self->SUPER::FmtStringDef( $format_index, $book, \%FormatTable );
}
#------------------------------------------------------------------------------
# CnvNengo (for Spreadsheet::ParseExcel::FmtJapan)
#------------------------------------------------------------------------------
# Convert A.D. into Japanese Nengo (aka Gengo)
my @Nengo = (
{
name => '平成', # Heisei
abbr_name => 'H',
base => 1988,
start => 19890108,
},
{
name => '昭和', # Showa
abbr_name => 'S',
base => 1925,
start => 19261225,
},
{
name => '大正', # Taisho
abbr_name => 'T',
base => 1911,
start => 19120730,
},
{
name => '明治', # Meiji
abbr_name => 'M',
base => 1867,
start => 18680908,
},
);
# Usage: CnvNengo(name => @tm) or CnvNeng(abbr_name => @tm)
sub CnvNengo {
my ( $kind, @tm ) = @_;
my $year = $tm[5] + 1900;
my $wk = ($year * 10000) + ($tm[4] * 100) + ($tm[3] * 1);
#my $wk = sprintf( '%04d%02d%02d', $year, $tm[4], $tm[3] );
foreach my $nengo(@Nengo){
if( $wk >= $nengo->{start} ){
return $nengo->{$kind} . ($year - $nengo->{base});
}
}
return $year;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::FmtJapan - A class for Cell formats.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_FMTJAPAN
$fatpacked{"Spreadsheet/ParseExcel/FmtJapan2.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FMTJAPAN2';
package Spreadsheet::ParseExcel::FmtJapan2;
###############################################################################
#
# Spreadsheet::ParseExcel::FmtJapan2 - A class for Cell formats.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use Jcode;
use Unicode::Map;
use base 'Spreadsheet::ParseExcel::FmtJapan';
our $VERSION = '0.65';
#------------------------------------------------------------------------------
# new (for Spreadsheet::ParseExcel::FmtJapan2)
#------------------------------------------------------------------------------
sub new {
my ( $sPkg, %hKey ) = @_;
my $oMap = Unicode::Map->new('CP932Excel');
die "NO MAP FILE CP932Excel!!"
unless ( -r Unicode::Map->mapping("CP932Excel") );
my $oThis = {
Code => $hKey{Code},
_UniMap => $oMap,
};
bless $oThis;
$oThis->SUPER::new(%hKey);
return $oThis;
}
#------------------------------------------------------------------------------
# TextFmt (for Spreadsheet::ParseExcel::FmtJapan2)
#------------------------------------------------------------------------------
sub TextFmt {
my ( $oThis, $sTxt, $sCode ) = @_;
# $sCode = 'sjis' if((! defined($sCode)) || ($sCode eq '_native_'));
if ( $oThis->{Code} ) {
if ( !defined($sCode) ) {
$sTxt =~ s/(.)/\x00$1/sg;
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt);
}
elsif ( $sCode eq 'ucs2' ) {
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt);
}
return Jcode::convert( $sTxt, $oThis->{Code}, 'sjis' );
}
else {
return $sTxt;
}
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::FmtJapan2 - A class for Cell formats.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_FMTJAPAN2
$fatpacked{"Spreadsheet/ParseExcel/FmtUnicode.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FMTUNICODE';
package Spreadsheet::ParseExcel::FmtUnicode;
###############################################################################
#
# Spreadsheet::ParseExcel::FmtUnicode - A class for Cell formats.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use Unicode::Map;
use base 'Spreadsheet::ParseExcel::FmtDefault';
our $VERSION = '0.65';
#------------------------------------------------------------------------------
# new (for Spreadsheet::ParseExcel::FmtUnicode)
#------------------------------------------------------------------------------
sub new {
my ( $sPkg, %hKey ) = @_;
my $sMap = $hKey{Unicode_Map};
my $oMap;
$oMap = Unicode::Map->new($sMap) if $sMap;
my $oThis = {
Unicode_Map => $sMap,
_UniMap => $oMap,
};
bless $oThis;
return $oThis;
}
#------------------------------------------------------------------------------
# TextFmt (for Spreadsheet::ParseExcel::FmtUnicode)
#------------------------------------------------------------------------------
sub TextFmt {
my ( $oThis, $sTxt, $sCode ) = @_;
if ( $oThis->{_UniMap} ) {
if ( !defined($sCode) ) {
my $sSv = $sTxt;
$sTxt =~ s/(.)/\x00$1/sg;
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt);
$sTxt = $sSv unless ($sTxt);
}
elsif ( $sCode eq 'ucs2' ) {
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt);
}
# $sTxt = $oThis->{_UniMap}->from_unicode($sTxt)
# if(defined($sCode) && $sCode eq 'ucs2');
return $sTxt;
}
else {
return $sTxt;
}
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::FmtUnicode - A class for Cell formats.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_FMTUNICODE
$fatpacked{"Spreadsheet/ParseExcel/Font.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FONT';
package Spreadsheet::ParseExcel::Font;
###############################################################################
#
# Spreadsheet::ParseExcel::Font - A class for Cell fonts.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
our $VERSION = '0.65';
sub new {
my ( $class, %rhIni ) = @_;
my $self = \%rhIni;
bless $self, $class;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::Font - A class for Cell fonts.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_FONT
$fatpacked{"Spreadsheet/ParseExcel/Format.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_FORMAT';
package Spreadsheet::ParseExcel::Format;
###############################################################################
#
# Spreadsheet::ParseExcel::Format - A class for Cell formats.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
our $VERSION = '0.65';
sub new {
my ( $class, %rhIni ) = @_;
my $self = \%rhIni;
bless $self, $class;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::Format - A class for Cell formats.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_FORMAT
$fatpacked{"Spreadsheet/ParseExcel/SaveParser.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_SAVEPARSER';
package Spreadsheet::ParseExcel::SaveParser;
###############################################################################
#
# Spreadsheet::ParseExcel::SaveParser - Rewrite an existing Excel file.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use Spreadsheet::ParseExcel;
use Spreadsheet::ParseExcel::SaveParser::Workbook;
use Spreadsheet::ParseExcel::SaveParser::Worksheet;
use Spreadsheet::WriteExcel;
use base 'Spreadsheet::ParseExcel';
our $VERSION = '0.65';
###############################################################################
#
# new()
#
sub new {
my ( $package, %params ) = @_;
$package->SUPER::new(%params);
}
###############################################################################
#
# Create()
#
sub Create {
my ( $self, $formatter ) = @_;
#0. New $workbook
my $workbook = Spreadsheet::ParseExcel::Workbook->new();
$workbook->{SheetCount} = 0;
# User specified formatter class.
if ($formatter) {
$workbook->{FmtClass} = $formatter;
}
else {
$workbook->{FmtClass} = Spreadsheet::ParseExcel::FmtDefault->new();
}
return Spreadsheet::ParseExcel::SaveParser::Workbook->new($workbook);
}
###############################################################################
#
# Parse()
#
sub Parse {
my ( $self, $sFile, $formatter ) = @_;
my $workbook = $self->SUPER::Parse( $sFile, $formatter );
return undef unless defined $workbook;
return Spreadsheet::ParseExcel::SaveParser::Workbook->new($workbook);
}
###############################################################################
#
# SaveAs()
#
sub SaveAs {
my ( $self, $workbook, $filename ) = @_;
$workbook->SaveAs($filename);
}
1;
__END__
=head1 NAME
Spreadsheet::ParseExcel::SaveParser - Rewrite an existing Excel file.
=head1 SYNOPSIS
Say we start with an Excel file that looks like this:
-----------------------------------------------------
| | A | B | C |
-----------------------------------------------------
| 1 | Hello | ... | ... | ...
| 2 | World | ... | ... | ...
| 3 | *Bold text* | ... | ... | ...
| 4 | ... | ... | ... | ...
| 5 | ... | ... | ... | ...
Then we process it with the following program:
#!/usr/bin/perl
use strict;
use warnings;
use Spreadsheet::ParseExcel;
use Spreadsheet::ParseExcel::SaveParser;
# Open an existing file with SaveParser
my $parser = Spreadsheet::ParseExcel::SaveParser->new();
my $template = $parser->Parse('template.xls');
# Get the first worksheet.
my $worksheet = $template->worksheet(0);
my $row = 0;
my $col = 0;
# Overwrite the string in cell A1
$worksheet->AddCell( $row, $col, 'New string' );
# Add a new string in cell B1
$worksheet->AddCell( $row, $col + 1, 'Newer' );
# Add a new string in cell C1 with the format from cell A3.
my $cell = $worksheet->get_cell( $row + 2, $col );
my $format_number = $cell->{FormatNo};
$worksheet->AddCell( $row, $col + 2, 'Newest', $format_number );
# Write over the existing file or write a new file.
$template->SaveAs('newfile.xls');
We should now have an Excel file that looks like this:
-----------------------------------------------------
| | A | B | C |
-----------------------------------------------------
| 1 | New string | Newer | *Newest* | ...
| 2 | World | ... | ... | ...
| 3 | *Bold text* | ... | ... | ...
| 4 | ... | ... | ... | ...
| 5 | ... | ... | ... | ...
=head1 DESCRIPTION
The C<Spreadsheet::ParseExcel::SaveParser> module rewrite an existing Excel file by reading it with C<Spreadsheet::ParseExcel> and rewriting it with C<Spreadsheet::WriteExcel>.
=head1 METHODS
=head1 Parser
=head2 new()
$parse = new Spreadsheet::ParseExcel::SaveParser();
Constructor.
=head2 Parse()
$workbook = $parse->Parse($sFileName);
$workbook = $parse->Parse($sFileName , $formatter);
Returns a L</Workbook> object. If an error occurs, returns undef.
The optional C<$formatter> is a Formatter Class to format the value of cells.
=head1 Workbook
The C<Parse()> method returns a C<Spreadsheet::ParseExcel::SaveParser::Workbook> object.
This is a subclass of the L<Spreadsheet::ParseExcel::Workbook> and has the following methods:
=head2 worksheets()
Returns an array of L</Worksheet> objects. This was most commonly used to iterate over the worksheets in a workbook:
for my $worksheet ( $workbook->worksheets() ) {
...
}
=head2 worksheet()
The C<worksheet()> method returns a single C<Worksheet> object using either its name or index:
$worksheet = $workbook->worksheet('Sheet1');
$worksheet = $workbook->worksheet(0);
Returns C<undef> if the sheet name or index doesn't exist.
=head2 AddWorksheet()
$workbook = $workbook->AddWorksheet($name, %properties);
Create a new Worksheet object of type C<Spreadsheet::ParseExcel::Worksheet>.
The C<%properties> hash contains the properties of new Worksheet.
=head2 AddFont
$workbook = $workbook->AddFont(%properties);
Create new Font object of type C<Spreadsheet::ParseExcel::Font>.
The C<%properties> hash contains the properties of new Font.
=head2 AddFormat
$workbook = $workbook->AddFormat(%properties);
The C<%properties> hash contains the properties of new Font.
=head1 Worksheet
Spreadsheet::ParseExcel::SaveParser::Worksheet
Worksheet is a subclass of Spreadsheet::ParseExcel::Worksheet.
And has these methods :
The C<Worksbook::worksheet()> method returns a C<Spreadsheet::ParseExcel::SaveParser::Worksheet> object.
This is a subclass of the L<Spreadsheet::ParseExcel::Worksheet> and has the following methods:
=head1 AddCell
$workbook = $worksheet->AddCell($row, $col, $value, $format [$encoding]);
Create new Cell object of type C<Spreadsheet::ParseExcel::Cell>.
The C<$format> parameter is the format number rather than a full format object.
To specify just same as another cell,
you can set it like below:
$row = 0;
$col = 0;
$worksheet = $template->worksheet(0);
$cell = $worksheet->get_cell( $row, $col );
$format_number = $cell->{FormatNo};
$worksheet->AddCell($row +1, $coll, 'New data', $format_number);
=head1 TODO
Please note that this module is currently (versions 0.50-0.60) undergoing a major
restructuring and rewriting.
=head1 Known Problems
You can only rewrite the features that Spreadsheet::WriteExcel supports so
macros, graphs and some other features in the original Excel file will be lost.
Also, formulas aren't rewritten, only the result of a formula is written.
Only last print area will remain. (Others will be removed)
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2002 Kawai Takanori and Nippon-RAD Co. OP Division
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_SAVEPARSER
$fatpacked{"Spreadsheet/ParseExcel/SaveParser/Workbook.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_SAVEPARSER_WORKBOOK';
package Spreadsheet::ParseExcel::SaveParser::Workbook;
###############################################################################
#
# Spreadsheet::ParseExcel::SaveParser::Workbook - A class for SaveParser Workbooks.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use base 'Spreadsheet::ParseExcel::Workbook';
our $VERSION = '0.65';
#==============================================================================
# Spreadsheet::ParseExcel::SaveParser::Workbook
#==============================================================================
sub new {
my ( $sPkg, $oBook ) = @_;
return undef unless ( defined $oBook );
my %oThis = %$oBook;
bless \%oThis, $sPkg;
# re-bless worksheets (and set their _Book properties !!!)
my $sWkP = ref($sPkg) || "$sPkg";
$sWkP =~ s/Workbook$/Worksheet/;
map { bless( $_, $sWkP ); } @{ $oThis{Worksheet} };
map { $_->{_Book} = \%oThis; } @{ $oThis{Worksheet} };
return \%oThis;
}
#------------------------------------------------------------------------------
# Parse (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub Parse {
my ( $sClass, $sFile, $oWkFmt ) = @_;
my $oBook = Spreadsheet::ParseExcel::Workbook->Parse( $sFile, $oWkFmt );
bless $oBook, $sClass;
# re-bless worksheets (and set their _Book properties !!!)
my $sWkP = ref($sClass) || "$sClass";
$sWkP =~ s/Workbook$/Worksheet/;
map { bless( $_, $sWkP ); } @{ $oBook->{Worksheet} };
map { $_->{_Book} = $oBook; } @{ $oBook->{Worksheet} };
return $oBook;
}
#------------------------------------------------------------------------------
# SaveAs (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub SaveAs {
my ( $oBook, $sName ) = @_;
# Create a new Excel workbook
my $oWrEx = Spreadsheet::WriteExcel->new($sName);
$oWrEx->compatibility_mode();
my %hFmt;
my $iNo = 0;
my @aAlH = (
'left', 'left', 'center', 'right',
'fill', 'justify', 'merge', 'equal_space'
);
my @aAlV = ( 'top', 'vcenter', 'bottom', 'vjustify', 'vequal_space' );
foreach my $pFmt ( @{ $oBook->{Format} } ) {
my $oFmt = $oWrEx->addformat(); # Add Formats
unless ( $pFmt->{Style} ) {
$hFmt{$iNo} = $oFmt;
my $rFont = $pFmt->{Font};
$oFmt->set_font( $rFont->{Name} );
$oFmt->set_size( $rFont->{Height} );
$oFmt->set_color( $rFont->{Color} );
$oFmt->set_bold( $rFont->{Bold} );
$oFmt->set_italic( $rFont->{Italic} );
$oFmt->set_underline( $rFont->{Underline} );
$oFmt->set_font_strikeout( $rFont->{Strikeout} );
$oFmt->set_font_script( $rFont->{Super} );
$oFmt->set_hidden( $rFont->{Hidden} ); #Add
$oFmt->set_locked( $pFmt->{Lock} );
$oFmt->set_align( $aAlH[ $pFmt->{AlignH} ] );
$oFmt->set_align( $aAlV[ $pFmt->{AlignV} ] );
$oFmt->set_rotation( $pFmt->{Rotate} );
$oFmt->set_num_format(
$oBook->{FmtClass}->FmtStringDef( $pFmt->{FmtIdx}, $oBook ) );
$oFmt->set_text_wrap( $pFmt->{Wrap} );
$oFmt->set_pattern( $pFmt->{Fill}->[0] );
$oFmt->set_fg_color( $pFmt->{Fill}->[1] )
if ( ( $pFmt->{Fill}->[1] >= 8 )
&& ( $pFmt->{Fill}->[1] <= 63 ) );
$oFmt->set_bg_color( $pFmt->{Fill}->[2] )
if ( ( $pFmt->{Fill}->[2] >= 8 )
&& ( $pFmt->{Fill}->[2] <= 63 ) );
$oFmt->set_left(
( $pFmt->{BdrStyle}->[0] > 7 ) ? 3 : $pFmt->{BdrStyle}->[0] );
$oFmt->set_right(
( $pFmt->{BdrStyle}->[1] > 7 ) ? 3 : $pFmt->{BdrStyle}->[1] );
$oFmt->set_top(
( $pFmt->{BdrStyle}->[2] > 7 ) ? 3 : $pFmt->{BdrStyle}->[2] );
$oFmt->set_bottom(
( $pFmt->{BdrStyle}->[3] > 7 ) ? 3 : $pFmt->{BdrStyle}->[3] );
$oFmt->set_left_color( $pFmt->{BdrColor}->[0] )
if ( ( $pFmt->{BdrColor}->[0] >= 8 )
&& ( $pFmt->{BdrColor}->[0] <= 63 ) );
$oFmt->set_right_color( $pFmt->{BdrColor}->[1] )
if ( ( $pFmt->{BdrColor}->[1] >= 8 )
&& ( $pFmt->{BdrColor}->[1] <= 63 ) );
$oFmt->set_top_color( $pFmt->{BdrColor}->[2] )
if ( ( $pFmt->{BdrColor}->[2] >= 8 )
&& ( $pFmt->{BdrColor}->[2] <= 63 ) );
$oFmt->set_bottom_color( $pFmt->{BdrColor}->[3] )
if ( ( $pFmt->{BdrColor}->[3] >= 8 )
&& ( $pFmt->{BdrColor}->[3] <= 63 ) );
}
$iNo++;
}
for ( my $iSheet = 0 ; $iSheet < $oBook->{SheetCount} ; $iSheet++ ) {
my $oWkS = $oBook->{Worksheet}[$iSheet];
my $oWrS = $oWrEx->addworksheet( $oWkS->{Name} );
#Landscape
if ( !$oWkS->{Landscape} ) { # Landscape (0:Horizontal, 1:Vertical)
$oWrS->set_landscape();
}
else {
$oWrS->set_portrait();
}
#Protect
if ( defined $oWkS->{Protect} )
{ # Protect ('':NoPassword, Password:Password)
if ( $oWkS->{Protect} ne '' ) {
$oWrS->protect( $oWkS->{Protect} );
}
else {
$oWrS->protect();
}
}
if ( $oWkS->{Scale} != 100 ) {
# Pages on fit with width and Heigt
$oWrS->fit_to_pages( $oWkS->{FitWidth}, $oWkS->{FitHeight} );
#Print Scale and reset FitWidth/FitHeight
$oWrS->set_print_scale( $oWkS->{Scale} );
}
else {
#Print Scale
$oWrS->set_print_scale( $oWkS->{Scale} );
# Pages on fit with width and Heigt
$oWrS->fit_to_pages( $oWkS->{FitWidth}, $oWkS->{FitHeight} );
}
# Paper Size
$oWrS->set_paper( $oWkS->{PaperSize} );
# Margin
$oWrS->set_margin_left( $oWkS->{LeftMargin} );
$oWrS->set_margin_right( $oWkS->{RightMargin} );
$oWrS->set_margin_top( $oWkS->{TopMargin} );
$oWrS->set_margin_bottom( $oWkS->{BottomMargin} );
# HCenter
$oWrS->center_horizontally() if ( $oWkS->{HCenter} );
# VCenter
$oWrS->center_vertically() if ( $oWkS->{VCenter} );
# Header, Footer
$oWrS->set_header( $oWkS->{Header}, $oWkS->{HeaderMargin} );
$oWrS->set_footer( $oWkS->{Footer}, $oWkS->{FooterMargin} );
# Print Area
if ( ref( $oBook->{PrintArea}[$iSheet] ) eq 'ARRAY' ) {
my $raP;
for $raP ( @{ $oBook->{PrintArea}[$iSheet] } ) {
$oWrS->print_area(@$raP);
}
}
# Print Title
my $raW;
foreach $raW ( @{ $oBook->{PrintTitle}[$iSheet]->{Row} } ) {
$oWrS->repeat_rows(@$raW);
}
foreach $raW ( @{ $oBook->{PrintTitle}[$iSheet]->{Column} } ) {
$oWrS->repeat_columns(@$raW);
}
# Print Gridlines
if ( $oWkS->{PrintGrid} == 1 ) {
$oWrS->hide_gridlines(0);
}
else {
$oWrS->hide_gridlines(1);
}
# Print Headings
if ( $oWkS->{PrintHeaders} ) {
$oWrS->print_row_col_headers();
}
# Horizontal Page Breaks
$oWrS->set_h_pagebreaks( @{ $oWkS->{HPageBreak} } );
# Veritical Page Breaks
$oWrS->set_v_pagebreaks( @{ $oWkS->{VPageBreak} } );
# PageStart => $oWkS->{PageStart}, # Page number for start
# UsePage => $oWkS->{UsePage}, # Use own start page number
# NoColor => $oWkS->{NoColor}, # Print in black-white
# Draft => $oWkS->{Draft}, # Print in draft mode
# Notes => $oWkS->{Notes}, # Print notes
# LeftToRight => $oWkS->{LeftToRight}, # Left to Right
for (
my $iC = $oWkS->{MinCol} ;
defined $oWkS->{MaxCol} && $iC <= $oWkS->{MaxCol} ;
$iC++
)
{
if ( defined $oWkS->{ColWidth}[$iC] ) {
if ( $oWkS->{ColWidth}[$iC] > 0 ) {
$oWrS->set_column( $iC, $iC, $oWkS->{ColWidth}[$iC] )
; #, undef, 1) ;
}
else {
$oWrS->set_column( $iC, $iC, 0, undef, 1 );
}
}
}
my $merged_areas = $oWkS->get_merged_areas();
my $merged_areas_h = {};
if ($merged_areas) {
foreach my $range (@$merged_areas) {
$merged_areas_h->{$range->[0]}{$range->[1]} = $range;
}
}
for (
my $iR = $oWkS->{MinRow} ;
defined $oWkS->{MaxRow} && $iR <= $oWkS->{MaxRow} ;
$iR++
)
{
$oWrS->set_row( $iR, $oWkS->{RowHeight}[$iR] );
for (
my $iC = $oWkS->{MinCol} ;
defined $oWkS->{MaxCol} && $iC <= $oWkS->{MaxCol} ;
$iC++
)
{
my $oWkC = $oWkS->{Cells}[$iR][$iC];
if ($oWkC) {
if ( $oWkC->{Merged} and exists $merged_areas_h->{$iR}{$iC} ) {
my $oFmtN = $oWrEx->addformat();
$oFmtN->copy( $hFmt{ $oWkC->{FormatNo} } );
$oWrS->merge_range (
@{$merged_areas_h->{$iR}{$iC}},
$oBook->{FmtClass}
->TextFmt( $oWkC->{Val}, $oWkC->{Code} ),
$oFmtN
);
}
else {
$oWrS->write(
$iR,
$iC,
$oBook->{FmtClass}
->TextFmt( $oWkC->{Val}, $oWkC->{Code} ),
$hFmt{ $oWkC->{FormatNo} }
);
}
}
}
}
}
return $oWrEx;
}
#------------------------------------------------------------------------------
# AddWorksheet (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub AddWorksheet {
my ( $oBook, $sName, %hAttr ) = @_;
$oBook->AddFormat if ( $#{ $oBook->{Format} } < 0 );
$hAttr{Name} ||= $sName;
$hAttr{LeftMargin} ||= 0;
$hAttr{RightMargin} ||= 0;
$hAttr{TopMargin} ||= 0;
$hAttr{BottomMargin} ||= 0;
$hAttr{HeaderMargin} ||= 0;
$hAttr{FooterMargin} ||= 0;
$hAttr{FitWidth} ||= 0;
$hAttr{FitHeight} ||= 0;
$hAttr{PrintGrid} ||= 0;
my $oWkS = Spreadsheet::ParseExcel::SaveParser::Worksheet->new(%hAttr);
$oWkS->{_Book} = $oBook;
$oWkS->{_SheetNo} = $oBook->{SheetCount};
$oBook->{Worksheet}[ $oBook->{SheetCount} ] = $oWkS;
$oBook->{SheetCount}++;
return $oWkS; #$oBook->{SheetCount} - 1;
}
#------------------------------------------------------------------------------
# AddFont (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub AddFont {
my ( $oBook, %hAttr ) = @_;
$hAttr{Name} ||= 'Arial';
$hAttr{Height} ||= 10;
$hAttr{Bold} ||= 0;
$hAttr{Italic} ||= 0;
$hAttr{Underline} ||= 0;
$hAttr{Strikeout} ||= 0;
$hAttr{Super} ||= 0;
push @{ $oBook->{Font} }, Spreadsheet::ParseExcel::Font->new(%hAttr);
return $#{ $oBook->{Font} };
}
#------------------------------------------------------------------------------
# AddFormat (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub AddFormat {
my ( $oBook, %hAttr ) = @_;
$hAttr{Fill} ||= [ 0, 0, 0 ];
$hAttr{BdrStyle} ||= [ 0, 0, 0, 0 ];
$hAttr{BdrColor} ||= [ 0, 0, 0, 0 ];
$hAttr{AlignH} ||= 0;
$hAttr{AlignV} ||= 0;
$hAttr{Rotate} ||= 0;
$hAttr{Landscape} ||= 0;
$hAttr{FmtIdx} ||= 0;
if ( !defined( $hAttr{Font} ) ) {
my $oFont;
if ( defined $hAttr{FontNo} ) {
$oFont = $oBook->{Font}[ $hAttr{FontNo} ];
}
elsif ( !defined $oFont ) {
if ( $#{ $oBook->{Font} } >= 0 ) {
$oFont = $oBook->{Font}[0];
}
else {
my $iNo = $oBook->AddFont;
$oFont = $oBook->{Font}[$iNo];
}
}
$hAttr{Font} = $oFont;
}
push @{ $oBook->{Format} }, Spreadsheet::ParseExcel::Format->new(%hAttr);
return $#{ $oBook->{Format} };
}
#------------------------------------------------------------------------------
# AddCell (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub AddCell {
my ( $oBook, $iSheet, $iR, $iC, $sVal, $oCell, $sCode ) = @_;
my %rhKey;
$oCell ||= $oBook->{Worksheet}[$iSheet]
->{Cells}[$iR][$iC]->{FormatNo} || 0;
my $iFmt =
( UNIVERSAL::isa( $oCell, 'Spreadsheet::ParseExcel::Cell' ) )
? $oCell->{FormatNo}
: ( ref($oCell) ) ? 0
: $oCell + 0;
$rhKey{FormatNo} = $iFmt;
$rhKey{Format} = $oBook->{Format}[$iFmt];
$rhKey{Val} = $sVal;
$rhKey{Code} = $sCode || '_native_';
$oBook->{_CurSheet} = $iSheet;
my $merged_areas = $oBook->{Worksheet}[$iSheet]->get_merged_areas();
my $merged_areas_h = {};
if ($merged_areas) {
foreach my $range (@$merged_areas) {
$merged_areas_h->{$range->[0]}{$range->[1]} = $range;
}
}
my $oNewCell =
Spreadsheet::ParseExcel::_NewCell( $oBook, $iR, $iC, %rhKey );
Spreadsheet::ParseExcel::_SetDimension( $oBook, $iR, $iC, $iC );
$oNewCell->{Merged} = 1
if exists $merged_areas_h->{$iR}{$iC};
return $oNewCell;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::SaveParser::Workbook - A class for SaveParser Workbooks.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_SAVEPARSER_WORKBOOK
$fatpacked{"Spreadsheet/ParseExcel/SaveParser/Worksheet.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_SAVEPARSER_WORKSHEET';
package Spreadsheet::ParseExcel::SaveParser::Worksheet;
###############################################################################
#
# Spreadsheet::ParseExcel::SaveParser::Worksheet - A class for SaveParser Worksheets.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
#==============================================================================
# Spreadsheet::ParseExcel::SaveParser::Worksheet
#==============================================================================
use base 'Spreadsheet::ParseExcel::Worksheet';
our $VERSION = '0.65';
sub new {
my ( $sClass, %rhIni ) = @_;
$sClass->SUPER::new(%rhIni); # returns object
}
#------------------------------------------------------------------------------
# AddCell (for Spreadsheet::ParseExcel::SaveParser::Worksheet)
#------------------------------------------------------------------------------
sub AddCell {
my ( $oSelf, $iR, $iC, $sVal, $oCell, $sCode ) = @_;
$oSelf->{_Book}
->AddCell( $oSelf->{_SheetNo}, $iR, $iC, $sVal, $oCell, $sCode );
}
#------------------------------------------------------------------------------
# Protect (for Spreadsheet::ParseExcel::SaveParser::Worksheet)
# - Password = undef -> No protect
# - Password = '' -> Protected. No password
# - Password = $pwd -> Protected. Password = $pwd
#------------------------------------------------------------------------------
sub Protect {
my ( $oSelf, $sPassword ) = @_;
$oSelf->{Protect} = $sPassword;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::SaveParser::Worksheet - A class for SaveParser Worksheets.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_SAVEPARSER_WORKSHEET
$fatpacked{"Spreadsheet/ParseExcel/Utility.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_UTILITY';
package Spreadsheet::ParseExcel::Utility;
###############################################################################
#
# Spreadsheet::ParseExcel::Utility - Utility functions for ParseExcel.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
require Exporter;
use vars qw(@ISA @EXPORT_OK);
@ISA = qw(Exporter);
@EXPORT_OK = qw(ExcelFmt LocaltimeExcel ExcelLocaltime
col2int int2col sheetRef xls2csv);
our $VERSION = '0.65';
my $qrNUMBER = qr/(^[+-]?\d+(\.\d+)?$)|(^[+-]?\d+\.?(\d*)[eE][+-](\d+))$/;
###############################################################################
#
# ExcelFmt()
#
# This function takes an Excel style number format and converts a number into
# that format. for example: 'hh:mm:ss AM/PM' + 0.01023148 = '12:14:44 AM'.
#
# It does this with a type of templating mechanism. The format string is parsed
# to identify tokens that need to be replaced and their position within the
# string is recorded. These can be thought of as placeholders. The number is
# then converted to the required formats and substituted into the placeholders.
#
# Interested parties should refer to the Excel documentation on cell formats for
# more information: http://office.microsoft.com/en-us/excel/HP051995001033.aspx
# The Microsoft documentation for the Excel Binary File Format, [MS-XLS].pdf,
# also contains a ABNF grammar for number format strings.
#
# Maintainers notes:
# ==================
#
# Note on format subsections:
# A format string can contain 4 possible sub-sections separated by semi-colons:
# Positive numbers, negative numbers, zero values, and text.
# For example: _(* #,##0_);_(* (#,##0);_(* "-"_);_(@_)
#
# Note on conditional formats.
# A number format in Excel can have a conditional expression such as:
# [>9999999](000)000-0000;000-0000
# This is equivalent to the following in Perl:
# $format = $number > 9999999 ? '(000)000-0000' : '000-0000';
# Nested conditionals are also possible but we don't support them.
#
# Efficiency: The excessive use of substr() isn't very efficient. However,
# it probably doesn't merit rewriting this function with a parser or regular
# expressions and \G.
#
# TODO: I think the single quote handling may not be required. Check.
#
sub ExcelFmt {
my ( $format_str, $number, $is_1904, $number_type, $want_subformats ) = @_;
# Return text strings without further formatting.
return $number unless $number =~ $qrNUMBER;
# Handle OpenOffice.org GENERAL format.
$format_str = '@' if uc($format_str) eq "GENERAL";
# Check for a conditional at the start of the format. See notes above.
my $conditional;
if ( $format_str =~ /^\[([<>=][^\]]+)\](.*)$/ ) {
$conditional = $1;
$format_str = $2;
}
# Ignore the underscore token which is used to indicate a padding space.
$format_str =~ s/_/ /g;
# Split the format string into 4 possible sub-sections: positive numbers,
# negative numbers, zero values, and text. See notes above.
my @formats;
my $section = 0;
my $double_quote = 0;
my $single_quote = 0;
# Initial parsing of the format string to remove escape characters. This
# also handles quoted strings. See note about single quotes above.
CHARACTER:
for my $char ( split //, $format_str ) {
if ( $double_quote or $single_quote ) {
$formats[$section] .= $char;
$double_quote = 0 if $char eq '"';
$single_quote = 0;
next CHARACTER;
}
if ( $char eq ';' ) {
$section++;
next CHARACTER;
}
elsif ( $char eq '"' ) {
$double_quote = 1;
}
elsif ( $char eq '!' ) {
$single_quote = 1;
}
elsif ( $char eq '\\' ) {
$single_quote = 1;
}
elsif ( $char eq '(' ) {
next CHARACTER; # Ignore.
}
elsif ( $char eq ')' ) {
next CHARACTER; # Ignore.
}
# Convert upper case OpenOffice.org date/time formats to lowercase..
$char = lc($char) if $char =~ /[DMYHS]/;
$formats[$section] .= $char;
}
# Select the appropriate format from the 4 possible sub-sections:
# positive numbers, negative numbers, zero values, and text.
# We ignore the Text section since non-numeric values are returned
# unformatted at the start of the function.
my $format;
$section = 0;
if ( @formats == 1 ) {
$section = 0;
}
elsif ( @formats == 2 ) {
if ( $number < 0 ) {
$section = 1;
}
else {
$section = 0;
}
}
elsif ( @formats == 3 ) {
if ( $number == 0 ) {
$section = 2;
}
elsif ( $number < 0 ) {
$section = 1;
}
else {
$section = 0;
}
}
else {
$section = 0;
}
# Override the previous choice if the format is conditional.
if ($conditional) {
# TODO. Replace string eval with a function.
$section = eval "$number $conditional" ? 0 : 1;
}
# We now have the required format.
$format = $formats[$section];
# The format string can contain one of the following colours:
# [Black] [Blue] [Cyan] [Green] [Magenta] [Red] [White] [Yellow]
# or the string [ColorX] where x is a colour index from 1 to 56.
# We don't use the colour but we return it to the caller.
#
my $color = '';
if ( $format =~ s/^(\[[A-Za-z]{3,}(\d{1,2})?\])// ) {
$color = $1;
}
# Remove the locale, such as [$-409], from the format string.
my $locale = '';
if ( $format =~ s/^(\[\$?-F?\d+\])// ) {
$locale = $1;
}
# Replace currency locale, such as [$$-409], with $ in the format string.
# See the RT#60547 test cases in 21_number_format_user.t.
if ( $format =~ s/(\[\$([^-]+)(-\d+)?\])/$2/s ) {
$locale = $1;
}
# Remove leading # from '# ?/?', '# ??/??' fraction formats.
$format =~ s{# \?}{?}g;
# Parse the format string and create an AoA of placeholders that contain
# the parts of the string to be replaced. The format of the information
# stored is: [ $token, $start_pos, $end_pos, $option_info ].
#
my $format_mode = ''; # Either: '', 'number', 'date'
my $pos = 0; # Character position within format string.
my @placeholders = (); # Arefs with parts of the format to be replaced.
my $token = ''; # The actual format extracted from the total str.
my $start_pos; # A position variable. Initial parser position.
my $token_start = -1; # A position variable.
my $decimal_pos = -1; # Position of the punctuation char "." or ",".
my $comma_count = 0; # Count of the commas in the format.
my $is_fraction = 0; # Number format is a fraction.
my $is_currency = 0; # Number format is a currency.
my $is_percent = 0; # Number format is a percentage.
my $is_12_hour = 0; # Time format is using 12 hour clock.
my $seen_dot = 0; # Treat only the first "." as the decimal point.
# Parse the format.
PARSER:
while ( $pos < length $format ) {
$start_pos = $pos;
my $char = substr( $format, $pos, 1 );
# Ignore control format characters such as '#0+-.?eE,%'. However,
# only ignore '.' if it is the first one encountered. RT 45502.
if ( ( !$seen_dot && $char !~ /[#0\+\-\.\?eE\,\%]/ )
|| $char !~ /[#0\+\-\?eE\,\%]/ )
{
if ( $token_start != -1 ) {
push @placeholders,
[
substr( $format, $token_start, $pos - $token_start ),
$decimal_pos, $pos - $token_start
];
$token_start = -1;
}
}
# Processing for quoted strings within the format. See notes above.
if ( $char eq '"' ) {
$double_quote = $double_quote ? 0 : 1;
$pos++;
next PARSER;
}
elsif ( $char eq '!' ) {
$single_quote = 1;
$pos++;
next PARSER;
}
elsif ( $char eq '\\' ) {
if ( $single_quote != 1 ) {
$single_quote = 1;
$pos++;
next PARSER;
}
}
if ( ( defined($double_quote) and ($double_quote) )
or ( defined($single_quote) and ($single_quote) )
or ( $seen_dot && $char eq '.' ) )
{
$single_quote = 0;
if (
( $format_mode ne 'date' )
and ( ( substr( $format, $pos, 2 ) eq "\x81\xA2" )
|| ( substr( $format, $pos, 2 ) eq "\x81\xA3" )
|| ( substr( $format, $pos, 2 ) eq "\xA2\xA4" )
|| ( substr( $format, $pos, 2 ) eq "\xA2\xA5" ) )
)
{
# The above matches are currency symbols.
push @placeholders,
[ substr( $format, $pos, 2 ), length($token), 2 ];
$is_currency = 1;
$pos += 2;
}
else {
$pos++;
}
}
elsif (
( $char =~ /[#0\+\.\?eE\,\%]/ )
|| ( ( $format_mode ne 'date' )
and ( ( $char eq '-' ) || ( $char eq '(' ) || ( $char eq ')' ) )
)
)
{
$format_mode = 'number' unless $format_mode;
if ( substr( $format, $pos, 1 ) =~ /[#0]/ ) {
if (
substr( $format, $pos ) =~
/^([#0]+[\.]?[0#]*[eE][\+\-][0#]+)/ )
{
push @placeholders, [ $1, $pos, length($1) ];
$pos += length($1);
}
else {
if ( $token_start == -1 ) {
$token_start = $pos;
$decimal_pos = length($token);
}
}
}
elsif ( substr( $format, $pos, 1 ) eq '?' ) {
# Look for a fraction format like ?/? or ??/??
if ( $token_start != -1 ) {
push @placeholders,
[
substr(
$format, $token_start, $pos - $token_start + 1
),
$decimal_pos,
$pos - $token_start + 1
];
}
$token_start = $pos;
# Find the end of the fraction format.
FRACTION:
while ( $pos < length($format) ) {
if ( substr( $format, $pos, 1 ) eq '/' ) {
$is_fraction = 1;
}
elsif ( substr( $format, $pos, 1 ) eq '?' ) {
$pos++;
next FRACTION;
}
else {
if ( $is_fraction
&& ( substr( $format, $pos, 1 ) =~ /[0-9]/ ) )
{
# TODO: Could invert if() logic and remove this.
$pos++;
next FRACTION;
}
else {
last FRACTION;
}
}
$pos++;
}
$pos--;
push @placeholders,
[
substr( $format, $token_start, $pos - $token_start + 1 ),
length($token), $pos - $token_start + 1
];
$token_start = -1;
}
elsif ( substr( $format, $pos, 3 ) =~ /^[eE][\+\-][0#]$/ ) {
if ( substr( $format, $pos ) =~ /([eE][\+\-][0#]+)/ ) {
push @placeholders, [ $1, $pos, length($1) ];
$pos += length($1);
}
$token_start = -1;
}
else {
if ( $token_start != -1 ) {
push @placeholders,
[
substr( $format, $token_start, $pos - $token_start ),
$decimal_pos, $pos - $token_start
];
$token_start = -1;
}
if ( substr( $format, $pos, 1 ) =~ /[\+\-]/ ) {
push @placeholders,
[ substr( $format, $pos, 1 ), length($token), 1 ];
$is_currency = 1;
}
elsif ( substr( $format, $pos, 1 ) eq '.' ) {
push @placeholders,
[ substr( $format, $pos, 1 ), length($token), 1 ];
$seen_dot = 1;
}
elsif ( substr( $format, $pos, 1 ) eq ',' ) {
$comma_count++;
push @placeholders,
[ substr( $format, $pos, 1 ), length($token), 1 ];
}
elsif ( substr( $format, $pos, 1 ) eq '%' ) {
$is_percent = 1;
}
elsif (( substr( $format, $pos, 1 ) eq '(' )
|| ( substr( $format, $pos, 1 ) eq ')' ) )
{
push @placeholders,
[ substr( $format, $pos, 1 ), length($token), 1 ];
$is_currency = 1;
}
}
$pos++;
}
elsif ( $char =~ /[ymdhsapg]/i ) {
$format_mode = 'date' unless $format_mode;
if ( substr( $format, $pos, 5 ) =~ /am\/pm/i ) {
push @placeholders, [ 'am/pm', length($token), 5 ];
$is_12_hour = 1;
$pos += 5;
}
elsif ( substr( $format, $pos, 3 ) =~ /a\/p/i ) {
push @placeholders, [ 'a/p', length($token), 3 ];
$is_12_hour = 1;
$pos += 3;
}
elsif ( substr( $format, $pos, 5 ) eq 'mmmmm' ) {
push @placeholders, [ 'mmmmm', length($token), 5 ];
$pos += 5;
}
elsif (( substr( $format, $pos, 4 ) eq 'mmmm' )
|| ( substr( $format, $pos, 4 ) eq 'dddd' )
|| ( substr( $format, $pos, 4 ) eq 'yyyy' )
|| ( substr( $format, $pos, 4 ) eq 'ggge' ) )
{
push @placeholders,
[ substr( $format, $pos, 4 ), length($token), 4 ];
$pos += 4;
}
elsif (( substr( $format, $pos, 3 ) eq 'ddd' )
|| ( substr( $format, $pos, 3 ) eq 'mmm' )
|| ( substr( $format, $pos, 3 ) eq 'yyy' ) )
{
push @placeholders,
[ substr( $format, $pos, 3 ), length($token), 3 ];
$pos += 3;
}
elsif (( substr( $format, $pos, 2 ) eq 'yy' )
|| ( substr( $format, $pos, 2 ) eq 'mm' )
|| ( substr( $format, $pos, 2 ) eq 'dd' )
|| ( substr( $format, $pos, 2 ) eq 'hh' )
|| ( substr( $format, $pos, 2 ) eq 'ss' )
|| ( substr( $format, $pos, 2 ) eq 'ge' ) )
{
if (
( substr( $format, $pos, 2 ) eq 'mm' )
&& (@placeholders)
&& ( ( $placeholders[-1]->[0] eq 'h' )
or ( $placeholders[-1]->[0] eq 'hh' ) )
)
{
# For this case 'm' is minutes not months.
push @placeholders, [ 'mm', length($token), 2, 'minutes' ];
}
else {
push @placeholders,
[ substr( $format, $pos, 2 ), length($token), 2 ];
}
if ( ( substr( $format, $pos, 2 ) eq 'ss' )
&& ( @placeholders > 1 ) )
{
if ( ( $placeholders[-2]->[0] eq 'm' )
|| ( $placeholders[-2]->[0] eq 'mm' ) )
{
# For this case 'm' is minutes not months.
push( @{ $placeholders[-2] }, 'minutes' );
}
}
$pos += 2;
}
elsif (( substr( $format, $pos, 1 ) eq 'm' )
|| ( substr( $format, $pos, 1 ) eq 'd' )
|| ( substr( $format, $pos, 1 ) eq 'h' )
|| ( substr( $format, $pos, 1 ) eq 's' ) )
{
if (
( substr( $format, $pos, 1 ) eq 'm' )
&& (@placeholders)
&& ( ( $placeholders[-1]->[0] eq 'h' )
or ( $placeholders[-1]->[0] eq 'hh' ) )
)
{
# For this case 'm' is minutes not months.
push @placeholders, [ 'm', length($token), 1, 'minutes' ];
}
else {
push @placeholders,
[ substr( $format, $pos, 1 ), length($token), 1 ];
}
if ( ( substr( $format, $pos, 1 ) eq 's' )
&& ( @placeholders > 1 ) )
{
if ( ( $placeholders[-2]->[0] eq 'm' )
|| ( $placeholders[-2]->[0] eq 'mm' ) )
{
# For this case 'm' is minutes not months.
push( @{ $placeholders[-2] }, 'minutes' );
}
}
$pos += 1;
}
}
elsif ( ( substr( $format, $pos, 3 ) eq '[h]' ) ) {
$format_mode = 'date' unless $format_mode;
push @placeholders, [ '[h]', length($token), 3 ];
$pos += 3;
}
elsif ( ( substr( $format, $pos, 4 ) eq '[mm]' ) ) {
$format_mode = 'date' unless $format_mode;
push @placeholders, [ '[mm]', length($token), 4 ];
$pos += 4;
}
elsif ( $char eq '@' ) {
push @placeholders, [ '@', length($token), 1 ];
$pos++;
}
elsif ( $char eq '*' ) {
push @placeholders,
[ substr( $format, $pos, 1 ), length($token), 1 ];
}
else {
$pos++;
}
$pos++ if ( $pos == $start_pos ); #No Format match
$token .= substr( $format, $start_pos, $pos - $start_pos );
} # End of parsing.
# Copy the located format string to a result string that we will perform
# the substitutions on and return to the user.
my $result = $token;
# Add a placeholder between the decimal/comma and end of the token, if any.
if ( $token_start != -1 ) {
push @placeholders,
[
substr( $format, $token_start, $pos - $token_start + 1 ),
$decimal_pos, $pos - $token_start + 1
];
}
#
# In the next sections we process date, number and text formats. We take a
# format such as yyyy/mm/dd and replace it with something like 2008/12/25.
#
if ( ( $format_mode eq 'date' ) && ( $number =~ $qrNUMBER ) ) {
# The maximum allowable date in Excel is 9999-12-31T23:59:59.000 which
# equates to 2958465.999+ in the 1900 epoch and 2957003.999+ in the
# 1904 epoch. We use 0 as the minimum in both epochs. The 1904 system
# actually supports negative numbers but that isn't worth the effort.
my $min_date = 0;
my $max_date = 2958466;
$max_date = 2957004 if $is_1904;
if ( $number < $min_date || $number >= $max_date ) {
return $number; # Return unformatted number.
}
# Process date formats.
my @time = ExcelLocaltime( $number, $is_1904 );
# 0 1 2 3 4 5 6 7
my ( $sec, $min, $hour, $day, $month, $year, $wday, $msec ) = @time;
$month++; # localtime() zero indexed month.
$year += 1900; # localtime() year.
my @full_month_name = qw(
None January February March April May June July
August September October November December
);
my @short_month_name = qw(
None Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
);
my @full_day_name = qw(
Sunday Monday Tuesday Wednesday Thursday Friday Saturday
);
my @short_day_name = qw(
Sun Mon Tue Wed Thu Fri Sat
);
# Replace the placeholders in the template such as yyyy mm dd with
# actual numbers or strings.
my $replacement;
for my $placeholder ( reverse @placeholders ) {
if ( $placeholder->[-1] eq 'minutes' ) {
# For this case 'm/mm' is minutes not months.
if ( $placeholder->[0] eq 'mm' ) {
$replacement = sprintf( "%02d", $min );
}
else {
$replacement = sprintf( "%d", $min );
}
}
elsif ( $placeholder->[0] eq 'yyyy' ) {
# 4 digit Year. 2000 -> 2000.
$replacement = sprintf( '%04d', $year );
}
elsif ( $placeholder->[0] eq 'yy' ) {
# 2 digit Year. 2000 -> 00.
$replacement = sprintf( '%02d', $year % 100 );
}
elsif ( $placeholder->[0] eq 'mmmmm' ) {
# First character of the month name. 1 -> J.
$replacement = substr( $short_month_name[$month], 0, 1 );
}
elsif ( $placeholder->[0] eq 'mmmm' ) {
# Full month name. 1 -> January.
$replacement = $full_month_name[$month];
}
elsif ( $placeholder->[0] eq 'mmm' ) {
# Short month name. 1 -> Jan.
$replacement = $short_month_name[$month];
}
elsif ( $placeholder->[0] eq 'mm' ) {
# 2 digit month. 1 -> 01.
$replacement = sprintf( '%02d', $month );
}
elsif ( $placeholder->[0] eq 'm' ) {
# 1 digit month. 1 -> 1.
$replacement = sprintf( '%d', $month );
}
elsif ( $placeholder->[0] eq 'dddd' ) {
# Full day name. Wednesday (for example.)
$replacement = $full_day_name[$wday];
}
elsif ( $placeholder->[0] eq 'ddd' ) {
# Short day name. Wed (for example.)
$replacement = $short_day_name[$wday];
}
elsif ( $placeholder->[0] eq 'dd' ) {
# 2 digit day. 1 -> 01.
$replacement = sprintf( '%02d', $day );
}
elsif ( $placeholder->[0] eq 'd' ) {
# 1 digit day. 1 -> 1.
$replacement = sprintf( '%d', $day );
}
elsif ( $placeholder->[0] eq 'hh' ) {
# 2 digit hour.
if ($is_12_hour) {
my $hour_tmp = $hour % 12;
$hour_tmp = 12 if $hour % 12 == 0;
$replacement = sprintf( '%d', $hour_tmp );
}
else {
$replacement = sprintf( '%02d', $hour );
}
}
elsif ( $placeholder->[0] eq 'h' ) {
# 1 digit hour.
if ($is_12_hour) {
my $hour_tmp = $hour % 12;
$hour_tmp = 12 if $hour % 12 == 0;
$replacement = sprintf( '%2d', $hour_tmp );
}
else {
$replacement = sprintf( '%d', $hour );
}
}
elsif ( $placeholder->[0] eq 'ss' ) {
# 2 digit seconds.
$replacement = sprintf( '%02d', $sec );
}
elsif ( $placeholder->[0] eq 's' ) {
# 1 digit seconds.
$replacement = sprintf( '%d', $sec );
}
elsif ( $placeholder->[0] eq 'am/pm' ) {
# AM/PM.
$replacement = ( $hour >= 12 ) ? 'PM' : 'AM';
}
elsif ( $placeholder->[0] eq 'a/p' ) {
# AM/PM.
$replacement = ( $hour >= 12 ) ? 'P' : 'A';
}
elsif ( $placeholder->[0] eq '.' ) {
# Decimal point for seconds.
$replacement = '.';
}
elsif ( $placeholder->[0] =~ /(^0+$)/ ) {
# Milliseconds. For example h:ss.000.
my $length = length($1);
$replacement =
substr( sprintf( "%.${length}f", $msec / 1000 ), 2, $length );
}
elsif ( $placeholder->[0] eq '[h]' ) {
# Hours modulus 24. 25 displays as 25 not as 1.
$replacement = sprintf( '%d', int($number) * 24 + $hour );
}
elsif ( $placeholder->[0] eq '[mm]' ) {
# Mins modulus 60. 72 displays as 72 not as 12.
$replacement =
sprintf( '%d', ( int($number) * 24 + $hour ) * 60 + $min );
}
elsif ( $placeholder->[0] eq 'ge' ) {
require Spreadsheet::ParseExcel::FmtJapan;
# Japanese Nengo (aka Gengo) in initialism (abbr. name)
$replacement =
Spreadsheet::ParseExcel::FmtJapan::CnvNengo( abbr_name => @time );
}
elsif ( $placeholder->[0] eq 'ggge' ) {
require Spreadsheet::ParseExcel::FmtJapan;
# Japanese Nengo (aka Gengo) in Kanji (full name)
$replacement =
Spreadsheet::ParseExcel::FmtJapan::CnvNengo( name => @time );
}
elsif ( $placeholder->[0] eq '@' ) {
# Text format.
$replacement = $number;
}
elsif ( $placeholder->[0] eq ',' ) {
next;
}
# Substitute the replacement string back into the template.
substr( $result, $placeholder->[1], $placeholder->[2],
$replacement );
}
}
elsif ( ( $format_mode eq 'number' ) && ( $number =~ $qrNUMBER ) ) {
# Process non date formats.
if (@placeholders) {
while ( $placeholders[-1]->[0] eq ',' ) {
$comma_count--;
substr(
$result,
$placeholders[-1]->[1],
$placeholders[-1]->[2], ''
);
$number /= 1000;
pop @placeholders;
}
my $number_format = join( '', map { $_->[0] } @placeholders );
my $number_result;
my $str_length = 0;
my $engineering = 0;
my $is_decimal = 0;
my $is_integer = 0;
my $after_decimal = undef;
for my $token ( split //, $number_format ) {
if ( $token eq '.' ) {
$str_length++;
$is_decimal = 1;
}
elsif ( ( $token eq 'E' ) || ( $token eq 'e' ) ) {
$engineering = 1;
}
elsif ( $token eq '0' ) {
$str_length++;
$after_decimal++ if $is_decimal;
$is_integer = 1;
}
elsif ( $token eq '#' ) {
$after_decimal++ if $is_decimal;
$is_integer = 1;
}
elsif ( $token eq '?' ) {
$after_decimal++ if $is_decimal;
}
}
$number *= 100.0 if $is_percent;
my $data = ($is_currency) ? abs($number) : $number + 0;
if ($is_fraction) {
$number_result = sprintf( "%0${str_length}d", int($data) );
}
else {
if ($is_decimal) {
if ( defined $after_decimal ) {
$number_result =
sprintf "%0${str_length}.${after_decimal}f", $data;
}
else {
$number_result = sprintf "%0${str_length}f", $data;
}
# Fix for Perl and sprintf not rounding up like Excel.
# http://rt.cpan.org/Public/Bug/Display.html?id=45626
if ( $data =~ /^${number_result}5/ ) {
$number_result =
sprintf "%0${str_length}.${after_decimal}f",
$data . '1';
}
}
else {
$number_result = sprintf( "%0${str_length}.0f", $data );
}
}
$number_result = AddComma($number_result) if $comma_count > 0;
my $number_length = length($number_result);
my $decimal_pos = -1;
my $replacement;
for ( my $i = @placeholders - 1 ; $i >= 0 ; $i-- ) {
my $placeholder = $placeholders[$i];
if ( $placeholder->[0] =~
/([#0]*)([\.]?)([0#]*)([eE])([\+\-])([0#]+)/ )
{
substr( $result, $placeholder->[1], $placeholder->[2],
MakeE( $placeholder->[0], $number ) );
}
elsif ( $placeholder->[0] =~ /\// ) {
substr( $result, $placeholder->[1], $placeholder->[2],
MakeFraction( $placeholder->[0], $number, $is_integer )
);
}
elsif ( $placeholder->[0] eq '.' ) {
$number_length--;
$decimal_pos = $number_length;
}
elsif ( $placeholder->[0] eq '+' ) {
substr( $result, $placeholder->[1], $placeholder->[2],
( $number > 0 )
? '+'
: ( ( $number == 0 ) ? '+' : '-' ) );
}
elsif ( $placeholder->[0] eq '-' ) {
substr( $result, $placeholder->[1], $placeholder->[2],
( $number > 0 )
? ''
: ( ( $number == 0 ) ? '' : '-' ) );
}
elsif ( $placeholder->[0] eq '@' ) {
substr( $result, $placeholder->[1], $placeholder->[2],
$number );
}
elsif ( $placeholder->[0] eq '*' ) {
substr( $result, $placeholder->[1], $placeholder->[2], '' );
}
elsif (( $placeholder->[0] eq "\xA2\xA4" )
or ( $placeholder->[0] eq "\xA2\xA5" )
or ( $placeholder->[0] eq "\x81\xA2" )
or ( $placeholder->[0] eq "\x81\xA3" ) )
{
substr(
$result, $placeholder->[1],
$placeholder->[2], $placeholder->[0]
);
}
elsif (( $placeholder->[0] eq '(' )
or ( $placeholder->[0] eq ')' ) )
{
substr(
$result, $placeholder->[1],
$placeholder->[2], $placeholder->[0]
);
}
else {
if ( $number_length > 0 ) {
if ( $i <= 0 ) {
$replacement =
substr( $number_result, 0, $number_length );
$number_length = 0;
}
else {
my $real_part_length = length( $placeholder->[0] );
if ( $decimal_pos >= 0 ) {
my $format = $placeholder->[0];
$format =~ s/^#+//;
$real_part_length = length $format;
$real_part_length =
( $number_length <= $real_part_length )
? $number_length
: $real_part_length;
}
else {
$real_part_length =
( $number_length <= $real_part_length )
? $number_length
: $real_part_length;
}
$replacement =
substr( $number_result,
$number_length - $real_part_length,
$real_part_length );
$number_length -= $real_part_length;
}
}
else {
$replacement = '';
}
substr( $result, $placeholder->[1], $placeholder->[2],
"\x00" . $replacement );
}
}
$replacement =
( $number_length > 0 )
? substr( $number_result, 0, $number_length )
: '';
$result =~ s/\x00/$replacement/;
$result =~ s/\x00//g;
}
}
else {
# Process text formats
my $is_text = 0;
for ( my $i = @placeholders - 1 ; $i >= 0 ; $i-- ) {
my $placeholder = $placeholders[$i];
if ( $placeholder->[0] eq '@' ) {
substr( $result, $placeholder->[1], $placeholder->[2],
$number );
$is_text++;
}
else {
substr( $result, $placeholder->[1], $placeholder->[2], '' );
}
}
$result = $number unless $is_text;
} # End of placeholder substitutions.
# Trim the leading and trailing whitespace from the results.
$result =~ s/^\s+//;
$result =~ s/\s+$//;
# Fix for negative currency.
$result =~ s/^\$\-/\-\$/;
$result =~ s/^\$ \-/\-\$ /;
# Return color and locale strings if required.
if ($want_subformats) {
return ( $result, $color, $locale );
}
else {
return $result;
}
}
#------------------------------------------------------------------------------
# AddComma (for Spreadsheet::ParseExcel::Utility)
#------------------------------------------------------------------------------
sub AddComma {
my ($sNum) = @_;
if ( $sNum =~ /^([^\d]*)(\d\d\d\d+)(\.*.*)$/ ) {
my ( $sPre, $sObj, $sAft ) = ( $1, $2, $3 );
for ( my $i = length($sObj) - 3 ; $i > 0 ; $i -= 3 ) {
substr( $sObj, $i, 0, ',' );
}
return $sPre . $sObj . $sAft;
}
else {
return $sNum;
}
}
#------------------------------------------------------------------------------
# MakeFraction (for Spreadsheet::ParseExcel::Utility)
#------------------------------------------------------------------------------
sub MakeFraction {
my ( $sFmt, $iData, $iFlg ) = @_;
my $iBunbo;
my $iShou;
#1. Init
# print "FLG: $iFlg\n";
if ($iFlg) {
$iShou = $iData - int($iData);
return '' if ( $iShou == 0 );
}
else {
$iShou = $iData;
}
$iShou = abs($iShou);
my $sSWk;
#2.Calc BUNBO
#2.1 BUNBO defined
if ( $sFmt =~ /\/(\d+)$/ ) {
$iBunbo = $1;
return sprintf( "%d/%d", $iShou * $iBunbo, $iBunbo );
}
else {
#2.2 Calc BUNBO
$sFmt =~ /\/(\?+)$/;
my $iKeta = length($1);
my $iSWk = 1;
my $sSWk = '';
my $iBunsi;
for ( my $iBunbo = 2 ; $iBunbo < 10**$iKeta ; $iBunbo++ ) {
$iBunsi = int( $iShou * $iBunbo + 0.5 );
my $iCmp = abs( $iShou - ( $iBunsi / $iBunbo ) );
if ( $iCmp < $iSWk ) {
$iSWk = $iCmp;
$sSWk = sprintf( "%d/%d", $iBunsi, $iBunbo );
last if ( $iSWk == 0 );
}
}
return $sSWk;
}
}
#------------------------------------------------------------------------------
# MakeE (for Spreadsheet::ParseExcel::Utility)
#------------------------------------------------------------------------------
sub MakeE {
my ( $sFmt, $iData ) = @_;
$sFmt =~ /(([#0]*)[\.]?[#0]*)([eE])([\+\-][0#]+)/;
my ( $sKari, $iKeta, $sE, $sSisu ) = ( $1, length($2), $3, $4 );
$iKeta = 1 if ( $iKeta <= 0 );
my $iLog10 = 0;
$iLog10 = ( $iData == 0 ) ? 0 : ( log( abs($iData) ) / log(10) );
$iLog10 = (
int( $iLog10 / $iKeta ) +
( ( ( $iLog10 - int( $iLog10 / $iKeta ) ) < 0 ) ? -1 : 0 ) ) * $iKeta;
my $sUe = ExcelFmt( $sKari, $iData * ( 10**( $iLog10 * -1 ) ), 0 );
my $sShita = ExcelFmt( $sSisu, $iLog10, 0 );
return $sUe . $sE . $sShita;
}
#------------------------------------------------------------------------------
# LeapYear (for Spreadsheet::ParseExcel::Utility)
#------------------------------------------------------------------------------
sub LeapYear {
my ($iYear) = @_;
return 1 if ( $iYear == 1900 ); #Special for Excel
return ( ( ( $iYear % 4 ) == 0 )
&& ( ( $iYear % 100 ) || ( $iYear % 400 ) == 0 ) )
? 1
: 0;
}
#------------------------------------------------------------------------------
# LocaltimeExcel (for Spreadsheet::ParseExcel::Utility)
#------------------------------------------------------------------------------
sub LocaltimeExcel {
my ( $iSec, $iMin, $iHour, $iDay, $iMon, $iYear, $iwDay, $iMSec, $flg1904 )
= @_;
#0. Init
$iMon++;
$iYear += 1900;
#1. Calc Time
my $iTime;
$iTime = $iHour;
$iTime *= 60;
$iTime += $iMin;
$iTime *= 60;
$iTime += $iSec;
$iTime += $iMSec / 1000.0 if ( defined($iMSec) );
$iTime /= 86400.0; #3600*24(1day in seconds)
my $iY;
my $iYDays;
#2. Calc Days
if ($flg1904) {
$iY = 1904;
$iTime--; #Start from Jan 1st
$iYDays = 366;
}
else {
$iY = 1900;
$iYDays = 366; #In Excel 1900 is leap year (That's not TRUE!)
}
while ( $iY < $iYear ) {
$iTime += $iYDays;
$iY++;
$iYDays = ( LeapYear($iY) ) ? 366 : 365;
}
for ( my $iM = 1 ; $iM < $iMon ; $iM++ ) {
if ( $iM == 1
|| $iM == 3
|| $iM == 5
|| $iM == 7
|| $iM == 8
|| $iM == 10
|| $iM == 12 )
{
$iTime += 31;
}
elsif ( $iM == 4 || $iM == 6 || $iM == 9 || $iM == 11 ) {
$iTime += 30;
}
elsif ( $iM == 2 ) {
$iTime += ( LeapYear($iYear) ) ? 29 : 28;
}
}
$iTime += $iDay;
return $iTime;
}
my @month_days = qw(
0 31 28 31 30 31 30 31 31 30 31 30 31
);
#------------------------------------------------------------------------------
# ExcelLocaltime (for Spreadsheet::ParseExcel::Utility)
#------------------------------------------------------------------------------
sub ExcelLocaltime {
my ( $dObj, $flg1904 ) = @_;
my ( $iSec, $iMin, $iHour, $iDay, $iMon, $iYear, $iwDay, $iMSec );
my ( $iDt, $iTime, $iYDays, $iMD );
$iDt = int($dObj);
$iTime = $dObj - $iDt;
#1. Calc Days
if ($flg1904) {
$iYear = 1904;
$iDt++; #Start from Jan 1st
$iYDays = 366;
$iwDay = ( ( $iDt + 4 ) % 7 );
}
else {
$iYear = 1900;
$iYDays = 366; #In Excel 1900 is leap year (That's not TRUE!)
$iwDay = ( ( $iDt + 6 ) % 7 );
}
while ( $iDt > $iYDays ) {
$iDt -= $iYDays;
$iYear++;
$iYDays =
( ( ( $iYear % 4 ) == 0 )
&& ( ( $iYear % 100 ) || ( $iYear % 400 ) == 0 ) ) ? 366 : 365;
}
$iYear -= 1900; # Localtime year is relative to 1900.
for ( $iMon = 1 ; $iMon <= 12 ; $iMon++ ) {
$iMD = $month_days[$iMon];
$iMD++ if $iMon == 2 and $iYear % 4 == 0;
last if ( $iDt <= $iMD );
$iDt -= $iMD;
}
#2. Calc Time
$iDay = $iDt;
$iTime += ( 0.0005 / 86400.0 );
if ($iTime >= 1.0)
{
$iTime -= int($iTime);
$iwDay = ($iwDay == 6) ? 0 : $iwDay + 1;
if ($iDay == $iMD)
{
if ($iMon == 12)
{
$iMon = 1;
$iYear++;
}
else
{
$iMon++;
}
$iDay = 1;
}
else
{
$iDay++;
}
}
# Localtime month is 0 based.
$iMon -= 1;
$iTime *= 24.0;
$iHour = int($iTime);
$iTime -= $iHour;
$iTime *= 60.0;
$iMin = int($iTime);
$iTime -= $iMin;
$iTime *= 60.0;
$iSec = int($iTime);
$iTime -= $iSec;
$iTime *= 1000.0;
$iMSec = int($iTime);
return ( $iSec, $iMin, $iHour, $iDay, $iMon, $iYear, $iwDay, $iMSec );
}
# -----------------------------------------------------------------------------
# col2int (for Spreadsheet::ParseExcel::Utility)
#------------------------------------------------------------------------------
# converts a excel row letter into an int for use in an array
sub col2int {
my $result = 0;
my $str = shift;
my $incr = 0;
for ( my $i = length($str) ; $i > 0 ; $i-- ) {
my $char = substr( $str, $i - 1 );
my $curr += ord( lc($char) ) - ord('a') + 1;
$curr *= $incr if ($incr);
$result += $curr;
$incr += 26;
}
# this is one out as we range 0..x-1 not 1..x
$result--;
return $result;
}
# -----------------------------------------------------------------------------
# int2col (for Spreadsheet::ParseExcel::Utility)
#------------------------------------------------------------------------------
### int2col
# convert a column number into column letters
# @note this is quite a brute force coarse method
# does not manage values over 701 (ZZ)
# @arg number, to convert
# @returns string, column name
#
sub int2col {
my $out = "";
my $val = shift;
do {
$out .= chr( ( $val % 26 ) + ord('A') );
$val = int( $val / 26 ) - 1;
} while ( $val >= 0 );
return scalar reverse $out;
}
# -----------------------------------------------------------------------------
# sheetRef (for Spreadsheet::ParseExcel::Utility)
#------------------------------------------------------------------------------
# -----------------------------------------------------------------------------
### sheetRef
# convert an excel letter-number address into a useful array address
# @note that also Excel uses X-Y notation, we normally use Y-X in arrays
# @args $str, excel coord eg. A2
# @returns an array - 2 elements - column, row, or undefined
#
sub sheetRef {
my $str = shift;
my @ret;
$str =~ m/^(\D+)(\d+)$/;
if ( $1 && $2 ) {
push( @ret, $2 - 1, col2int($1) );
}
if ( $ret[0] < 0 ) {
undef @ret;
}
return @ret;
}
# -----------------------------------------------------------------------------
# xls2csv (for Spreadsheet::ParseExcel::Utility)
#------------------------------------------------------------------------------
### xls2csv
# convert a chunk of an excel file into csv text chunk
# @args $param, sheet-colrow:colrow (1-A1:B2 or A1:B2 for sheet 1
# @args $rotate, 0 or 1 decides if output should be rotated or not
# @returns string containing a chunk of csv
#
sub xls2csv {
my ( $filename, $regions, $rotate ) = @_;
my $sheet = 0;
# We need Text::CSV_XS for proper CSV handling.
require Text::CSV_XS;
# extract any sheet number from the region string
$regions =~ m/^(\d+)-(.*)/;
if ($2) {
$sheet = $1 - 1;
$regions = $2;
}
# now extract the start and end regions
$regions =~ m/(.*):(.*)/;
if ( !$1 || !$2 ) {
print STDERR "Bad Params";
return "";
}
my @start = sheetRef($1);
my @end = sheetRef($2);
if ( !@start ) {
print STDERR "Bad coorinates - $1";
return "";
}
if ( !@end ) {
print STDERR "Bad coorinates - $2";
return "";
}
if ( $start[1] > $end[1] ) {
print STDERR "Bad COLUMN ordering\n";
print STDERR "Start column " . int2col( $start[1] );
print STDERR " after end column " . int2col( $end[1] ) . "\n";
return "";
}
if ( $start[0] > $end[0] ) {
print STDERR "Bad ROW ordering\n";
print STDERR "Start row " . ( $start[0] + 1 );
print STDERR " after end row " . ( $end[0] + 1 ) . "\n";
exit;
}
# start the excel object now
my $oExcel = new Spreadsheet::ParseExcel;
my $oBook = $oExcel->Parse($filename);
# open the sheet
my $oWkS = $oBook->{Worksheet}[$sheet];
# now check that the region exists in the file
# if not truncate to the possible region
# output a warning msg
if ( $start[1] < $oWkS->{MinCol} ) {
print STDERR int2col( $start[1] )
. " < min col "
. int2col( $oWkS->{MinCol} )
. " Resetting\n";
$start[1] = $oWkS->{MinCol};
}
if ( $end[1] > $oWkS->{MaxCol} ) {
print STDERR int2col( $end[1] )
. " > max col "
. int2col( $oWkS->{MaxCol} )
. " Resetting\n";
$end[1] = $oWkS->{MaxCol};
}
if ( $start[0] < $oWkS->{MinRow} ) {
print STDERR ""
. ( $start[0] + 1 )
. " < min row "
. ( $oWkS->{MinRow} + 1 )
. " Resetting\n";
$start[0] = $oWkS->{MinCol};
}
if ( $end[0] > $oWkS->{MaxRow} ) {
print STDERR ""
. ( $end[0] + 1 )
. " > max row "
. ( $oWkS->{MaxRow} + 1 )
. " Resetting\n";
$end[0] = $oWkS->{MaxRow};
}
my $x1 = $start[1];
my $y1 = $start[0];
my $x2 = $end[1];
my $y2 = $end[0];
my @cell_data;
my $row = 0;
if ( !$rotate ) {
for ( my $y = $y1 ; $y <= $y2 ; $y++ ) {
for ( my $x = $x1 ; $x <= $x2 ; $x++ ) {
my $cell = $oWkS->{Cells}[$y][$x];
my $value;
if ( defined $cell ) {
$value .= $cell->value();
}
else {
$value = '';
}
push @{ $cell_data[$row] }, $value;
}
$row++;
}
}
else {
for ( my $x = $x1 ; $x <= $x2 ; $x++ ) {
for ( my $y = $y1 ; $y <= $y2 ; $y++ ) {
my $cell = $oWkS->{Cells}[$y][$x];
my $value;
if ( defined $cell ) {
$value .= $cell->value();
}
else {
$value = '';
}
push @{ $cell_data[$row] }, $value;
}
$row++;
}
}
# Create the CSV output string.
my $csv = Text::CSV_XS->new( { binary => 1, eol => $/ } );
my $output = "";
for my $row (@cell_data) {
$csv->combine(@$row);
$output .= $csv->string();
}
return $output;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::Utility - Utility functions for Spreadsheet::ParseExcel.
=head1 SYNOPSIS
use Spreadsheet::ParseExcel::Utility qw(ExcelFmt ExcelLocaltime LocaltimeExcel);
# Convert localtime to Excel time
my $datetime = LocaltimeExcel(11, 10, 12, 23, 2, 64); # 1964-3-23 12:10:11
print $datetime, "\n"; # 23459.5070717593 (Excel date/time format)
# Convert Excel Time to localtime
my @time = ExcelLocaltime($datetime);
print join(":", @time), "\n"; # 11:10:12:23:2:64:1:0
# Formatting
print ExcelFmt('yyyy-mm-dd', $datetime), "\n"; # 1964-3-23
print ExcelFmt('m-d-yy', $datetime), "\n"; # 3-23-64
print ExcelFmt('#,##0', $datetime), "\n"; # 23,460
print ExcelFmt('#,##0.00', $datetime), "\n"; # 23,459.51
=head1 DESCRIPTION
The C<Spreadsheet::ParseExcel::Utility> module provides utility functions for working with ParseExcel and Excel data.
=head1 Functions
C<Spreadsheet::ParseExcel::Utility> can export the following functions:
ExcelFmt
ExcelLocaltime
LocaltimeExcel
col2int
int2col
sheetRef
xls2csv
These functions must be imported implicitly:
# Just one function.
use Spreadsheet::ParseExcel::Utility 'col2int';
# More than one.
use Spreadsheet::ParseExcel::Utility qw(ExcelFmt ExcelLocaltime LocaltimeExcel);
=head2 ExcelFmt($format_string, $number, $is_1904)
Excel stores data such as dates and currency values as numbers. The way these numbers are displayed is controlled by the number format string for the cell. For example a cell with a number format of C<'$#,##0.00'> for currency and a value of 1234.567 would be displayed as follows:
'$#,##0.00' + 1234.567 = '$1,234.57'.
The C<ExcelFmt()> function tries to emulate this formatting so that the user can convert raw numbers returned by C<Spreadsheet::ParseExel> to a desired format. For example:
print ExcelFmt('$#,##0.00', 1234.567); # $1,234.57.
The syntax of the function is:
my $text = ExcelFmt($format_string, $number, $is_1904);
Where C<$format_string> is an Excel number format string, C<$number> is a real or integer number and C<is_1904> is an optional flag to indicate that dates should use Excel's 1904 epoch instead of the default 1900 epoch.
C<ExcelFmt()> is also used internally to convert numbers returned by the C<Cell::unformatted()> method to the formatted value returned by the C<Cell::value()> method:
my $cell = $worksheet->get_cell( 0, 0 );
print $cell->unformatted(), "\n"; # 1234.567
print $cell->value(), "\n"; # $1,234.57
The most common usage for C<ExcelFmt> is to convert numbers to dates. Dates and times in Excel are represented by real numbers, for example "1 Jan 2001 12:30 PM" is represented by the number 36892.521. The integer part of the number stores the number of days since the epoch and the fractional part stores the percentage of the day. By applying an Excel number format the number is converted to the desired string representation:
print ExcelFmt('d mmm yyyy h:mm AM/PM', 36892.521); # 1 Jan 2001 12:30 PM
C<$is_1904> is an optional flag to indicate that dates should use Excel's 1904 epoch instead of the default 1900 epoch. Excel for Windows generally uses 1900 and Excel for Mac OS uses 1904. The C<$is1904> flag isn't required very often by a casual user and can usually be ignored.
=head2 ExcelLocaltime($excel_datetime, $is_1904)
The C<ExcelLocaltime()> function converts from an Excel date/time number to a C<localtime()>-like array of values:
my @time = ExcelLocaltime($excel_datetime);
# 0 1 2 3 4 5 6 7
my ( $sec, $min, $hour, $day, $month, $year, $wday, $msec ) = @time;
The array elements from C<(0 .. 6)> are the same as Perl's C<localtime()>. The last element C<$msec> is milliseconds. In particular it should be noted that, in common with C<localtime()>, the month is zero indexed and the year is the number of years since 1900. This means that you will usually need to do the following:
$month++;
$year += 1900;
See also Perl's documentation for L<localtime()|perlfunc>:
The C<$is_1904> flag is an optional. It is used to indicate that dates should use Excel's 1904 epoch instead of the default 1900 epoch.
=head2 LocaltimeExcel($sec, $min, $hour, $day, $month, $year, $wday, $msec, $is_1904)
The C<LocaltimeExcel()> function converts from a C<localtime()>-like array of values to an Excel date/time number:
$excel_datetime = LocaltimeExcel($sec, $min, $hour, $day, $month, $year, $wday, $msec);
The array elements from C<(0 .. 6)> are the same as Perl's C<localtime()>. The last element C<$msec> is milliseconds. In particular it should be noted that, in common with C<localtime()>, the month is zero indexed and the year is the number of years since 1900. See also Perl's documentation for L<localtime()|perlfunc>:
The C<$wday> and C<$msec> elements are usually optional. This time elements can also be zeroed if they aren't of interest:
# sec, min, hour, day, month, year
$excel_datetime = LocaltimeExcel( 0, 0, 0, 1, 0, 101 );
print ExcelFmt('d mmm yyyy', $excel_datetime); # 1 Jan 2001
The C<$is_1904> flag is also optional. It is used to indicate that dates should use Excel's 1904 epoch instead of the default 1900 epoch.
=head2 col2int($column)
The C<col2int()> function converts an Excel column letter to an zero-indexed column number:
print col2int('A'); # 0
print col2int('AA'); # 26
This function was contributed by Kevin Mulholland.
=head2 int2col($column_number)
The C<int2col()> function converts an zero-indexed Excel column number to a column letter:
print int2col(0); # 'A'
print int2col(26); # 'AA'
This function was contributed by Kevin Mulholland.
=head2 sheetRef($cell_string)
The C<sheetRef()> function converts an Excel cell reference in 'A1' notation to a zero-indexed C<(row, col)> pair.
my ($row, $col) = sheetRef('A1'); # ( 0, 0 )
my ($row, $col) = sheetRef('C2'); # ( 1, 2 )
This function was contributed by Kevin Mulholland.
=head2 xls2csv($filename, $region, $rotate)
The C<xls2csv()> function converts a section of an Excel file into a CSV text string.
$csv_text = xls2csv($filename, $region, $rotate);
Where:
$region = "sheet-colrow:colrow"
For example '1-A1:B2' means 'A1:B2' for sheet 1.
and
$rotate = 0 or 1 (output is rotated/transposed or not)
This function requires C<Text::CSV_XS> to be installed. It was contributed by Kevin Mulholland along with the C<xls2csv> script in the C<sample> directory of the distro.
See also the following xls2csv utilities: Ken Prows' C<xls2csv>: http://search.cpan.org/~ken/xls2csv/script/xls2csv and H.Merijn Brand's C<xls2csv> (which is part of Spreadsheet::Read): http://search.cpan.org/~hmbrand/Spreadsheet-Read/
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_UTILITY
$fatpacked{"Spreadsheet/ParseExcel/Workbook.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_WORKBOOK';
package Spreadsheet::ParseExcel::Workbook;
###############################################################################
#
# Spreadsheet::ParseExcel::Workbook - A class for Workbooks.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
our $VERSION = '0.65';
###############################################################################
#
# new()
#
# Constructor.
#
sub new {
my ($class) = @_;
my $self = {};
bless $self, $class;
}
###############################################################################
sub color_idx_to_rgb {
my( $workbook, $iidx ) = @_;
my $palette = $workbook->{aColor};
return ( ( defined $palette->[$iidx] ) ? $palette->[$iidx] : $palette->[0] );
}
###############################################################################
#
# worksheet()
#
# This method returns a single Worksheet object using either its name or index.
#
sub worksheet {
my ( $oBook, $sName ) = @_;
my $oWkS;
foreach $oWkS ( @{ $oBook->{Worksheet} } ) {
return $oWkS if ( $oWkS->{Name} eq $sName );
}
if ( $sName =~ /^\d+$/ ) {
return $oBook->{Worksheet}->[$sName];
}
return undef;
}
###############################################################################
#
# worksheets()
#
# Returns an array of Worksheet objects.
#
sub worksheets {
my $self = shift;
return @{ $self->{Worksheet} };
}
###############################################################################
#
# worksheet_count()
#
# Returns the number Woksheet objects in the Workbook.
#
sub worksheet_count {
my $self = shift;
return $self->{SheetCount};
}
###############################################################################
#
# get_filename()
#
# Returns the name of the Excel file of C<undef> if the data was read from a filehandle rather than a file.
#
sub get_filename {
my $self = shift;
return $self->{File};
}
###############################################################################
#
# get_print_areas()
#
# Returns an array ref of print areas.
#
# TODO. This should really be a Worksheet method.
#
sub get_print_areas {
my $self = shift;
return $self->{PrintArea};
}
###############################################################################
#
# get_print_titles()
#
# Returns an array ref of print title hash refs.
#
# TODO. This should really be a Worksheet method.
#
sub get_print_titles {
my $self = shift;
return $self->{PrintTitle};
}
###############################################################################
#
# using_1904_date()
#
# Returns true if the Excel file is using the 1904 date epoch.
#
sub using_1904_date {
my $self = shift;
return $self->{Flg1904};
}
###############################################################################
#
# ParseAbort()
#
# Todo
#
sub ParseAbort {
my ( $self, $val ) = @_;
$self->{_ParseAbort} = $val;
}
=head2 get_active_sheet()
Return the number of the active (open) worksheet (at the time the workbook
was saved. May return undef.
=cut
sub get_active_sheet {
my $workbook = shift;
return $workbook->{ActiveSheet};
}
###############################################################################
#
# Parse(). Deprecated.
#
# Syntactic wrapper around Spreadsheet::ParseExcel::Parse().
# This method is *deprecated* since it doesn't conform to the current
# error handling in the S::PE Parse() method.
#
sub Parse {
my ( $class, $source, $formatter ) = @_;
my $excel = Spreadsheet::ParseExcel->new();
my $workbook = $excel->Parse( $source, $formatter );
$workbook->{_Excel} = $excel;
return $workbook;
}
###############################################################################
#
# Mapping between legacy method names and new names.
#
{
no warnings; # Ignore warnings about variables used only once.
*Worksheet = *worksheet;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::Workbook - A class for Workbooks.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for L<Spreadsheet::ParseExcel>.
=head1 Methods
The following Workbook methods are available:
$workbook->worksheets()
$workbook->worksheet()
$workbook->worksheet_count()
$workbook->get_filename()
$workbook->get_print_areas()
$workbook->get_print_titles()
$workbook->using_1904_date()
=head2 worksheets()
The C<worksheets()> method returns an array of Worksheet objects. This was most commonly used to iterate over the worksheets in a workbook:
for my $worksheet ( $workbook->worksheets() ) {
...
}
=head2 worksheet()
The C<worksheet()> method returns a single C<Worksheet> object using either its name or index:
$worksheet = $workbook->worksheet('Sheet1');
$worksheet = $workbook->worksheet(0);
Returns C<undef> if the sheet name or index doesn't exist.
=head2 worksheet_count()
The C<worksheet_count()> method returns the number of Woksheet objects in the Workbook.
my $worksheet_count = $workbook->worksheet_count();
=head2 get_filename()
The C<get_filename()> method returns the name of the Excel file of C<undef> if the data was read from a filehandle rather than a file.
my $filename = $workbook->get_filename();
=head2 get_print_areas()
The C<get_print_areas()> method returns an array ref of print areas.
my $print_areas = $workbook->get_print_areas();
Each print area is as follows:
[ $start_row, $start_col, $end_row, $end_col ]
Returns undef if there are no print areas.
=head2 get_print_titles()
The C<get_print_titles()> method returns an array ref of print title hash refs.
my $print_titles = $workbook->get_print_titles();
Each print title array ref is as follows:
{
Row => [ $start_row, $end_row ],
Column => [ $start_col, $end_col ],
}
Returns undef if there are no print titles.
=head2 using_1904_date()
The C<using_1904_date()> method returns true if the Excel file is using the 1904 date epoch instead of the 1900 epoch.
my $using_1904_date = $workbook->using_1904_date();
The Windows version of Excel generally uses the 1900 epoch while the Mac version of Excel generally uses the 1904 epoch.
Returns 0 if the 1900 epoch is in use.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_WORKBOOK
$fatpacked{"Spreadsheet/ParseExcel/Worksheet.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_PARSEEXCEL_WORKSHEET';
package Spreadsheet::ParseExcel::Worksheet;
###############################################################################
#
# Spreadsheet::ParseExcel::Worksheet - A class for Worksheets.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use Scalar::Util qw(weaken);
our $VERSION = '0.65';
###############################################################################
#
# new()
#
sub new {
my ( $class, %properties ) = @_;
my $self = \%properties;
weaken $self->{_Book};
$self->{Cells} = undef;
$self->{DefColWidth} = 8.43;
return bless $self, $class;
}
###############################################################################
#
# get_cell( $row, $col )
#
# Returns the Cell object at row $row and column $col, if defined.
#
sub get_cell {
my ( $self, $row, $col ) = @_;
if ( !defined $row
|| !defined $col
|| !defined $self->{MaxRow}
|| !defined $self->{MaxCol} )
{
# Return undef if no arguments are given or if no cells are defined.
return undef;
}
elsif ($row < $self->{MinRow}
|| $row > $self->{MaxRow}
|| $col < $self->{MinCol}
|| $col > $self->{MaxCol} )
{
# Return undef if outside allowable row/col range.
return undef;
}
else {
# Return the Cell object.
return $self->{Cells}->[$row]->[$col];
}
}
###############################################################################
#
# row_range()
#
# Returns a two-element list ($min, $max) containing the minimum and maximum
# defined rows in the worksheet.
#
# If there is no row defined $max is smaller than $min.
#
sub row_range {
my $self = shift;
my $min = $self->{MinRow} || 0;
my $max = defined( $self->{MaxRow} ) ? $self->{MaxRow} : ( $min - 1 );
return ( $min, $max );
}
###############################################################################
#
# col_range()
#
# Returns a two-element list ($min, $max) containing the minimum and maximum
# defined cols in the worksheet.
#
# If there is no column defined $max is smaller than $min.
#
sub col_range {
my $self = shift;
my $min = $self->{MinCol} || 0;
my $max = defined( $self->{MaxCol} ) ? $self->{MaxCol} : ( $min - 1 );
return ( $min, $max );
}
###############################################################################
#
# get_name()
#
# Returns the name of the worksheet.
#
sub get_name {
my $self = shift;
return $self->{Name};
}
###############################################################################
#
# sheet_num()
#
sub sheet_num {
my $self = shift;
return $self->{_SheetNo};
}
###############################################################################
#
# get_h_pagebreaks()
#
# Returns an array ref of row numbers where a horizontal page break occurs.
#
sub get_h_pagebreaks {
my $self = shift;
return $self->{HPageBreak};
}
###############################################################################
#
# get_v_pagebreaks()
#
# Returns an array ref of column numbers where a vertical page break occurs.
#
sub get_v_pagebreaks {
my $self = shift;
return $self->{VPageBreak};
}
###############################################################################
#
# get_merged_areas()
#
# Returns an array ref of cells that are merged.
#
sub get_merged_areas {
my $self = shift;
return $self->{MergedArea};
}
###############################################################################
#
# get_row_heights()
#
# Returns an array of row heights.
#
sub get_row_heights {
my $self = shift;
if ( wantarray() ) {
return unless $self->{RowHeight};
return @{ $self->{RowHeight} };
}
return $self->{RowHeight};
}
###############################################################################
#
# get_col_widths()
#
# Returns an array of column widths.
#
sub get_col_widths {
my $self = shift;
if ( wantarray() ) {
return unless $self->{ColWidth};
return @{ $self->{ColWidth} };
}
return $self->{ColWidth};
}
###############################################################################
#
# get_default_row_height()
#
# Returns the default row height for the worksheet. Generally 12.75.
#
sub get_default_row_height {
my $self = shift;
return $self->{DefRowHeight};
}
###############################################################################
#
# get_default_col_width()
#
# Returns the default column width for the worksheet. Generally 8.43.
#
sub get_default_col_width {
my $self = shift;
return $self->{DefColWidth};
}
###############################################################################
#
# _get_row_properties()
#
# Returns an array_ref of row properties.
# TODO. This is a placeholder for a future method.
#
sub _get_row_properties {
my $self = shift;
return $self->{RowProperties};
}
###############################################################################
#
# _get_col_properties()
#
# Returns an array_ref of column properties.
# TODO. This is a placeholder for a future method.
#
sub _get_col_properties {
my $self = shift;
return $self->{ColProperties};
}
###############################################################################
#
# get_header()
#
# Returns the worksheet header string.
#
sub get_header {
my $self = shift;
return $self->{Header};
}
###############################################################################
#
# get_footer()
#
# Returns the worksheet footer string.
#
sub get_footer {
my $self = shift;
return $self->{Footer};
}
###############################################################################
#
# get_margin_left()
#
# Returns the left margin of the worksheet in inches.
#
sub get_margin_left {
my $self = shift;
return $self->{LeftMargin};
}
###############################################################################
#
# get_margin_right()
#
# Returns the right margin of the worksheet in inches.
#
sub get_margin_right {
my $self = shift;
return $self->{RightMargin};
}
###############################################################################
#
# get_margin_top()
#
# Returns the top margin of the worksheet in inches.
#
sub get_margin_top {
my $self = shift;
return $self->{TopMargin};
}
###############################################################################
#
# get_margin_bottom()
#
# Returns the bottom margin of the worksheet in inches.
#
sub get_margin_bottom {
my $self = shift;
return $self->{BottomMargin};
}
###############################################################################
#
# get_margin_header()
#
# Returns the header margin of the worksheet in inches.
#
sub get_margin_header {
my $self = shift;
return $self->{HeaderMargin};
}
###############################################################################
#
# get_margin_footer()
#
# Returns the footer margin of the worksheet in inches.
#
sub get_margin_footer {
my $self = shift;
return $self->{FooterMargin};
}
###############################################################################
#
# get_paper()
#
# Returns the printer paper size.
#
sub get_paper {
my $self = shift;
return $self->{PaperSize};
}
###############################################################################
#
# get_start_page()
#
# Returns the page number that printing will start from.
#
sub get_start_page {
my $self = shift;
# Only return the page number if the "First page number" option is set.
if ( $self->{UsePage} ) {
return $self->{PageStart};
}
else {
return 0;
}
}
###############################################################################
#
# get_print_order()
#
# Returns the Worksheet page printing order.
#
sub get_print_order {
my $self = shift;
return $self->{LeftToRight};
}
###############################################################################
#
# get_print_scale()
#
# Returns the workbook scale for printing.
#
sub get_print_scale {
my $self = shift;
return $self->{Scale};
}
###############################################################################
#
# get_fit_to_pages()
#
# Returns the number of pages wide and high that the printed worksheet page
# will fit to.
#
sub get_fit_to_pages {
my $self = shift;
if ( !$self->{PageFit} ) {
return ( 0, 0 );
}
else {
return ( $self->{FitWidth}, $self->{FitHeight} );
}
}
###############################################################################
#
# is_portrait()
#
# Returns true if the worksheet has been set for printing in portrait mode.
#
sub is_portrait {
my $self = shift;
return $self->{Landscape};
}
###############################################################################
#
# is_centered_horizontally()
#
# Returns true if the worksheet has been centered horizontally for printing.
#
sub is_centered_horizontally {
my $self = shift;
return $self->{HCenter};
}
###############################################################################
#
# is_centered_vertically()
#
# Returns true if the worksheet has been centered vertically for printing.
#
sub is_centered_vertically {
my $self = shift;
return $self->{HCenter};
}
###############################################################################
#
# is_print_gridlines()
#
# Returns true if the worksheet print "gridlines" option is turned on.
#
sub is_print_gridlines {
my $self = shift;
return $self->{PrintGrid};
}
###############################################################################
#
# is_print_row_col_headers()
#
# Returns true if the worksheet print "row and column headings" option is on.
#
sub is_print_row_col_headers {
my $self = shift;
return $self->{PrintHeaders};
}
###############################################################################
#
# is_print_black_and_white()
#
# Returns true if the worksheet print "black and white" option is turned on.
#
sub is_print_black_and_white {
my $self = shift;
return $self->{NoColor};
}
###############################################################################
#
# is_print_draft()
#
# Returns true if the worksheet print "draft" option is turned on.
#
sub is_print_draft {
my $self = shift;
return $self->{Draft};
}
###############################################################################
#
# is_print_comments()
#
# Returns true if the worksheet print "comments" option is turned on.
#
sub is_print_comments {
my $self = shift;
return $self->{Notes};
}
=head2 get_tab_color()
Return color index of tab, or undef if not set.
=cut
sub get_tab_color {
my $worksheet = shift;
return $worksheet->{TabColor};
}
=head2 is_sheet_hidden()
Return true if sheet is hidden
=cut
sub is_sheet_hidden {
my $worksheet = shift;
return $worksheet->{SheetHidden};
}
=head2 is_row_hidden($row)
In scalar context, return true if $row is hidden
In array context, return an array whose elements are true
if the corresponding row is hidden.
=cut
sub is_row_hidden {
my $worksheet = shift;
my ($row) = @_;
unless ( $worksheet->{RowHidden} ) {
return () if (wantarray);
return 0;
}
return @{ $worksheet->{RowHidden} } if (wantarray);
return $worksheet->{RowHidden}[$row];
}
=head2 is_col_hidden($col)
In scalar context, return true if $col is hidden
In array context, return an array whose elements are true
if the corresponding column is hidden.
=cut
sub is_col_hidden {
my $worksheet = shift;
my ($col) = @_;
unless ( $worksheet->{ColHidden} ) {
return () if (wantarray);
return 0;
}
return @{ $worksheet->{ColHidden} } if (wantarray);
return $worksheet->{ColHidden}[$col];
}
###############################################################################
#
# Mapping between legacy method names and new names.
#
{
no warnings; # Ignore warnings about variables used only once.
*sheetNo = *sheet_num;
*Cell = *get_cell;
*RowRange = *row_range;
*ColRange = *col_range;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::Worksheet - A class for Worksheets.
=head1 SYNOPSIS
See the documentation for L<Spreadsheet::ParseExcel>.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 Methods
The C<Spreadsheet::ParseExcel::Worksheet> class encapsulates the properties of an Excel worksheet. It has the following methods:
$worksheet->get_cell()
$worksheet->row_range()
$worksheet->col_range()
$worksheet->get_name()
$worksheet->get_h_pagebreaks()
$worksheet->get_v_pagebreaks()
$worksheet->get_merged_areas()
$worksheet->get_row_heights()
$worksheet->get_col_widths()
$worksheet->get_default_row_height()
$worksheet->get_default_col_width()
$worksheet->get_header()
$worksheet->get_footer()
$worksheet->get_margin_left()
$worksheet->get_margin_right()
$worksheet->get_margin_top()
$worksheet->get_margin_bottom()
$worksheet->get_margin_header()
$worksheet->get_margin_footer()
$worksheet->get_paper()
$worksheet->get_start_page()
$worksheet->get_print_order()
$worksheet->get_print_scale()
$worksheet->get_fit_to_pages()
$worksheet->is_portrait()
$worksheet->is_centered_horizontally()
$worksheet->is_centered_vertically()
$worksheet->is_print_gridlines()
$worksheet->is_print_row_col_headers()
$worksheet->is_print_black_and_white()
$worksheet->is_print_draft()
$worksheet->is_print_comments()
=head2 get_cell($row, $col)
Return the L</Cell> object at row C<$row> and column C<$col> if it is defined. Otherwise returns undef.
my $cell = $worksheet->get_cell($row, $col);
=head2 row_range()
Returns a two-element list C<($min, $max)> containing the minimum and maximum defined rows in the worksheet. If there is no row defined C<$max> is smaller than C<$min>.
my ( $row_min, $row_max ) = $worksheet->row_range();
=head2 col_range()
Returns a two-element list C<($min, $max)> containing the minimum and maximum of defined columns in the worksheet. If there is no column defined C<$max> is smaller than C<$min>.
my ( $col_min, $col_max ) = $worksheet->col_range();
=head2 get_name()
The C<get_name()> method returns the name of the worksheet.
my $name = $worksheet->get_name();
=head2 get_h_pagebreaks()
The C<get_h_pagebreaks()> method returns an array ref of row numbers where a horizontal page break occurs.
my $h_pagebreaks = $worksheet->get_h_pagebreaks();
Returns C<undef> if there are no pagebreaks.
=head2 get_v_pagebreaks()
The C<get_v_pagebreaks()> method returns an array ref of column numbers where a vertical page break occurs.
my $v_pagebreaks = $worksheet->get_v_pagebreaks();
Returns C<undef> if there are no pagebreaks.
=head2 get_merged_areas()
The C<get_merged_areas()> method returns an array ref of cells that are merged.
my $merged_areas = $worksheet->get_merged_areas();
Each merged area is represented as follows:
[ $start_row, $start_col, $end_row, $end_col]
Returns C<undef> if there are no merged areas.
=head2 get_row_heights()
The C<get_row_heights()> method returns an array_ref of row heights in scalar context,
and an array in list context.
my $row_heights = $worksheet->get_row_heights();
Returns C<undef> if the property isn't set.
=head2 get_col_widths()
The C<get_col_widths()> method returns an array_ref of column widths in scalar context,
and an array in list context.
my $col_widths = $worksheet->get_col_widths();
Returns C<undef> if the property isn't set.
=head2 get_default_row_height()
The C<get_default_row_height()> method returns the default row height for the worksheet. Generally 12.75.
my $default_row_height = $worksheet->get_default_row_height();
=head2 get_default_col_width()
The C<get_default_col_width()> method returns the default column width for the worksheet. Generally 8.43.
my $default_col_width = $worksheet->get_default_col_width();
=head2 get_header()
The C<get_header()> method returns the worksheet header string. This string can contain control codes for alignment and font properties. Refer to the Excel on-line help on headers and footers or to the Spreadsheet::WriteExcel documentation for set_header().
my $header = $worksheet->get_header();
Returns C<undef> if the property isn't set.
=head2 get_footer()
The C<get_footer()> method returns the worksheet footer string. This string can contain control codes for alignment and font properties. Refer to the Excel on-line help on headers and footers or to the Spreadsheet::WriteExcel documentation for set_header().
my $footer = $worksheet->get_footer();
Returns C<undef> if the property isn't set.
=head2 get_margin_left()
The C<get_margin_left()> method returns the left margin of the worksheet in inches.
my $margin_left = $worksheet->get_margin_left();
Returns C<undef> if the property isn't set.
=head2 get_margin_right()
The C<get_margin_right()> method returns the right margin of the worksheet in inches.
my $margin_right = $worksheet->get_margin_right();
Returns C<undef> if the property isn't set.
=head2 get_margin_top()
The C<get_margin_top()> method returns the top margin of the worksheet in inches.
my $margin_top = $worksheet->get_margin_top();
Returns C<undef> if the property isn't set.
=head2 get_margin_bottom()
The C<get_margin_bottom()> method returns the bottom margin of the worksheet in inches.
my $margin_bottom = $worksheet->get_margin_bottom();
Returns C<undef> if the property isn't set.
=head2 get_margin_header()
The C<get_margin_header()> method returns the header margin of the worksheet in inches.
my $margin_header = $worksheet->get_margin_header();
Returns a default value of 0.5 if not set.
=head2 get_margin_footer()
The C<get_margin_footer()> method returns the footer margin of the worksheet in inches.
my $margin_footer = $worksheet->get_margin_footer();
Returns a default value of 0.5 if not set.
=head2 get_paper()
The C<get_paper()> method returns the printer paper size.
my $paper = $worksheet->get_paper();
The value corresponds to the formats shown below:
Index Paper format Paper size
===== ============ ==========
0 Printer default -
1 Letter 8 1/2 x 11 in
2 Letter Small 8 1/2 x 11 in
3 Tabloid 11 x 17 in
4 Ledger 17 x 11 in
5 Legal 8 1/2 x 14 in
6 Statement 5 1/2 x 8 1/2 in
7 Executive 7 1/4 x 10 1/2 in
8 A3 297 x 420 mm
9 A4 210 x 297 mm
10 A4 Small 210 x 297 mm
11 A5 148 x 210 mm
12 B4 250 x 354 mm
13 B5 182 x 257 mm
14 Folio 8 1/2 x 13 in
15 Quarto 215 x 275 mm
16 - 10x14 in
17 - 11x17 in
18 Note 8 1/2 x 11 in
19 Envelope 9 3 7/8 x 8 7/8
20 Envelope 10 4 1/8 x 9 1/2
21 Envelope 11 4 1/2 x 10 3/8
22 Envelope 12 4 3/4 x 11
23 Envelope 14 5 x 11 1/2
24 C size sheet -
25 D size sheet -
26 E size sheet -
27 Envelope DL 110 x 220 mm
28 Envelope C3 324 x 458 mm
29 Envelope C4 229 x 324 mm
30 Envelope C5 162 x 229 mm
31 Envelope C6 114 x 162 mm
32 Envelope C65 114 x 229 mm
33 Envelope B4 250 x 353 mm
34 Envelope B5 176 x 250 mm
35 Envelope B6 176 x 125 mm
36 Envelope 110 x 230 mm
37 Monarch 3.875 x 7.5 in
38 Envelope 3 5/8 x 6 1/2 in
39 Fanfold 14 7/8 x 11 in
40 German Std Fanfold 8 1/2 x 12 in
41 German Legal Fanfold 8 1/2 x 13 in
256 User defined
The two most common paper sizes are C<1 = "US Letter"> and C<9 = A4>. Returns 9 by default.
=head2 get_start_page()
The C<get_start_page()> method returns the page number that printing will start from.
my $start_page = $worksheet->get_start_page();
Returns 0 if the property isn't set.
=head2 get_print_order()
The C<get_print_order()> method returns 0 if the worksheet print "page order" is "Down then over" (the default) or 1 if it is "Over then down".
my $print_order = $worksheet->get_print_order();
=head2 get_print_scale()
The C<get_print_scale()> method returns the workbook scale for printing. The print scale factor can be in the range 10 .. 400.
my $print_scale = $worksheet->get_print_scale();
Returns 100 by default.
=head2 get_fit_to_pages()
The C<get_fit_to_pages()> method returns the number of pages wide and high that the printed worksheet page will fit to.
my ($pages_wide, $pages_high) = $worksheet->get_fit_to_pages();
Returns (0, 0) if the property isn't set.
=head2 is_portrait()
The C<is_portrait()> method returns true if the worksheet has been set for printing in portrait mode.
my $is_portrait = $worksheet->is_portrait();
Returns 0 if the worksheet has been set for printing in horizontal mode.
=head2 is_centered_horizontally()
The C<is_centered_horizontally()> method returns true if the worksheet has been centered horizontally for printing.
my $is_centered_horizontally = $worksheet->is_centered_horizontally();
Returns 0 if the property isn't set.
=head2 is_centered_vertically()
The C<is_centered_vertically()> method returns true if the worksheet has been centered vertically for printing.
my $is_centered_vertically = $worksheet->is_centered_vertically();
Returns 0 if the property isn't set.
=head2 is_print_gridlines()
The C<is_print_gridlines()> method returns true if the worksheet print "gridlines" option is turned on.
my $is_print_gridlines = $worksheet->is_print_gridlines();
Returns 0 if the property isn't set.
=head2 is_print_row_col_headers()
The C<is_print_row_col_headers()> method returns true if the worksheet print "row and column headings" option is turned on.
my $is_print_row_col_headers = $worksheet->is_print_row_col_headers();
Returns 0 if the property isn't set.
=head2 is_print_black_and_white()
The C<is_print_black_and_white()> method returns true if the worksheet print "black and white" option is turned on.
my $is_print_black_and_white = $worksheet->is_print_black_and_white();
Returns 0 if the property isn't set.
=head2 is_print_draft()
The C<is_print_draft()> method returns true if the worksheet print "draft" option is turned on.
my $is_print_draft = $worksheet->is_print_draft();
Returns 0 if the property isn't set.
=head2 is_print_comments()
The C<is_print_comments()> method returns true if the worksheet print "comments" option is turned on.
my $is_print_comments = $worksheet->is_print_comments();
Returns 0 if the property isn't set.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_PARSEEXCEL_WORKSHEET
$fatpacked{"Spreadsheet/XLSX.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX';
package Spreadsheet::XLSX;
use 5.006000;
use strict;
use warnings;
use base 'Spreadsheet::ParseExcel::Workbook';
our $VERSION = '0.15';
use Archive::Zip;
use Spreadsheet::ParseExcel;
use Spreadsheet::XLSX::Fmt2007;
################################################################################
sub new {
my ($class, $filename, $converter) = @_;
my %shared_info; # shared_strings, styles, style_info, rels, converter
$shared_info{converter} = $converter;
my $self = bless Spreadsheet::ParseExcel::Workbook->new(), $class;
my $zip = __load_zip($filename);
$shared_info{shared_strings}= __load_shared_strings($zip, $shared_info{converter});
my ($styles, $style_info) = __load_styles($zip);
$shared_info{styles} = $styles;
$shared_info{style_info} = $style_info;
$shared_info{rels} = __load_rels($zip);
$self->_load_workbook($zip, \%shared_info);
return $self;
}
sub _load_workbook {
my ($self, $zip, $shared_info) = @_;
my $member_workbook = $zip->memberNamed('xl/workbook.xml') or die("xl/workbook.xml not found in this zip\n");
$self->{SheetCount} = 0;
$self->{FmtClass} = Spreadsheet::XLSX::Fmt2007->new;
$self->{Flg1904} = 0;
if ($member_workbook->contents =~ /date1904="1"/) {
$self->{Flg1904} = 1;
}
foreach ($member_workbook->contents =~ /\<(.*?)\/?\>/g) {
/^(\w+)\s+/;
my ($tag, $other) = ($1, $');
my @pairs = split /\" /, $other;
$tag eq 'sheet' or next;
my $sheet = {
MaxRow => 0,
MaxCol => 0,
MinRow => 1000000,
MinCol => 1000000,
};
foreach ($other =~ /(\S+=".*?")/gsm) {
my ($k, $v) = split /=?"/; #"
if ($k eq 'name') {
$sheet->{Name} = $v;
$sheet->{Name} = $shared_info->{converter}->convert($sheet->{Name}) if defined $shared_info->{converter};
} elsif ($k eq 'r:id') {
$sheet->{path} = $shared_info->{rels}->{$v};
}
}
my $wsheet = Spreadsheet::ParseExcel::Worksheet->new(%$sheet);
$self->{Worksheet}[$self->{SheetCount}] = $wsheet;
$self->{SheetCount} += 1;
}
foreach my $sheet (@{$self->{Worksheet}}) {
my $member_sheet = $zip->memberNamed("xl/$sheet->{path}") or next;
my ($row, $col);
my $parsing_v_tag = 0;
my $s = 0;
my $s2 = 0;
my $sty = 0;
foreach ($member_sheet->contents =~ /(\<.*?\/?\>|.*?(?=\<))/g) {
if (/^\<c\s*.*?\s*r=\"([A-Z])([A-Z]?)(\d+)\"/) {
($row, $col) = __decode_cell_name($1, $2, $3);
$s = m/t=\"s\"/ ? 1 : 0;
$s2 = m/t=\"str\"/ ? 1 : 0;
$sty = m/s="([0-9]+)"/ ? $1 : 0;
} elsif (/^<v>/) {
$parsing_v_tag = 1;
} elsif (/^<\/v>/) {
$parsing_v_tag = 0;
} elsif (length($_) && $parsing_v_tag) {
my $v = $s ? $shared_info->{shared_strings}->[$_] : $_;
if ($v eq "</c>") {
$v = "";
}
my $type = "Text";
my $thisstyle = "";
if (not($s) && not($s2)) {
$type = "Numeric";
if (defined $sty && defined $shared_info->{styles}->[$sty]) {
$thisstyle = $shared_info->{style_info}->{$shared_info->{styles}->[$sty]};
if ($thisstyle =~ /\b(mmm|m|d|yy|h|hh|mm|ss)\b/) {
$type = "Date";
}
}
}
$sheet->{MaxRow} = $row if $sheet->{MaxRow} < $row;
$sheet->{MaxCol} = $col if $sheet->{MaxCol} < $col;
$sheet->{MinRow} = $row if $sheet->{MinRow} > $row;
$sheet->{MinCol} = $col if $sheet->{MinCol} > $col;
if ($v =~ /(.*)E\-(.*)/gsm && $type eq "Numeric") {
$v = $1 / (10**$2); # this handles scientific notation for very small numbers
}
my $cell = Spreadsheet::ParseExcel::Cell->new(
Val => $v,
Format => $thisstyle,
Type => $type
);
$cell->{_Value} = $self->{FmtClass}->ValFmt($cell, $self);
if ($type eq "Date") {
if ($v < 1) { #then this is Excel time field
$cell->{Type} = "Text";
}
$cell->{Val} = $cell->{_Value};
}
$sheet->{Cells}[$row][$col] = $cell;
}
}
$sheet->{MinRow} = 0 if $sheet->{MinRow} > $sheet->{MaxRow};
$sheet->{MinCol} = 0 if $sheet->{MinCol} > $sheet->{MaxCol};
}
return $self;
}
# Convert cell name in the format AA1 to a row and column number.
sub __decode_cell_name {
my ($letter1, $letter2, $digits) = @_;
my $col = ord($letter1) - 65;
if ($letter2) {
$col++;
$col *= 26;
$col += (ord($letter2) - 65);
}
my $row = $digits - 1;
return ($row, $col);
}
sub __load_shared_strings {
my ($zip, $converter) = @_;
my $member_shared_strings = $zip->memberNamed('xl/sharedStrings.xml');
my @shared_strings = ();
if ($member_shared_strings) {
my $mstr = $member_shared_strings->contents;
$mstr =~ s/<t\/>/<t><\/t>/gsm; # this handles an empty t tag in the xml <t/>
foreach my $si ($mstr =~ /<si.*?>(.*?)<\/si/gsm) {
my $str;
foreach my $t ($si =~ /<t.*?>(.*?)<\/t/gsm) {
$t = $converter->convert($t) if defined $converter;
$str .= $t;
}
push @shared_strings, $str;
}
}
return \@shared_strings;
}
sub __load_styles {
my ($zip) = @_;
my $member_styles = $zip->memberNamed('xl/styles.xml');
my @styles = ();
my %style_info = ();
if ($member_styles) {
my $formatter = Spreadsheet::XLSX::Fmt2007->new();
foreach my $t ($member_styles->contents =~ /xf\ numFmtId="([^"]*)"(?!.*\/cellStyleXfs)/gsm) { #"
push @styles, $t;
}
my $default = $1 || '';
foreach my $t1 (@styles) {
$member_styles->contents =~ /numFmtId="$t1" formatCode="([^"]*)/;
my $formatCode = $1 || '';
if ($formatCode eq $default || not($formatCode)) {
if ($t1 == 9 || $t1 == 10) {
$formatCode = '0.00000%';
} elsif ($t1 == 14) {
$formatCode = 'yyyy-mm-dd';
} else {
$formatCode = '';
}
# $formatCode = $formatter->FmtStringDef($t1);
}
$style_info{$t1} = $formatCode;
$default = $1 || '';
}
}
return (\@styles, \%style_info);
}
sub __load_rels {
my ($zip) = @_;
my $member_rels = $zip->memberNamed('xl/_rels/workbook.xml.rels') or die("xl/_rels/workbook.xml.rels not found in this zip\n");
my %rels = ();
foreach ($member_rels->contents =~ /\<Relationship (.*?)\/?\>/g) {
my ($id, $target);
($id) = /Id="(.*?)"/;
($target) = /Target="(.*?)"/;
if (defined $id and defined $target) {
$rels{$id} = $target;
}
}
return \%rels;
}
sub __load_zip {
my ($filename) = @_;
my $zip = Archive::Zip->new();
if (ref $filename) {
$zip->readFromFileHandle($filename) == Archive::Zip::AZ_OK or die("Cannot open data as Zip archive");
} else {
$zip->read($filename) == Archive::Zip::AZ_OK or die("Cannot open $filename as Zip archive");
}
return $zip;
}
1;
__END__
=head1 NAME
Spreadsheet::XLSX - Perl extension for reading MS Excel 2007 files;
=head1 SYNOPSIS
use Text::Iconv;
my $converter = Text::Iconv -> new ("utf-8", "windows-1251");
# Text::Iconv is not really required.
# This can be any object with the convert method. Or nothing.
use Spreadsheet::XLSX;
my $excel = Spreadsheet::XLSX -> new ('test.xlsx', $converter);
foreach my $sheet (@{$excel -> {Worksheet}}) {
printf("Sheet: %s\n", $sheet->{Name});
$sheet -> {MaxRow} ||= $sheet -> {MinRow};
foreach my $row ($sheet -> {MinRow} .. $sheet -> {MaxRow}) {
$sheet -> {MaxCol} ||= $sheet -> {MinCol};
foreach my $col ($sheet -> {MinCol} .. $sheet -> {MaxCol}) {
my $cell = $sheet -> {Cells} [$row] [$col];
if ($cell) {
printf("( %s , %s ) => %s\n", $row, $col, $cell -> {Val});
}
}
}
}
=head1 DESCRIPTION
This module is a (quick and dirty) emulation of Spreadsheet::ParseExcel for
Excel 2007 (.xlsx) file format. It supports styles and many of Excel's quirks,
but not all. It populates the classes from Spreadsheet::ParseExcel for interoperability;
including Workbook, Worksheet, and Cell.
=head1 SEE ALSO
=over 2
=item Text::CSV_XS, Text::CSV_PP
http://search.cpan.org/~hmbrand/
A pure perl version is available on http://search.cpan.org/~makamaka/
=item Spreadsheet::ParseExcel
http://search.cpan.org/~kwitknr/
=item Spreadsheet::ReadSXC
http://search.cpan.org/~terhechte/
=item Spreadsheet::BasicRead
http://search.cpan.org/~gng/ for xlscat likewise functionality (Excel only)
=item Spreadsheet::ConvertAA
http://search.cpan.org/~nkh/ for an alternative set of cell2cr () /
cr2cell () pair
=item Spreadsheet::Perl
http://search.cpan.org/~nkh/ offers a Pure Perl implementation of a
spreadsheet engine. Users that want this format to be supported in
Spreadsheet::Read are hereby motivated to offer patches. It's not high
on my todo-list.
=item xls2csv
http://search.cpan.org/~ken/ offers an alternative for my C<xlscat -c>,
in the xls2csv tool, but this tool focusses on character encoding
transparency, and requires some other modules.
=item Spreadsheet::Read
http://search.cpan.org/~hmbrand/ read the data from a spreadsheet (interface
module)
=back
=head1 AUTHOR
Dmitry Ovsyanko, E<lt>do@eludia.ru<gt>, http://eludia.ru/wiki/
Patches by:
Steve Simms
Joerg Meltzer
Loreyna Yeung
Rob Polocz
Gregor Herrmann
H.Merijn Brand
endacoe
Pat Mariani
Sergey Pushkin
=head1 ACKNOWLEDGEMENTS
Thanks to TrackVia Inc. (http://www.trackvia.com) for paying for Rob Polocz working time.
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2008 by Dmitry Ovsyanko
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.
=cut
SPREADSHEET_XLSX
$fatpacked{"Spreadsheet/XLSX/Fmt2007.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_FMT2007';
# This code is adapted for Excel 2007 from:
# Spreadsheet::XLSX::FmtDefault
# by Kawai, Takanori (Hippo2000) 2001.2.2
# This Program is ALPHA version.
#==============================================================================
package Spreadsheet::XLSX::Fmt2007;
use strict;
use warnings;
use Spreadsheet::XLSX::Utility2007 qw(ExcelFmt);
our $VERSION = '0.13'; #
my %hFmtDefault = (
0x00 => '@',
0x01 => '0',
0x02 => '0.00',
0x03 => '#,##0',
0x04 => '#,##0.00',
0x05 => '($#,##0_);($#,##0)',
0x06 => '($#,##0_);[RED]($#,##0)',
0x07 => '($#,##0.00_);($#,##0.00_)',
0x08 => '($#,##0.00_);[RED]($#,##0.00_)',
0x09 => '0%',
0x0A => '0.00%',
0x0B => '0.00E+00',
0x0C => '# ?/?',
0x0D => '# ??/??',
0x0E => 'yyyy-mm-dd',
0x0F => 'd-mmm-yy',
0x10 => 'd-mmm',
0x11 => 'mmm-yy',
0x12 => 'h:mm AM/PM',
0x13 => 'h:mm:ss AM/PM',
0x14 => 'h:mm',
0x15 => 'h:mm:ss',
0x16 => 'm-d-yy h:mm',
#0x17-0x24 -- Differs in Natinal
0x25 => '(#,##0_);(#,##0)',
0x26 => '(#,##0_);[RED](#,##0)',
0x27 => '(#,##0.00);(#,##0.00)',
0x28 => '(#,##0.00);[RED](#,##0.00)',
0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)',
0x2A => '_($*#,##0_);_($*(#,##0);_(*"-"_);_(@_)',
0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)',
0x2C => '_($*#,##0.00_);_($*(#,##0.00);_(*"-"??_);_(@_)',
0x2D => 'mm:ss',
0x2E => '[h]:mm:ss',
0x2F => 'mm:ss.0',
0x30 => '##0.0E+0',
0x31 => '@',
);
#------------------------------------------------------------------------------
# new (for Spreadsheet::XLSX::FmtDefault)
#------------------------------------------------------------------------------
sub new {
my ($sPkg, %hKey) = @_;
my $oThis = {};
bless $oThis;
return $oThis;
}
#------------------------------------------------------------------------------
# TextFmt (for Spreadsheet::XLSX::FmtDefault)
#------------------------------------------------------------------------------
sub TextFmt {
my ($oThis, $sTxt, $sCode) = @_;
return $sTxt if ((!defined($sCode)) || ($sCode eq '_native_'));
return pack('U*', unpack('n*', $sTxt));
}
#------------------------------------------------------------------------------
# FmtStringDef (for Spreadsheet::XLSX::FmtDefault)
#------------------------------------------------------------------------------
sub FmtStringDef {
my ($oThis, $iFmtIdx, $oBook, $rhFmt) = @_;
my $sFmtStr = $oBook->{FormatStr}->{$iFmtIdx};
if (!(defined($sFmtStr)) && defined($rhFmt)) {
$sFmtStr = $rhFmt->{$iFmtIdx};
}
$sFmtStr = $hFmtDefault{$iFmtIdx} unless ($sFmtStr);
return $sFmtStr;
}
#------------------------------------------------------------------------------
# FmtString (for Spreadsheet::XLSX::FmtDefault)
#------------------------------------------------------------------------------
sub FmtString {
my ($oThis, $oCell, $oBook) = @_;
my $sFmtStr; # = $oThis->FmtStringDef(
# $oBook->{Format}[$oCell->{FormatNo}]->{FmtIdx}, $oBook);
unless (defined($sFmtStr)) {
if ($oCell->{Type} eq 'Numeric') {
if ($oCell->{Format}) {
$sFmtStr = $oCell->{Format};
} elsif (int($oCell->{Val}) != $oCell->{Val}) {
$sFmtStr = '0.00';
} else {
$sFmtStr = '0';
}
} elsif ($oCell->{Type} eq 'Date') {
if ($oCell->{Format}) {
$sFmtStr = $oCell->{Format};
} elsif (int($oCell->{Val}) <= 0) {
$sFmtStr = 'h:mm:ss';
} else {
$sFmtStr = 'm-d-yy';
}
} else {
$sFmtStr = '@';
}
}
return $sFmtStr;
}
#------------------------------------------------------------------------------
# ValFmt (for Spreadsheet::XLSX::FmtDefault)
#------------------------------------------------------------------------------
sub ValFmt {
my ($oThis, $oCell, $oBook) = @_;
my ($Dt, $iFmtIdx, $iNumeric, $Flg1904);
if ($oCell->{Type} eq 'Text') {
$Dt = ((defined $oCell->{Val}) && ($oCell->{Val} ne '')) ? $oThis->TextFmt($oCell->{Val}, $oCell->{Code}) : '';
} else {
$Dt = $oCell->{Val};
}
$Flg1904 = $oBook->{Flg1904};
my $sFmtStr = $oThis->FmtString($oCell, $oBook);
return ExcelFmt($sFmtStr, $Dt, $Flg1904, $oCell->{Type});
}
#------------------------------------------------------------------------------
# ChkType (for Spreadsheet::XLSX::FmtDefault)
#------------------------------------------------------------------------------
sub ChkType {
my ($oPkg, $iNumeric, $iFmtIdx) = @_;
if ($iNumeric) {
if ( (($iFmtIdx >= 0x0E) && ($iFmtIdx <= 0x16))
|| (($iFmtIdx >= 0x2D) && ($iFmtIdx <= 0x2F))) {
return "Date";
} else {
return "Numeric";
}
} else {
return "Text";
}
}
1;
__END__
=head1 SYNOPSIS
$cell = $myworkbook->worksheet->{Cells}[$row][$col]
my $type = $cell->{Type}; # Date, Text, or Numeric
my $disp_value = $cell->Value; # displayed (formatted) value set in XLSX by $myFmt2007->ValFmt($cell, $workbook)
my $fund_value = $cell->{Val}; # fundemental (underlying) value
my $formatter;
if ($myworkbook->excel07) {
$formatter=Spreadsheet::XLSX::Fmt2007->new();
} else {
$formatter=Spreadsheet::ParseExcel::FmtDefault->new();
}
my $format_string = $formatter->FmtString($cell,$self->workbook);
=cut
SPREADSHEET_XLSX_FMT2007
$fatpacked{"Spreadsheet/XLSX/ParseExcel.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL';
package Spreadsheet::ParseExcel;
##############################################################################
#
# Spreadsheet::ParseExcel - Extract information from an Excel file.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2008 Takanori Kawai
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use 5.008;
use OLE::Storage_Lite;
use File::Basename qw(fileparse);
use IO::File;
use Config;
use Crypt::RC4;
use Digest::Perl::MD5;
our $VERSION = '0.65';
use Spreadsheet::ParseExcel::Workbook;
use Spreadsheet::ParseExcel::Worksheet;
use Spreadsheet::ParseExcel::Font;
use Spreadsheet::ParseExcel::Format;
use Spreadsheet::ParseExcel::Cell;
use Spreadsheet::ParseExcel::FmtDefault;
my $currentbook;
my @aColor = (
'000000', # 0x00
'FFFFFF', 'FFFFFF', 'FFFFFF', 'FFFFFF',
'FFFFFF', 'FFFFFF', 'FFFFFF', '000000', # 0x08
'FFFFFF', 'FF0000', '00FF00', '0000FF',
'FFFF00', 'FF00FF', '00FFFF', '800000', # 0x10
'008000', '000080', '808000', '800080',
'008080', 'C0C0C0', '808080', '9999FF', # 0x18
'993366', 'FFFFCC', 'CCFFFF', '660066',
'FF8080', '0066CC', 'CCCCFF', '000080', # 0x20
'FF00FF', 'FFFF00', '00FFFF', '800080',
'800000', '008080', '0000FF', '00CCFF', # 0x28
'CCFFFF', 'CCFFCC', 'FFFF99', '99CCFF',
'FF99CC', 'CC99FF', 'FFCC99', '3366FF', # 0x30
'33CCCC', '99CC00', 'FFCC00', 'FF9900',
'FF6600', '666699', '969696', '003366', # 0x38
'339966', '003300', '333300', '993300',
'993366', '333399', '333333', '000000' # 0x40
);
use constant verExcel95 => 0x500;
use constant verExcel97 => 0x600;
use constant verBIFF2 => 0x00;
use constant verBIFF3 => 0x02;
use constant verBIFF4 => 0x04;
use constant verBIFF5 => 0x08;
use constant verBIFF8 => 0x18;
use constant MS_BIFF_CRYPTO_NONE => 0;
use constant MS_BIFF_CRYPTO_XOR => 1;
use constant MS_BIFF_CRYPTO_RC4 => 2;
use constant sizeof_BIFF_8_FILEPASS => ( 6 + 3 * 16 );
use constant REKEY_BLOCK => 0x400;
# Error code for some of the common parsing errors.
use constant ErrorNone => 0;
use constant ErrorNoFile => 1;
use constant ErrorNoExcelData => 2;
use constant ErrorFileEncrypted => 3;
# Color index for the 'auto' color
use constant AutoColor => 64;
our %error_strings = (
ErrorNone, '', # 0
ErrorNoFile, 'File not found', # 1
ErrorNoExcelData, 'No Excel data found in file', # 2
ErrorFileEncrypted, 'File is encrypted', # 3
);
our %ProcTbl = (
#Develpers' Kit P291
0x14 => \&_subHeader, # Header
0x15 => \&_subFooter, # Footer
0x18 => \&_subName, # NAME(?)
0x1A => \&_subVPageBreak, # Vertical Page Break
0x1B => \&_subHPageBreak, # Horizontal Page Break
0x22 => \&_subFlg1904, # 1904 Flag
0x26 => \&_subMargin, # Left Margin
0x27 => \&_subMargin, # Right Margin
0x28 => \&_subMargin, # Top Margin
0x29 => \&_subMargin, # Bottom Margin
0x2A => \&_subPrintHeaders, # Print Headers
0x2B => \&_subPrintGridlines, # Print Gridlines
0x3C => \&_subContinue, # Continue
0x3D => \&_subWindow1, # Window1
0x43 => \&_subXF, # XF for Excel < 4.
0x0443 => \&_subXF, # XF for Excel = 4.
0x862 => \&_subSheetLayout, # Sheet Layout
0x1B8 => \&_subHyperlink, # HYPERLINK
#Develpers' Kit P292
0x55 => \&_subDefColWidth, # Consider
0x5C => \&_subWriteAccess, # WRITEACCESS
0x7D => \&_subColInfo, # Colinfo
0x7E => \&_subRK, # RK
0x81 => \&_subWSBOOL, # WSBOOL
0x83 => \&_subHcenter, # HCENTER
0x84 => \&_subVcenter, # VCENTER
0x85 => \&_subBoundSheet, # BoundSheet
0x92 => \&_subPalette, # Palette, fgp
0x99 => \&_subStandardWidth, # Standard Col
#Develpers' Kit P293
0xA1 => \&_subSETUP, # SETUP
0xBD => \&_subMulRK, # MULRK
0xBE => \&_subMulBlank, # MULBLANK
0xD6 => \&_subRString, # RString
#Develpers' Kit P294
0xE0 => \&_subXF, # ExTended Format
0xE5 => \&_subMergeArea, # MergeArea (Not Documented)
0xFC => \&_subSST, # Shared String Table
0xFD => \&_subLabelSST, # Label SST
#Develpers' Kit P295
0x201 => \&_subBlank, # Blank
0x202 => \&_subInteger, # Integer(Not Documented)
0x203 => \&_subNumber, # Number
0x204 => \&_subLabel, # Label
0x205 => \&_subBoolErr, # BoolErr
0x207 => \&_subString, # STRING
0x208 => \&_subRow, # RowData
0x221 => \&_subArray, # Array (Consider)
0x225 => \&_subDefaultRowHeight, # Consider
0x31 => \&_subFont, # Font
0x231 => \&_subFont, # Font
0x27E => \&_subRK, # RK
0x41E => \&_subFormat, # Format
0x06 => \&_subFormula, # Formula
0x406 => \&_subFormula, # Formula
0x009 => \&_subBOF, # BOF(BIFF2)
0x209 => \&_subBOF, # BOF(BIFF3)
0x409 => \&_subBOF, # BOF(BIFF4)
0x809 => \&_subBOF, # BOF(BIFF5-8)
);
our $BIGENDIAN;
our $PREFUNC;
our $_use_perlio;
#------------------------------------------------------------------------------
# Spreadsheet::ParseExcel->new
#------------------------------------------------------------------------------
sub new {
my ( $class, %hParam ) = @_;
if ( not defined $_use_perlio ) {
if ( exists $Config{useperlio}
&& defined $Config{useperlio}
&& $Config{useperlio} eq "define" )
{
$_use_perlio = 1;
}
else {
$_use_perlio = 0;
require IO::Scalar;
import IO::Scalar;
}
}
# Check ENDIAN(Little: Intel etc. BIG: Sparc etc)
$BIGENDIAN =
( defined $hParam{Endian} ) ? $hParam{Endian}
: ( unpack( "H08", pack( "L", 2 ) ) eq '02000000' ) ? 0
: 1;
my $self = {};
bless $self, $class;
$self->{GetContent} = \&_subGetContent;
if ( $hParam{EventHandlers} ) {
$self->SetEventHandlers( $hParam{EventHandlers} );
}
else {
$self->SetEventHandlers( \%ProcTbl );
}
if ( $hParam{AddHandlers} ) {
foreach my $sKey ( keys( %{ $hParam{AddHandlers} } ) ) {
$self->SetEventHandler( $sKey, $hParam{AddHandlers}->{$sKey} );
}
}
$self->{CellHandler} = $hParam{CellHandler};
$self->{NotSetCell} = $hParam{NotSetCell};
$self->{Object} = $hParam{Object};
if ( defined $hParam{Password} ) {
$self->{Password} = $hParam{Password};
}
else {
$self->{Password} = 'VelvetSweatshop';
}
$self->{_error_status} = ErrorNone;
return $self;
}
#------------------------------------------------------------------------------
# Spreadsheet::ParseExcel->SetEventHandler
#------------------------------------------------------------------------------
sub SetEventHandler {
my ( $self, $key, $sub_ref ) = @_;
$self->{FuncTbl}->{$key} = $sub_ref;
}
#------------------------------------------------------------------------------
# Spreadsheet::ParseExcel->SetEventHandlers
#------------------------------------------------------------------------------
sub SetEventHandlers {
my ( $self, $rhTbl ) = @_;
$self->{FuncTbl} = undef;
foreach my $sKey ( keys %$rhTbl ) {
$self->{FuncTbl}->{$sKey} = $rhTbl->{$sKey};
}
}
#------------------------------------------------------------------------------
# Decryption routines
# based on sources of gnumeric (ms-biff.c ms-excel-read.c)
#------------------------------------------------------------------------------
sub md5state {
my ( $md5 ) = @_;
my $s = '';
for ( my $i = 0 ; $i < 4 ; $i++ ) {
my $v = $md5->{_state}[$i];
$s .= chr( $v & 0xff );
$s .= chr( ( $v >> 8 ) & 0xff );
$s .= chr( ( $v >> 16 ) & 0xff );
$s .= chr( ( $v >> 24 ) & 0xff );
}
return $s;
}
sub MakeKey {
my ( $block, $key, $valContext ) = @_;
my $pwarray = "\0" x 64;
substr( $pwarray, 0, 5 ) = substr( $valContext, 0, 5 );
substr( $pwarray, 5, 1 ) = chr( $block & 0xff );
substr( $pwarray, 6, 1 ) = chr( ( $block >> 8 ) & 0xff );
substr( $pwarray, 7, 1 ) = chr( ( $block >> 16 ) & 0xff );
substr( $pwarray, 8, 1 ) = chr( ( $block >> 24 ) & 0xff );
substr( $pwarray, 9, 1 ) = "\x80";
substr( $pwarray, 56, 1 ) = "\x48";
my $md5 = Digest::Perl::MD5->new();
$md5->add( $pwarray );
my $s = md5state( $md5 );
${$key} = Crypt::RC4->new( $s );
}
sub VerifyPassword {
my ( $password, $docid, $salt_data, $hashedsalt_data, $valContext ) = @_;
my $pwarray = "\0" x 64;
my $i;
my $md5 = Digest::Perl::MD5->new();
for ( $i = 0 ; $i < length( $password ) ; $i++ ) {
my $o = ord( substr( $password, $i, 1 ) );
substr( $pwarray, 2 * $i, 1 ) = chr( $o & 0xff );
substr( $pwarray, 2 * $i + 1, 1 ) = chr( ( $o >> 8 ) & 0xff );
}
substr( $pwarray, 2 * $i, 1 ) = chr( 0x80 );
substr( $pwarray, 56, 1 ) = chr( ( $i << 4 ) & 0xff );
$md5->add( $pwarray );
my $mdContext1 = md5state( $md5 );
my $offset = 0;
my $keyoffset = 0;
my $tocopy = 5;
$md5->reset;
while ( $offset != 16 ) {
if ( ( 64 - $offset ) < 5 ) {
$tocopy = 64 - $offset;
}
substr( $pwarray, $offset, $tocopy ) =
substr( $mdContext1, $keyoffset, $tocopy );
$offset += $tocopy;
if ( $offset == 64 ) {
$md5->add( $pwarray );
$keyoffset = $tocopy;
$tocopy = 5 - $tocopy;
$offset = 0;
next;
}
$keyoffset = 0;
$tocopy = 5;
substr( $pwarray, $offset, 16 ) = $docid;
$offset += 16;
}
substr( $pwarray, 16, 1 ) = "\x80";
substr( $pwarray, 17, 47 ) = "\0" x 47;
substr( $pwarray, 56, 1 ) = "\x80";
substr( $pwarray, 57, 1 ) = "\x0a";
$md5->add( $pwarray );
${$valContext} = md5state( $md5 );
my $key;
MakeKey( 0, \$key, ${$valContext} );
my $salt = $key->RC4( $salt_data );
my $hashedsalt = $key->RC4( $hashedsalt_data );
$salt .= "\x80" . "\0" x 47;
substr( $salt, 56, 1 ) = "\x80";
$md5->reset;
$md5->add( $salt );
my $mdContext2 = md5state( $md5 );
return ( $mdContext2 eq $hashedsalt );
}
sub SkipBytes {
my ( $q, $start, $count ) = @_;
my $scratch = "\0" x REKEY_BLOCK;
my $block;
$block = int( ( $start + $count ) / REKEY_BLOCK );
if ( $block != $q->{block} ) {
MakeKey( $q->{block} = $block, \$q->{rc4_key}, $q->{md5_ctxt} );
$count = ( $start + $count ) % REKEY_BLOCK;
}
$q->{rc4_key}->RC4( substr( $scratch, 0, $count ) );
return 1;
}
sub SetDecrypt {
my ( $q, $version, $password ) = @_;
if ( $q->{opcode} != 0x2f ) {
return 0;
}
if ( $password eq '' ) {
return 0;
}
# TODO old versions decryption
#if (version < MS_BIFF_V8 || q->data[0] == 0)
# return ms_biff_pre_biff8_query_set_decrypt (q, password);
if ( $q->{length} != sizeof_BIFF_8_FILEPASS ) {
return 0;
}
unless (
VerifyPassword(
$password,
substr( $q->{data}, 6, 16 ),
substr( $q->{data}, 22, 16 ),
substr( $q->{data}, 38, 16 ),
\$q->{md5_ctxt}
)
)
{
return 0;
}
$q->{encryption} = MS_BIFF_CRYPTO_RC4;
$q->{block} = -1;
# The first record after FILEPASS seems to be unencrypted
$q->{dont_decrypt_next_record} = 1;
# Pretend to decrypt the entire stream up till this point, it was
# encrypted, but do it anyway to keep the rc4 state in sync
SkipBytes( $q, 0, $q->{streamPos} );
return 1;
}
sub InitStream {
my ( $stream_data ) = @_;
my %q;
$q{opcode} = 0;
$q{length} = 0;
$q{data} = '';
$q{stream} = $stream_data; # data stream
$q{streamLen} = length( $stream_data ); # stream length
$q{streamPos} = 0; # stream position
$q{encryption} = 0;
$q{xor_key} = '';
$q{rc4_key} = '';
$q{md5_ctxt} = '';
$q{block} = 0;
$q{dont_decrypt_next_record} = 0;
return \%q;
}
sub QueryNext {
my ( $q ) = @_;
if ( $q->{streamPos} + 4 >= $q->{streamLen} ) {
return 0;
}
my $data = substr( $q->{stream}, $q->{streamPos}, 4 );
( $q->{opcode}, $q->{length} ) = unpack( 'v2', $data );
# No biff record should be larger than around 20,000.
if ( $q->{length} >= 20000 ) {
return 0;
}
if ( $q->{length} > 0 ) {
$q->{data} = substr( $q->{stream}, $q->{streamPos} + 4, $q->{length} );
}
else {
$q->{data} = undef;
$q->{dont_decrypt_next_record} = 1;
}
if ( $q->{encryption} == MS_BIFF_CRYPTO_RC4 ) {
if ( $q->{dont_decrypt_next_record} ) {
SkipBytes( $q, $q->{streamPos}, 4 + $q->{length} );
$q->{dont_decrypt_next_record} = 0;
}
else {
my $pos = $q->{streamPos};
my $data = $q->{data};
my $len = $q->{length};
my $res = '';
# Pretend to decrypt header.
SkipBytes( $q, $pos, 4 );
$pos += 4;
while ( $q->{block} != int( ( $pos + $len ) / REKEY_BLOCK ) ) {
my $step = REKEY_BLOCK - ( $pos % REKEY_BLOCK );
$res .= $q->{rc4_key}->RC4( substr( $data, 0, $step ) );
$data = substr( $data, $step );
$pos += $step;
$len -= $step;
MakeKey( ++$q->{block}, \$q->{rc4_key}, $q->{md5_ctxt} );
}
$res .= $q->{rc4_key}->RC4( substr( $data, 0, $len ) );
$q->{data} = $res;
}
}
elsif ( $q->{encryption} == MS_BIFF_CRYPTO_XOR ) {
# not implemented
return 0;
}
elsif ( $q->{encryption} == MS_BIFF_CRYPTO_NONE ) {
}
$q->{streamPos} += 4 + $q->{length};
return 1;
}
###############################################################################
#
# Parse()
#
# Parse the Excel file and convert it into a tree of objects..
#
sub parse {
my ( $self, $source, $formatter ) = @_;
my $workbook = Spreadsheet::ParseExcel::Workbook->new();
$currentbook = $workbook;
$workbook->{SheetCount} = 0;
$workbook->{CellHandler} = $self->{CellHandler};
$workbook->{NotSetCell} = $self->{NotSetCell};
$workbook->{Object} = $self->{Object};
$workbook->{aColor} = [ @aColor ];
my ( $biff_data, $data_length ) = $self->_get_content( $source, $workbook );
return undef if not $biff_data;
if ( $formatter ) {
$workbook->{FmtClass} = $formatter;
}
else {
$workbook->{FmtClass} = Spreadsheet::ParseExcel::FmtDefault->new();
}
# Parse the BIFF data.
my $stream = InitStream( $biff_data );
while ( QueryNext( $stream ) ) {
my $record = $stream->{opcode};
my $record_length = $stream->{length};
my $record_header = $stream->{data};
# If the file contains a FILEPASS record we assume that it is encrypted
# and cannot be parsed.
if ( $record == 0x002F ) {
unless ( SetDecrypt( $stream, '', $self->{Password} ) ) {
$self->{_error_status} = ErrorFileEncrypted;
return undef;
}
}
# Special case of a formula String with no string.
if ( $workbook->{_PrevPos}
&& ( defined $self->{FuncTbl}->{$record} )
&& ( $record != 0x207 ) )
{
my $iPos = $workbook->{_PrevPos};
$workbook->{_PrevPos} = undef;
my ( $row, $col, $format_index ) = @$iPos;
_NewCell(
$workbook, $row, $col,
Kind => 'Formula String',
Val => '',
FormatNo => $format_index,
Format => $workbook->{Format}[$format_index],
Numeric => 0,
Code => undef,
Book => $workbook,
);
}
# If the BIFF record matches 0x0*09 then it is a BOF record.
# We reset the _skip_chart flag to ensure we check the sheet type.
if ( ( $record & 0xF0FF ) == 0x09 ) {
$workbook->{_skip_chart} = 0;
}
if ( defined $self->{FuncTbl}->{$record} && !$workbook->{_skip_chart} )
{
$self->{FuncTbl}->{$record}
->( $workbook, $record, $record_length, $record_header );
}
$PREFUNC = $record if ( $record != 0x3C ); #Not Continue
last if defined $workbook->{_ParseAbort};
}
foreach my $worksheet (@{$workbook->{Worksheet}} ) {
# Install hyperlinks into each cell
# Range is undocumented for user; allows reuse of data
if ($worksheet->{HyperLinks}) {
foreach my $link (@{$worksheet->{HyperLinks}}) {
for( my $row = $link->[3]; $row <= $link->[4]; $row++ ) {
for( my $col = $link->[5]; $col <= $link->[6]; $col++ ) {
$worksheet->{Cells}[$row][$col]{Hyperlink} = $link;
}
}
}
}
}
return $workbook;
}
###############################################################################
#
# _get_content()
#
# Get the Excel BIFF content from the file or filehandle.
#
sub _get_content {
my ( $self, $source, $workbook ) = @_;
my ( $biff_data, $data_length );
# Reset the error status in case method is called more than once.
$self->{_error_status} = ErrorNone;
my $ref = ref($source);
if ( $ref ) {
if ( $ref eq 'SCALAR' ) {
# Specified by a scalar buffer.
( $biff_data, $data_length ) = $self->{GetContent}->( $source );
}
elsif ( $ref eq 'ARRAY' ) {
# Specified by file content
$workbook->{File} = undef;
my $sData = join( '', @$source );
( $biff_data, $data_length ) = $self->{GetContent}->( \$sData );
}
else {
# Assume filehandle
# For CGI.pm (Light FileHandle)
my $sBuff = '';
if ( eval { binmode( $source ) } ) {
my $sWk;
while ( read( $source, $sWk, 4096 ) ) {
$sBuff .= $sWk;
}
}
else {
# Assume IO::Wrap or some other filehandle-like OO-only object
my $sWk;
# IO::Wrap does not implement binmode
eval { $source->binmode() };
while ( $source->read( $sWk, 4096 ) ) {
$sBuff .= $sWk;
}
}
( $biff_data, $data_length ) = $self->{GetContent}->( \$sBuff );
}
}
else {
# Specified by filename .
$workbook->{File} = $source;
if ( !-e $source ) {
$self->{_error_status} = ErrorNoFile;
return undef;
}
( $biff_data, $data_length ) = $self->{GetContent}->( $source );
}
# If the read was successful return the data.
if ( $data_length ) {
return ( $biff_data, $data_length );
}
else {
$self->{_error_status} = ErrorNoExcelData;
return undef;
}
}
#------------------------------------------------------------------------------
# _subGetContent (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _subGetContent {
my ( $sFile ) = @_;
my $oOl = OLE::Storage_Lite->new( $sFile );
return ( undef, undef ) unless ( $oOl );
my @aRes = $oOl->getPpsSearch(
[
OLE::Storage_Lite::Asc2Ucs( 'Book' ),
OLE::Storage_Lite::Asc2Ucs( 'Workbook' )
],
1, 1
);
return ( undef, undef ) if ( $#aRes < 0 );
#Hack from Herbert
if ( $aRes[0]->{Data} ) {
return ( $aRes[0]->{Data}, length( $aRes[0]->{Data} ) );
}
#Same as OLE::Storage_Lite
my $oIo;
#1. $sFile is Ref of scalar
if ( ref( $sFile ) eq 'SCALAR' ) {
if ( $_use_perlio ) {
open $oIo, "<", \$sFile;
}
else {
$oIo = IO::Scalar->new;
$oIo->open( $sFile );
}
}
#2. $sFile is a IO::Handle object
elsif ( UNIVERSAL::isa( $sFile, 'IO::Handle' ) ) {
$oIo = $sFile;
binmode( $oIo );
}
#3. $sFile is a simple filename string
elsif ( !ref( $sFile ) ) {
$oIo = IO::File->new;
$oIo->open( "<$sFile" ) || return undef;
binmode( $oIo );
}
my $sWk;
my $sBuff = '';
while ( $oIo->read( $sWk, 4096 ) ) { #4_096 has no special meanings
$sBuff .= $sWk;
}
$oIo->close();
#Not Excel file (simple method)
return ( undef, undef ) if ( substr( $sBuff, 0, 1 ) ne "\x09" );
return ( $sBuff, length( $sBuff ) );
}
#------------------------------------------------------------------------------
# _subBOF (for Spreadsheet::ParseExcel) Developers' Kit : P303
#------------------------------------------------------------------------------
sub _subBOF {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iVer, $iDt ) = unpack( "v2", $sWk );
#Workbook Global
if ( $iDt == 0x0005 ) {
$oBook->{Version} = unpack( "v", $sWk );
$oBook->{BIFFVersion} =
( $oBook->{Version} == verExcel95 ) ? verBIFF5 : verBIFF8;
$oBook->{_CurSheet} = undef;
$oBook->{_CurSheet_} = -1;
}
#Worksheet or Dialogsheet
elsif ( $iDt != 0x0020 ) { #if($iDt == 0x0010)
if ( defined $oBook->{_CurSheet_} ) {
$oBook->{_CurSheet} = $oBook->{_CurSheet_} + 1;
$oBook->{_CurSheet_}++;
(
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{SheetVersion},
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{SheetType},
)
= unpack( "v2", $sWk )
if ( length( $sWk ) > 4 );
}
else {
$oBook->{BIFFVersion} = int( $bOp / 0x100 );
if ( ( $oBook->{BIFFVersion} == verBIFF2 )
|| ( $oBook->{BIFFVersion} == verBIFF3 )
|| ( $oBook->{BIFFVersion} == verBIFF4 ) )
{
$oBook->{Version} = $oBook->{BIFFVersion};
$oBook->{_CurSheet} = 0;
$oBook->{Worksheet}[ $oBook->{SheetCount} ] =
Spreadsheet::ParseExcel::Worksheet->new(
_Name => '',
Name => '',
_Book => $oBook,
_SheetNo => $oBook->{SheetCount},
);
$oBook->{SheetCount}++;
}
}
}
else {
# Set flag to ignore all chart records until we reach another BOF.
$oBook->{_skip_chart} = 1;
}
}
#------------------------------------------------------------------------------
# _subBlank (for Spreadsheet::ParseExcel) DK:P303
#------------------------------------------------------------------------------
sub _subBlank {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk );
_NewCell(
$oBook, $iR, $iC,
Kind => 'BLANK',
Val => '',
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => undef,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subInteger (for Spreadsheet::ParseExcel) Not in DK
#------------------------------------------------------------------------------
sub _subInteger {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF, $sTxt, $sDum );
( $iR, $iC, $iF, $sDum, $sTxt ) = unpack( "v3cv", $sWk );
_NewCell(
$oBook, $iR, $iC,
Kind => 'INTEGER',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => undef,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subNumber (for Spreadsheet::ParseExcel) : DK: P354
#------------------------------------------------------------------------------
sub _subNumber {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk );
my $dVal = _convDval( substr( $sWk, 6, 8 ) );
_NewCell(
$oBook, $iR, $iC,
Kind => 'Number',
Val => $dVal,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 1,
Code => undef,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _convDval (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _convDval {
my ( $sWk ) = @_;
return
unpack( "d",
( $BIGENDIAN ) ? pack( "c8", reverse( unpack( "c8", $sWk ) ) ) : $sWk );
}
#------------------------------------------------------------------------------
# _subRString (for Spreadsheet::ParseExcel) DK:P405
#------------------------------------------------------------------------------
sub _subRString {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF, $iL, $sTxt );
( $iR, $iC, $iF, $iL ) = unpack( "v4", $sWk );
$sTxt = substr( $sWk, 8, $iL );
#Has STRUN
if ( length( $sWk ) > ( 8 + $iL ) ) {
_NewCell(
$oBook, $iR, $iC,
Kind => 'RString',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => '_native_', #undef,
Book => $oBook,
Rich => substr( $sWk, ( 8 + $iL ) + 1 ),
);
}
else {
_NewCell(
$oBook, $iR, $iC,
Kind => 'RString',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => '_native_',
Book => $oBook,
);
}
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subBoolErr (for Spreadsheet::ParseExcel) DK:P306
#------------------------------------------------------------------------------
sub _subBoolErr {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk );
my ( $iVal, $iFlg ) = unpack( "cc", substr( $sWk, 6, 2 ) );
my $sTxt = DecodeBoolErr( $iVal, $iFlg );
_NewCell(
$oBook, $iR, $iC,
Kind => 'BoolError',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => undef,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
###############################################################################
#
# _subRK()
#
# Decode the RK BIFF record.
#
sub _subRK {
my ( $workbook, $biff_number, $length, $data ) = @_;
my ( $row, $col, $format_index, $rk_number ) = unpack( 'vvvV', $data );
my $number = _decode_rk_number( $rk_number );
_NewCell(
$workbook, $row, $col,
Kind => 'RK',
Val => $number,
FormatNo => $format_index,
Format => $workbook->{Format}->[$format_index],
Numeric => 1,
Code => undef,
Book => $workbook,
);
# Store the max and min row/col values.
_SetDimension( $workbook, $row, $col, $col );
}
#------------------------------------------------------------------------------
# _subArray (for Spreadsheet::ParseExcel) DK:P297
#------------------------------------------------------------------------------
sub _subArray {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iBR, $iER, $iBC, $iEC ) = unpack( "v2c2", $sWk );
}
#------------------------------------------------------------------------------
# _subFormula (for Spreadsheet::ParseExcel) DK:P336
#------------------------------------------------------------------------------
sub _subFormula {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk );
my ( $iFlg ) = unpack( "v", substr( $sWk, 12, 2 ) );
if ( $iFlg == 0xFFFF ) {
my ( $iKind ) = unpack( "c", substr( $sWk, 6, 1 ) );
my ( $iVal ) = unpack( "c", substr( $sWk, 8, 1 ) );
if ( ( $iKind == 1 ) or ( $iKind == 2 ) ) {
my $sTxt =
( $iKind == 1 )
? DecodeBoolErr( $iVal, 0 )
: DecodeBoolErr( $iVal, 1 );
_NewCell(
$oBook, $iR, $iC,
Kind => 'Formula Bool',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => undef,
Book => $oBook,
);
}
else { # Result (Reserve Only)
$oBook->{_PrevPos} = [ $iR, $iC, $iF ];
}
}
else {
my $dVal = _convDval( substr( $sWk, 6, 8 ) );
_NewCell(
$oBook, $iR, $iC,
Kind => 'Formula Number',
Val => $dVal,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 1,
Code => undef,
Book => $oBook,
);
}
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subString (for Spreadsheet::ParseExcel) DK:P414
#------------------------------------------------------------------------------
sub _subString {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
#Position (not enough for ARRAY)
my $iPos = $oBook->{_PrevPos};
return undef unless ( $iPos );
$oBook->{_PrevPos} = undef;
my ( $iR, $iC, $iF ) = @$iPos;
my ( $iLen, $sTxt, $sCode );
if ( $oBook->{BIFFVersion} == verBIFF8 ) {
my ( $raBuff, $iLen ) = _convBIFF8String( $oBook, $sWk, 1 );
$sTxt = $raBuff->[0];
$sCode = ( $raBuff->[1] ) ? 'ucs2' : undef;
}
elsif ( $oBook->{BIFFVersion} == verBIFF5 ) {
$sCode = '_native_';
$iLen = unpack( "v", $sWk );
$sTxt = substr( $sWk, 2, $iLen );
}
else {
$sCode = '_native_';
$iLen = unpack( "c", $sWk );
$sTxt = substr( $sWk, 1, $iLen );
}
_NewCell(
$oBook, $iR, $iC,
Kind => 'String',
Val => $sTxt,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => $sCode,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subLabel (for Spreadsheet::ParseExcel) DK:P344
#------------------------------------------------------------------------------
sub _subLabel {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF ) = unpack( "v3", $sWk );
my ( $sLbl, $sCode );
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
my ( $raBuff, $iLen, $iStPos, $iLenS ) =
_convBIFF8String( $oBook, substr( $sWk, 6 ), 1 );
$sLbl = $raBuff->[0];
$sCode = ( $raBuff->[1] ) ? 'ucs2' : undef;
}
#Before BIFF8
else {
$sLbl = substr( $sWk, 8 );
$sCode = '_native_';
}
_NewCell(
$oBook, $iR, $iC,
Kind => 'Label',
Val => $sLbl,
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => $sCode,
Book => $oBook,
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
###############################################################################
#
# _subMulRK()
#
# Decode the Multiple RK BIFF record.
#
sub _subMulRK {
my ( $workbook, $biff_number, $length, $data ) = @_;
# JMN: I don't know why this is here.
return if $workbook->{SheetCount} <= 0;
my ( $row, $first_col ) = unpack( "v2", $data );
my $last_col = unpack( "v", substr( $data, length( $data ) - 2, 2 ) );
# Iterate over the RK array and decode the data.
my $pos = 4;
for my $col ( $first_col .. $last_col ) {
my $data = substr( $data, $pos, 6 );
my ( $format_index, $rk_number ) = unpack 'vV', $data;
my $number = _decode_rk_number( $rk_number );
_NewCell(
$workbook, $row, $col,
Kind => 'MulRK',
Val => $number,
FormatNo => $format_index,
Format => $workbook->{Format}->[$format_index],
Numeric => 1,
Code => undef,
Book => $workbook,
);
$pos += 6;
}
# Store the max and min row/col values.
_SetDimension( $workbook, $row, $first_col, $last_col );
}
#------------------------------------------------------------------------------
# _subMulBlank (for Spreadsheet::ParseExcel) DK:P349
#------------------------------------------------------------------------------
sub _subMulBlank {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iSc ) = unpack( "v2", $sWk );
my $iEc = unpack( "v", substr( $sWk, length( $sWk ) - 2, 2 ) );
my $iPos = 4;
for ( my $iC = $iSc ; $iC <= $iEc ; $iC++ ) {
my $iF = unpack( 'v', substr( $sWk, $iPos, 2 ) );
_NewCell(
$oBook, $iR, $iC,
Kind => 'MulBlank',
Val => '',
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => undef,
Book => $oBook,
);
$iPos += 2;
}
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iSc, $iEc );
}
#------------------------------------------------------------------------------
# _subLabelSST (for Spreadsheet::ParseExcel) DK: P345
#------------------------------------------------------------------------------
sub _subLabelSST {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iR, $iC, $iF, $iIdx ) = unpack( 'v3V', $sWk );
_NewCell(
$oBook, $iR, $iC,
Kind => 'PackedIdx',
Val => $oBook->{PkgStr}[$iIdx]->{Text},
FormatNo => $iF,
Format => $oBook->{Format}[$iF],
Numeric => 0,
Code => ( $oBook->{PkgStr}[$iIdx]->{Unicode} ) ? 'ucs2' : undef,
Book => $oBook,
Rich => $oBook->{PkgStr}[$iIdx]->{Rich},
);
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iC, $iC );
}
#------------------------------------------------------------------------------
# _subFlg1904 (for Spreadsheet::ParseExcel) DK:P296
#------------------------------------------------------------------------------
sub _subFlg1904 {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
$oBook->{Flg1904} = unpack( "v", $sWk );
}
#------------------------------------------------------------------------------
# _subRow (for Spreadsheet::ParseExcel) DK:P403
#------------------------------------------------------------------------------
sub _subRow {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
#0. Get Worksheet info (MaxRow, MaxCol, MinRow, MinCol)
my ( $iR, $iSc, $iEc, $iHght, $undef1, $undef2, $iGr, $iXf ) =
unpack( "v8", $sWk );
$iEc--;
if ( $iGr & 0x20 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RowHidden}[$iR] = 1;
}
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RowHeight}[$iR] = $iHght / 20;
#2.MaxRow, MaxCol, MinRow, MinCol
_SetDimension( $oBook, $iR, $iSc, $iEc );
}
#------------------------------------------------------------------------------
# _SetDimension (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _SetDimension {
my ( $oBook, $iR, $iSc, $iEc ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
#2.MaxRow, MaxCol, MinRow, MinCol
#2.1 MinRow
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} = $iR
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} )
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinRow} <= $iR );
#2.2 MaxRow
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} = $iR
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} )
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxRow} > $iR );
#2.3 MinCol
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} = $iSc
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} )
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MinCol} <= $iSc );
#2.4 MaxCol
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} = $iEc
unless ( defined $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} )
and ( $oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{MaxCol} > $iEc );
}
#------------------------------------------------------------------------------
# _subDefaultRowHeight (for Spreadsheet::ParseExcel) DK: P318
#------------------------------------------------------------------------------
sub _subDefaultRowHeight {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
#1. RowHeight
my ( $iDum, $iHght ) = unpack( "v2", $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{DefRowHeight} = $iHght / 20;
}
#------------------------------------------------------------------------------
# _subStandardWidth(for Spreadsheet::ParseExcel) DK:P413
#------------------------------------------------------------------------------
sub _subStandardWidth {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my $iW = unpack( "v", $sWk );
$oBook->{StandardWidth} = _convert_col_width( $oBook, $iW );
}
###############################################################################
#
# _subDefColWidth()
#
# Read the DEFCOLWIDTH Biff record. This gives the width in terms of chars
# and is different from the width in the COLINFO record.
#
sub _subDefColWidth {
my ( $self, $record, $length, $data ) = @_;
my $width = unpack 'v', $data;
# Adjustment for default Arial 10 width.
$width = 8.43 if $width == 8;
$self->{Worksheet}->[ $self->{_CurSheet} ]->{DefColWidth} = $width;
}
###############################################################################
#
# _convert_col_width()
#
# Converts from the internal Excel column width units to user units seen in the
# interface. It is first necessary to convert the internal width to pixels and
# then to user units. The conversion is specific to a default font of Arial 10.
# TODO, the conversion should be extended to other fonts and sizes.
#
sub _convert_col_width {
my $self = shift;
my $excel_width = shift;
# Convert from Excel units to pixels (rounded up).
my $pixels = int( 0.5 + $excel_width * 7 / 256 );
# Convert from pixels to user units.
# The conversion is different for columns <= 1 user unit (12 pixels).
my $user_width;
if ( $pixels <= 12 ) {
$user_width = $pixels / 12;
}
else {
$user_width = ( $pixels - 5 ) / 7;
}
# Round up to 2 decimal places.
$user_width = int( $user_width * 100 + 0.5 ) / 100;
return $user_width;
}
#------------------------------------------------------------------------------
# _subColInfo (for Spreadsheet::ParseExcel) DK:P309
#------------------------------------------------------------------------------
sub _subColInfo {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless defined $oBook->{_CurSheet};
my ( $iSc, $iEc, $iW, $iXF, $iGr ) = unpack( "v5", $sWk );
for ( my $i = $iSc ; $i <= $iEc ; $i++ ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColWidth}[$i] =
_convert_col_width( $oBook, $iW );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColFmtNo}[$i] = $iXF;
if ( $iGr & 0x01 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{ColHidden}[$i] = 1;
}
}
}
#------------------------------------------------------------------------------
# _subWindow1 Window information P 273
#------------------------------------------------------------------------------
sub _subWindow1 {
my ( $workbook, $op, $len, $wk ) = @_;
return if ( $workbook->{BIFFVersion} <= verBIFF4() );
my (
$hpos, $vpos, $width,
$height, $options, $active,
$firsttab, $numselected, $tabbarwidth
) = unpack( "v9", $wk );
$workbook->{ActiveSheet} = $active;
}
#------------------------------------------------------------------------------
# _subSheetLayout OpenOffice 5.96 (P207)
#------------------------------------------------------------------------------
sub _subSheetLayout {
my ( $workbook, $op, $len, $wk ) = @_;
my @unused;
(
my $rc,
@unused[ 1 .. 10 ],
@unused[ 11 .. 14 ],
my $color, @unused[ 15, 16 ]
) = unpack( "vC10C4vC2", $wk );
return unless ( $rc == 0x0862 );
$workbook->{Worksheet}[ $workbook->{_CurSheet} ]->{TabColor} = $color;
}
#------------------------------------------------------------------------------
# _subHyperlink OpenOffice 5.96 (P182)
#
# Also see: http://msdn.microsoft.com/en-us/library/gg615407(v=office.14).aspx
#------------------------------------------------------------------------------
# Helper: Extract a GID, returns as text string
sub _getguid {
my( $wk ) = @_;
my( $text, $guidl, $guids1, $guids2, @guidb );
( $guidl, $guids1, $guids2, @guidb[0..7] ) = unpack( 'Vv2C8', $wk );
$text = sprintf( '%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X', $guidl, $guids1, $guids2, @guidb);
return $text;
}
# Helper: Extract a counted (16-bit) unicode string, returns string,
# updates $offset
# $zterm == 1 if string is null-terminated.
# $bc if length is in bytes (not chars)
sub _getustr {
my( $wk, $offset, $zterm, $bc ) = @_;
my $len = unpack( 'V', substr( $wk, $offset ) );
$offset += 4;
if( $bc ) {
$len /= 2;
}
$len -= $zterm;
my $text = join( '', map { chr $_ } unpack( "v$len", substr( $wk, $offset ) ) );
$text =~ s/\0.*\z// if( $zterm );
$_[1] = ( $offset += ($len + $zterm) *2 );
return $text;
}
# HYPERLINK record
sub _subHyperlink {
my ( $workbook, $op, $len, $wk ) = @_;
# REF
my( $srow, $erow, $scol, $ecol ) = unpack( 'v4', $wk );
my $guid = _getguid( substr( $wk, 8 ) );
return unless( $guid eq '79EAC9D0-BAF9-11CE-8C82-00AA004BA90B' );
my( $stmvers, $flags ) = unpack( 'VV', substr( $wk, 24 ) );
return if( $flags & 0x60 || $stmvers != 2 );
my $offset = 32;
my( $desc,$frame, $link, $mark );
if( ($flags & 0x14) == 0x14 ) {
$desc = _getustr( $wk, $offset, 1, 0 );
}
if( $flags & 0x80 ) {
$frame = _getustr( $wk, $offset, 1, 0 );
}
$link = '';
if( $flags & 0x100 ) {
# UNC path
$link = 'file:///' . _getustr( $wk, $offset, 1, 0 );
} elsif( $flags & 0x1 ) {
# Has link (URI)
$guid = _getguid( substr( $wk, $offset ) );
$offset += 16;
if( $guid eq '79EAC9E0-BAF9-11CE-8C82-00AA004BA90B' ) {
# URI
$link = _getustr( $wk, $offset, 1, 1 );
} elsif( $guid eq '00000303-0000-0000-C000-000000000046' ) {
# Local file
$link = 'file:///';
# !($flags & 2) = 'relative path'
if( !($flags & 0x2) ) {
my $file = $workbook->{File};
if( defined $file && length $file ) {
$link .= (fileparse($file))[1];
}
else {
$link .= '%REL%'
}
}
my $dirn = unpack( 'v', substr( $wk, $offset ) );
$offset += 2;
$link .= '..\\' x $dirn;
my $namelen = unpack( 'V', substr( $wk, $offset ) );
$offset += 4;
my $name = unpack( 'Z*', substr( $wk, $offset ) );
$offset += $namelen;
$offset += 24;
my $size = unpack( 'V', substr( $wk, $offset ) );
$offset += 4;
if( $size ) {
my $xlen = unpack( 'V', substr( $wk, $offset ) ) / 2;
$name = join( '', map { chr $_} unpack( "v$xlen", substr( $wk, $offset+4+2) ) );
$offset += $size;
}
$link .= $name;
} else {
return;
}
}
# Text mark (Fragment identifier)
if( $flags & 0x8 ) {
# Cellrefs contain reserved characters, so url-encode
my $fragment = _getustr( $wk, $offset, 1 );
$fragment =~ s/([^\w.~-])/sprintf( '%%%02X', ord( $1 ) )/gems;
$link .= '#' . $fragment;
}
# Update loop at end of parse() if this changes
push @{ $workbook->{Worksheet}[ $workbook->{_CurSheet} ]->{HyperLinks} }, [
$desc, $link, $frame, $srow, $erow, $scol, $ecol ];
}
#------------------------------------------------------------------------------
# _subSST (for Spreadsheet::ParseExcel) DK:P413
#------------------------------------------------------------------------------
sub _subSST {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
_subStrWk( $oBook, substr( $sWk, 8 ) );
}
#------------------------------------------------------------------------------
# _subContinue (for Spreadsheet::ParseExcel) DK:P311
#------------------------------------------------------------------------------
sub _subContinue {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
#if(defined $self->{FuncTbl}->{$bOp}) {
# $self->{FuncTbl}->{$PREFUNC}->($oBook, $bOp, $bLen, $sWk);
#}
_subStrWk( $oBook, $sWk, 1 ) if ( $PREFUNC == 0xFC );
}
#------------------------------------------------------------------------------
# _subWriteAccess (for Spreadsheet::ParseExcel) DK:P451
#------------------------------------------------------------------------------
sub _subWriteAccess {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return if ( defined $oBook->{_Author} );
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
$oBook->{Author} = _convBIFF8String( $oBook, $sWk );
}
#Before BIFF8
else {
my ( $iLen ) = unpack( "c", $sWk );
$oBook->{Author} =
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' );
}
}
#------------------------------------------------------------------------------
# _convBIFF8String (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _convBIFF8String {
my ( $oBook, $sWk, $iCnvFlg ) = @_;
my ( $iLen, $iFlg ) = unpack( "vc", $sWk );
my ( $iHigh, $iExt, $iRich ) = ( $iFlg & 0x01, $iFlg & 0x04, $iFlg & 0x08 );
my ( $iStPos, $iExtCnt, $iRichCnt, $sStr );
#2. Rich and Ext
if ( $iRich && $iExt ) {
$iStPos = 9;
( $iRichCnt, $iExtCnt ) = unpack( 'vV', substr( $sWk, 3, 6 ) );
}
elsif ( $iRich ) { #Only Rich
$iStPos = 5;
$iRichCnt = unpack( 'v', substr( $sWk, 3, 2 ) );
$iExtCnt = 0;
}
elsif ( $iExt ) { #Only Ext
$iStPos = 7;
$iRichCnt = 0;
$iExtCnt = unpack( 'V', substr( $sWk, 3, 4 ) );
}
else { #Nothing Special
$iStPos = 3;
$iExtCnt = 0;
$iRichCnt = 0;
}
#3.Get String
if ( $iHigh ) { #Compressed
$iLen *= 2;
$sStr = substr( $sWk, $iStPos, $iLen );
_SwapForUnicode( \$sStr );
$sStr = $oBook->{FmtClass}->TextFmt( $sStr, 'ucs2' )
unless ( $iCnvFlg );
}
else { #Not Compressed
$sStr = substr( $sWk, $iStPos, $iLen );
$sStr = $oBook->{FmtClass}->TextFmt( $sStr, undef ) unless ( $iCnvFlg );
}
#4. return
if ( wantarray ) {
#4.1 Get Rich and Ext
if ( length( $sWk ) < $iStPos + $iLen + $iRichCnt * 4 + $iExtCnt ) {
return (
[ undef, $iHigh, undef, undef ],
$iStPos + $iLen + $iRichCnt * 4 + $iExtCnt,
$iStPos, $iLen
);
}
else {
return (
[
$sStr,
$iHigh,
substr( $sWk, $iStPos + $iLen, $iRichCnt * 4 ),
substr( $sWk, $iStPos + $iLen + $iRichCnt * 4, $iExtCnt )
],
$iStPos + $iLen + $iRichCnt * 4 + $iExtCnt,
$iStPos, $iLen
);
}
}
else {
return $sStr;
}
}
#------------------------------------------------------------------------------
# _subXF (for Spreadsheet::ParseExcel) DK:P453
#------------------------------------------------------------------------------
sub _subXF {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iFnt, $iIdx );
my (
$iLock, $iHidden, $iStyle, $i123, $iAlH, $iWrap,
$iAlV, $iJustL, $iRotate, $iInd, $iShrink, $iMerge,
$iReadDir, $iBdrD, $iBdrSL, $iBdrSR, $iBdrST, $iBdrSB,
$iBdrSD, $iBdrCL, $iBdrCR, $iBdrCT, $iBdrCB, $iBdrCD,
$iFillP, $iFillCF, $iFillCB
);
if ( $oBook->{BIFFVersion} == verBIFF4 ) {
# Minimal support for Excel 4. We just get the font and format indices
# so that the cell data value can be formatted.
( $iFnt, $iIdx, ) = unpack( "CC", $sWk );
}
elsif ( $oBook->{BIFFVersion} == verBIFF8 ) {
my ( $iGen, $iAlign, $iGen2, $iBdr1, $iBdr2, $iBdr3, $iPtn );
( $iFnt, $iIdx, $iGen, $iAlign, $iGen2, $iBdr1, $iBdr2, $iBdr3, $iPtn )
= unpack( "v7Vv", $sWk );
$iLock = ( $iGen & 0x01 ) ? 1 : 0;
$iHidden = ( $iGen & 0x02 ) ? 1 : 0;
$iStyle = ( $iGen & 0x04 ) ? 1 : 0;
$i123 = ( $iGen & 0x08 ) ? 1 : 0;
$iAlH = ( $iAlign & 0x07 );
$iWrap = ( $iAlign & 0x08 ) ? 1 : 0;
$iAlV = ( $iAlign & 0x70 ) / 0x10;
$iJustL = ( $iAlign & 0x80 ) ? 1 : 0;
$iRotate = ( ( $iAlign & 0xFF00 ) / 0x100 ) & 0x00FF;
$iRotate = 90 if ( $iRotate == 255 );
$iRotate = 90 - $iRotate if ( $iRotate > 90 );
$iInd = ( $iGen2 & 0x0F );
$iShrink = ( $iGen2 & 0x10 ) ? 1 : 0;
$iMerge = ( $iGen2 & 0x20 ) ? 1 : 0;
$iReadDir = ( ( $iGen2 & 0xC0 ) / 0x40 ) & 0x03;
$iBdrSL = $iBdr1 & 0x0F;
$iBdrSR = ( ( $iBdr1 & 0xF0 ) / 0x10 ) & 0x0F;
$iBdrST = ( ( $iBdr1 & 0xF00 ) / 0x100 ) & 0x0F;
$iBdrSB = ( ( $iBdr1 & 0xF000 ) / 0x1000 ) & 0x0F;
$iBdrCL = ( ( $iBdr2 & 0x7F ) ) & 0x7F;
$iBdrCR = ( ( $iBdr2 & 0x3F80 ) / 0x80 ) & 0x7F;
$iBdrD = ( ( $iBdr2 & 0xC000 ) / 0x4000 ) & 0x3;
$iBdrCT = ( ( $iBdr3 & 0x7F ) ) & 0x7F;
$iBdrCB = ( ( $iBdr3 & 0x3F80 ) / 0x80 ) & 0x7F;
$iBdrCD = ( ( $iBdr3 & 0x1FC000 ) / 0x4000 ) & 0x7F;
$iBdrSD = ( ( $iBdr3 & 0x1E00000 ) / 0x200000 ) & 0xF;
$iFillP = ( ( $iBdr3 & 0xFC000000 ) / 0x4000000 ) & 0x3F;
$iFillCF = ( $iPtn & 0x7F );
$iFillCB = ( ( $iPtn & 0x3F80 ) / 0x80 ) & 0x7F;
}
else {
my ( $iGen, $iAlign, $iPtn, $iPtn2, $iBdr1, $iBdr2 );
( $iFnt, $iIdx, $iGen, $iAlign, $iPtn, $iPtn2, $iBdr1, $iBdr2 ) =
unpack( "v8", $sWk );
$iLock = ( $iGen & 0x01 ) ? 1 : 0;
$iHidden = ( $iGen & 0x02 ) ? 1 : 0;
$iStyle = ( $iGen & 0x04 ) ? 1 : 0;
$i123 = ( $iGen & 0x08 ) ? 1 : 0;
$iAlH = ( $iAlign & 0x07 );
$iWrap = ( $iAlign & 0x08 ) ? 1 : 0;
$iAlV = ( $iAlign & 0x70 ) / 0x10;
$iJustL = ( $iAlign & 0x80 ) ? 1 : 0;
$iRotate = ( ( $iAlign & 0x300 ) / 0x100 ) & 0x3;
$iFillCF = ( $iPtn & 0x7F );
$iFillCB = ( ( $iPtn & 0x1F80 ) / 0x80 ) & 0x7F;
$iFillP = ( $iPtn2 & 0x3F );
$iBdrSB = ( ( $iPtn2 & 0x1C0 ) / 0x40 ) & 0x7;
$iBdrCB = ( ( $iPtn2 & 0xFE00 ) / 0x200 ) & 0x7F;
$iBdrST = ( $iBdr1 & 0x07 );
$iBdrSL = ( ( $iBdr1 & 0x38 ) / 0x8 ) & 0x07;
$iBdrSR = ( ( $iBdr1 & 0x1C0 ) / 0x40 ) & 0x07;
$iBdrCT = ( ( $iBdr1 & 0xFE00 ) / 0x200 ) & 0x7F;
$iBdrCL = ( $iBdr2 & 0x7F ) & 0x7F;
$iBdrCR = ( ( $iBdr2 & 0x3F80 ) / 0x80 ) & 0x7F;
}
push @{ $oBook->{Format} }, Spreadsheet::ParseExcel::Format->new(
FontNo => $iFnt,
Font => $oBook->{Font}[$iFnt],
FmtIdx => $iIdx,
Lock => $iLock,
Hidden => $iHidden,
Style => $iStyle,
Key123 => $i123,
AlignH => $iAlH,
Wrap => $iWrap,
AlignV => $iAlV,
JustLast => $iJustL,
Rotate => $iRotate,
Indent => $iInd,
Shrink => $iShrink,
Merge => $iMerge,
ReadDir => $iReadDir,
BdrStyle => [ $iBdrSL, $iBdrSR, $iBdrST, $iBdrSB ],
BdrColor => [ $iBdrCL, $iBdrCR, $iBdrCT, $iBdrCB ],
BdrDiag => [ $iBdrD, $iBdrSD, $iBdrCD ],
Fill => [ $iFillP, $iFillCF, $iFillCB ],
);
}
#------------------------------------------------------------------------------
# _subFormat (for Spreadsheet::ParseExcel) DK: P336
#------------------------------------------------------------------------------
sub _subFormat {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my $sFmt;
if ( $oBook->{BIFFVersion} <= verBIFF5 ) {
$sFmt = substr( $sWk, 3, unpack( 'c', substr( $sWk, 2, 1 ) ) );
$sFmt = $oBook->{FmtClass}->TextFmt( $sFmt, '_native_' );
}
else {
$sFmt = _convBIFF8String( $oBook, substr( $sWk, 2 ) );
}
my $format_index = unpack( 'v', substr( $sWk, 0, 2 ) );
# Excel 4 and earlier used an index of 0 to indicate that a built-in format
# that was stored implicitly.
if ( $oBook->{BIFFVersion} <= verBIFF4 && $format_index == 0 ) {
$format_index = keys %{ $oBook->{FormatStr} };
}
$oBook->{FormatStr}->{$format_index} = $sFmt;
}
#------------------------------------------------------------------------------
# _subPalette (for Spreadsheet::ParseExcel) DK: P393
#------------------------------------------------------------------------------
sub _subPalette {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
for ( my $i = 0 ; $i < unpack( 'v', $sWk ) ; $i++ ) {
# push @aColor, unpack('H6', substr($sWk, $i*4+2));
$oBook->{aColor}[ $i + 8 ] = unpack( 'H6', substr( $sWk, $i * 4 + 2 ) );
}
}
#------------------------------------------------------------------------------
# _subFont (for Spreadsheet::ParseExcel) DK:P333
#------------------------------------------------------------------------------
sub _subFont {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline, $sFntName );
my ( $bBold, $bItalic, $bUnderline, $bStrikeout );
if ( $oBook->{BIFFVersion} == verBIFF8 ) {
( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline ) =
unpack( "v5c", $sWk );
my ( $iSize, $iHigh ) = unpack( 'cc', substr( $sWk, 14, 2 ) );
if ( $iHigh ) {
$sFntName = substr( $sWk, 16, $iSize * 2 );
_SwapForUnicode( \$sFntName );
$sFntName = $oBook->{FmtClass}->TextFmt( $sFntName, 'ucs2' );
}
else {
$sFntName = substr( $sWk, 16, $iSize );
$sFntName = $oBook->{FmtClass}->TextFmt( $sFntName, '_native_' );
}
$bBold = ( $iBold >= 0x2BC ) ? 1 : 0;
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0;
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0;
$bUnderline = ( $iUnderline ) ? 1 : 0;
}
elsif ( $oBook->{BIFFVersion} == verBIFF5 ) {
( $iHeight, $iAttr, $iCIdx, $iBold, $iSuper, $iUnderline ) =
unpack( "v5c", $sWk );
$sFntName =
$oBook->{FmtClass}
->TextFmt( substr( $sWk, 15, unpack( "c", substr( $sWk, 14, 1 ) ) ),
'_native_' );
$bBold = ( $iBold >= 0x2BC ) ? 1 : 0;
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0;
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0;
$bUnderline = ( $iUnderline ) ? 1 : 0;
}
else {
( $iHeight, $iAttr ) = unpack( "v2", $sWk );
$iCIdx = undef;
$iSuper = 0;
$bBold = ( $iAttr & 0x01 ) ? 1 : 0;
$bItalic = ( $iAttr & 0x02 ) ? 1 : 0;
$bUnderline = ( $iAttr & 0x04 ) ? 1 : 0;
$bStrikeout = ( $iAttr & 0x08 ) ? 1 : 0;
$sFntName = substr( $sWk, 5, unpack( "c", substr( $sWk, 4, 1 ) ) );
}
push @{ $oBook->{Font} }, Spreadsheet::ParseExcel::Font->new(
Height => $iHeight / 20.0,
Attr => $iAttr,
Color => $iCIdx,
Super => $iSuper,
UnderlineStyle => $iUnderline,
Name => $sFntName,
Bold => $bBold,
Italic => $bItalic,
Underline => $bUnderline,
Strikeout => $bStrikeout,
);
#Skip Font[4]
push @{ $oBook->{Font} }, {} if ( scalar( @{ $oBook->{Font} } ) == 4 );
}
#------------------------------------------------------------------------------
# _subBoundSheet (for Spreadsheet::ParseExcel): DK: P307
#------------------------------------------------------------------------------
sub _subBoundSheet {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my ( $iPos, $iGr, $iKind ) = unpack( "Lc2", $sWk );
$iKind &= 0x0F;
return if ( ( $iKind != 0x00 ) && ( $iKind != 0x01 ) );
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
my ( $iSize, $iUni ) = unpack( "cc", substr( $sWk, 6, 2 ) );
my $sWsName = substr( $sWk, 8 );
if ( $iUni & 0x01 ) {
_SwapForUnicode( \$sWsName );
$sWsName = $oBook->{FmtClass}->TextFmt( $sWsName, 'ucs2' );
}
$oBook->{Worksheet}[ $oBook->{SheetCount} ] =
Spreadsheet::ParseExcel::Worksheet->new(
Name => $sWsName,
Kind => $iKind,
_Pos => $iPos,
_Book => $oBook,
_SheetNo => $oBook->{SheetCount},
SheetHidden => $iGr & 0x03
);
}
else {
$oBook->{Worksheet}[ $oBook->{SheetCount} ] =
Spreadsheet::ParseExcel::Worksheet->new(
Name =>
$oBook->{FmtClass}->TextFmt( substr( $sWk, 7 ), '_native_' ),
Kind => $iKind,
_Pos => $iPos,
_Book => $oBook,
_SheetNo => $oBook->{SheetCount},
SheetHidden => $iGr & 0x03
);
}
$oBook->{SheetCount}++;
}
#------------------------------------------------------------------------------
# _subHeader (for Spreadsheet::ParseExcel) DK: P340
#------------------------------------------------------------------------------
sub _subHeader {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $sW;
if ( !defined $sWk ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} = undef;
return;
}
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
$sW = _convBIFF8String( $oBook, $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} =
( $sW eq "\x00" ) ? undef : $sW;
}
#Before BIFF8
else {
my ( $iLen ) = unpack( "c", $sWk );
$sW =
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Header} =
( $sW eq "\x00\x00\x00" ) ? undef : $sW;
}
}
#------------------------------------------------------------------------------
# _subFooter (for Spreadsheet::ParseExcel) DK: P335
#------------------------------------------------------------------------------
sub _subFooter {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $sW;
if ( !defined $sWk ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} = undef;
return;
}
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
$sW = _convBIFF8String( $oBook, $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} =
( $sW eq "\x00" ) ? undef : $sW;
}
#Before BIFF8
else {
my ( $iLen ) = unpack( "c", $sWk );
$sW =
$oBook->{FmtClass}->TextFmt( substr( $sWk, 1, $iLen ), '_native_' );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Footer} =
( $sW eq "\x00\x00\x00" ) ? undef : $sW;
}
}
#------------------------------------------------------------------------------
# _subHPageBreak (for Spreadsheet::ParseExcel) DK: P341
#------------------------------------------------------------------------------
sub _subHPageBreak {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my @aBreak;
my $iCnt = unpack( "v", $sWk );
return undef unless ( defined $oBook->{_CurSheet} );
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
for ( my $i = 0 ; $i < $iCnt ; $i++ ) {
my ( $iRow, $iColB, $iColE ) =
unpack( 'v3', substr( $sWk, 2 + $i * 6, 6 ) );
# push @aBreak, [$iRow, $iColB, $iColE];
push @aBreak, $iRow;
}
}
#Before BIFF8
else {
for ( my $i = 0 ; $i < $iCnt ; $i++ ) {
my ( $iRow ) = unpack( 'v', substr( $sWk, 2 + $i * 2, 2 ) );
push @aBreak, $iRow;
# push @aBreak, [$iRow, 0, 255];
}
}
@aBreak = sort { $a <=> $b } @aBreak;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{HPageBreak} = \@aBreak;
}
#------------------------------------------------------------------------------
# _subVPageBreak (for Spreadsheet::ParseExcel) DK: P447
#------------------------------------------------------------------------------
sub _subVPageBreak {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my @aBreak;
my $iCnt = unpack( "v", $sWk );
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
for ( my $i = 0 ; $i < $iCnt ; $i++ ) {
my ( $iCol, $iRowB, $iRowE ) =
unpack( 'v3', substr( $sWk, 2 + $i * 6, 6 ) );
push @aBreak, $iCol;
# push @aBreak, [$iCol, $iRowB, $iRowE];
}
}
#Before BIFF8
else {
for ( my $i = 0 ; $i < $iCnt ; $i++ ) {
my ( $iCol ) = unpack( 'v', substr( $sWk, 2 + $i * 2, 2 ) );
push @aBreak, $iCol;
# push @aBreak, [$iCol, 0, 65535];
}
}
@aBreak = sort { $a <=> $b } @aBreak;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{VPageBreak} = \@aBreak;
}
#------------------------------------------------------------------------------
# _subMargin (for Spreadsheet::ParseExcel) DK: P306, 345, 400, 440
#------------------------------------------------------------------------------
sub _subMargin {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
# The "Mergin" options are a workaround for a backward compatible typo.
my $dWk = _convDval( substr( $sWk, 0, 8 ) );
if ( $bOp == 0x26 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{LeftMergin} = $dWk;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{LeftMargin} = $dWk;
}
elsif ( $bOp == 0x27 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RightMergin} = $dWk;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{RightMargin} = $dWk;
}
elsif ( $bOp == 0x28 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{TopMergin} = $dWk;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{TopMargin} = $dWk;
}
elsif ( $bOp == 0x29 ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{BottomMergin} = $dWk;
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{BottomMargin} = $dWk;
}
}
#------------------------------------------------------------------------------
# _subHcenter (for Spreadsheet::ParseExcel) DK: P340
#------------------------------------------------------------------------------
sub _subHcenter {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $iWk = unpack( "v", $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{HCenter} = $iWk;
}
#------------------------------------------------------------------------------
# _subVcenter (for Spreadsheet::ParseExcel) DK: P447
#------------------------------------------------------------------------------
sub _subVcenter {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $iWk = unpack( "v", $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{VCenter} = $iWk;
}
#------------------------------------------------------------------------------
# _subPrintGridlines (for Spreadsheet::ParseExcel) DK: P397
#------------------------------------------------------------------------------
sub _subPrintGridlines {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $iWk = unpack( "v", $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PrintGrid} = $iWk;
}
#------------------------------------------------------------------------------
# _subPrintHeaders (for Spreadsheet::ParseExcel) DK: P397
#------------------------------------------------------------------------------
sub _subPrintHeaders {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $iWk = unpack( "v", $sWk );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PrintHeaders} = $iWk;
}
#------------------------------------------------------------------------------
# _subSETUP (for Spreadsheet::ParseExcel) DK: P409
#------------------------------------------------------------------------------
sub _subSETUP {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
# Workaround for some apps and older Excels that don't write a
# complete SETUP record.
return undef if $bLen != 34;
my $oWkS = $oBook->{Worksheet}[ $oBook->{_CurSheet} ];
my $iGrBit;
(
$oWkS->{PaperSize}, $oWkS->{Scale}, $oWkS->{PageStart},
$oWkS->{FitWidth}, $oWkS->{FitHeight}, $iGrBit,
$oWkS->{Res}, $oWkS->{VRes},
) = unpack( 'v8', $sWk );
$oWkS->{HeaderMargin} = _convDval( substr( $sWk, 16, 8 ) );
$oWkS->{FooterMargin} = _convDval( substr( $sWk, 24, 8 ) );
$oWkS->{Copis} = unpack( 'v2', substr( $sWk, 32, 2 ) );
$oWkS->{LeftToRight} = ( ( $iGrBit & 0x01 ) ? 1 : 0 );
$oWkS->{Landscape} = ( ( $iGrBit & 0x02 ) ? 1 : 0 );
$oWkS->{NoPls} = ( ( $iGrBit & 0x04 ) ? 1 : 0 );
$oWkS->{NoColor} = ( ( $iGrBit & 0x08 ) ? 1 : 0 );
$oWkS->{Draft} = ( ( $iGrBit & 0x10 ) ? 1 : 0 );
$oWkS->{Notes} = ( ( $iGrBit & 0x20 ) ? 1 : 0 );
$oWkS->{NoOrient} = ( ( $iGrBit & 0x40 ) ? 1 : 0 );
$oWkS->{UsePage} = ( ( $iGrBit & 0x80 ) ? 1 : 0 );
# The NoPls flag indicates that the values have not been taken from an
# actual printer and thus may not be accurate.
# Set default scale if NoPls otherwise it may be an invalid value of 0XFF.
$oWkS->{Scale} = 100 if $oWkS->{NoPls};
# Workaround for a backward compatible typo.
$oWkS->{HeaderMergin} = $oWkS->{HeaderMargin};
$oWkS->{FooterMergin} = $oWkS->{FooterMargin};
}
#------------------------------------------------------------------------------
# _subName (for Spreadsheet::ParseExcel) DK: P350
#------------------------------------------------------------------------------
sub _subName {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
my (
$iGrBit, $cKey, $cCh, $iCce, $ixAls,
$iTab, $cchCust, $cchDsc, $cchHep, $cchStatus
) = unpack( 'vc2v3c4', $sWk );
#Builtin Name + Length == 1
if ( ( $iGrBit & 0x20 ) && ( $cCh == 1 ) ) {
#BIFF8
if ( $oBook->{BIFFVersion} >= verBIFF8 ) {
my $iName = unpack( 'n', substr( $sWk, 14 ) );
my $iSheet = unpack( 'v', substr( $sWk, 8 ) ) - 1;
# Workaround for mal-formed Excel workbooks where Print_Title is
# set as Global (i.e. itab = 0). Note, this will have to be
# treated differently when we get around to handling global names.
return undef if $iSheet == -1;
if ( $iName == 6 ) { #PrintArea
my ( $iSheetW, $raArea ) = _ParseNameArea( substr( $sWk, 16 ) );
$oBook->{PrintArea}[$iSheet] = $raArea;
}
elsif ( $iName == 7 ) { #Title
my ( $iSheetW, $raArea ) = _ParseNameArea( substr( $sWk, 16 ) );
my @aTtlR = ();
my @aTtlC = ();
foreach my $raI ( @$raArea ) {
if ( $raI->[3] == 0xFF ) { #Row Title
push @aTtlR, [ $raI->[0], $raI->[2] ];
}
else { #Col Title
push @aTtlC, [ $raI->[1], $raI->[3] ];
}
}
$oBook->{PrintTitle}[$iSheet] =
{ Row => \@aTtlR, Column => \@aTtlC };
}
}
else {
my $iName = unpack( 'c', substr( $sWk, 14 ) );
if ( $iName == 6 ) { #PrintArea
my ( $iSheet, $raArea ) =
_ParseNameArea95( substr( $sWk, 15 ) );
$oBook->{PrintArea}[$iSheet] = $raArea;
}
elsif ( $iName == 7 ) { #Title
my ( $iSheet, $raArea ) =
_ParseNameArea95( substr( $sWk, 15 ) );
my @aTtlR = ();
my @aTtlC = ();
foreach my $raI ( @$raArea ) {
if ( $raI->[3] == 0xFF ) { #Row Title
push @aTtlR, [ $raI->[0], $raI->[2] ];
}
else { #Col Title
push @aTtlC, [ $raI->[1], $raI->[3] ];
}
}
$oBook->{PrintTitle}[$iSheet] =
{ Row => \@aTtlR, Column => \@aTtlC };
}
}
}
}
#------------------------------------------------------------------------------
# ParseNameArea (for Spreadsheet::ParseExcel) DK: 494 (ptgAread3d)
#------------------------------------------------------------------------------
sub _ParseNameArea {
my ( $sObj ) = @_;
my ( $iOp );
my @aRes = ();
$iOp = unpack( 'C', $sObj );
my $iSheet;
if ( $iOp == 0x3b ) {
my ( $iWkS, $iRs, $iRe, $iCs, $iCe ) =
unpack( 'v5', substr( $sObj, 1 ) );
$iSheet = $iWkS;
push @aRes, [ $iRs, $iCs, $iRe, $iCe ];
}
elsif ( $iOp == 0x29 ) {
my $iLen = unpack( 'v', substr( $sObj, 1, 2 ) );
my $iSt = 0;
while ( $iSt < $iLen ) {
my ( $iOpW, $iWkS, $iRs, $iRe, $iCs, $iCe ) =
unpack( 'cv5', substr( $sObj, $iSt + 3, 11 ) );
if ( $iOpW == 0x3b ) {
$iSheet = $iWkS;
push @aRes, [ $iRs, $iCs, $iRe, $iCe ];
}
if ( $iSt == 0 ) {
$iSt += 11;
}
else {
$iSt += 12; #Skip 1 byte;
}
}
}
return ( $iSheet, \@aRes );
}
#------------------------------------------------------------------------------
# ParseNameArea95 (for Spreadsheet::ParseExcel) DK: 494 (ptgAread3d)
#------------------------------------------------------------------------------
sub _ParseNameArea95 {
my ( $sObj ) = @_;
my ( $iOp );
my @aRes = ();
$iOp = unpack( 'C', $sObj );
my $iSheet;
if ( $iOp == 0x3b ) {
$iSheet = unpack( 'v', substr( $sObj, 11, 2 ) );
my ( $iRs, $iRe, $iCs, $iCe ) =
unpack( 'v2C2', substr( $sObj, 15, 6 ) );
push @aRes, [ $iRs, $iCs, $iRe, $iCe ];
}
elsif ( $iOp == 0x29 ) {
my $iLen = unpack( 'v', substr( $sObj, 1, 2 ) );
my $iSt = 0;
while ( $iSt < $iLen ) {
my $iOpW = unpack( 'c', substr( $sObj, $iSt + 3, 6 ) );
$iSheet = unpack( 'v', substr( $sObj, $iSt + 14, 2 ) );
my ( $iRs, $iRe, $iCs, $iCe ) =
unpack( 'v2C2', substr( $sObj, $iSt + 18, 6 ) );
push @aRes, [ $iRs, $iCs, $iRe, $iCe ] if ( $iOpW == 0x3b );
if ( $iSt == 0 ) {
$iSt += 21;
}
else {
$iSt += 22; #Skip 1 byte;
}
}
}
return ( $iSheet, \@aRes );
}
#------------------------------------------------------------------------------
# _subBOOL (for Spreadsheet::ParseExcel) DK: P452
#------------------------------------------------------------------------------
sub _subWSBOOL {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{PageFit} =
( ( unpack( 'v', $sWk ) & 0x100 ) ? 1 : 0 );
}
#------------------------------------------------------------------------------
# _subMergeArea (for Spreadsheet::ParseExcel) DK: (Not)
#------------------------------------------------------------------------------
sub _subMergeArea {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
return undef unless ( defined $oBook->{_CurSheet} );
my $iCnt = unpack( "v", $sWk );
my $oWkS = $oBook->{Worksheet}[ $oBook->{_CurSheet} ];
$oWkS->{MergedArea} = [] unless ( defined $oWkS->{MergedArea} );
for ( my $i = 0 ; $i < $iCnt ; $i++ ) {
my ( $iRs, $iRe, $iCs, $iCe ) =
unpack( 'v4', substr( $sWk, $i * 8 + 2, 8 ) );
for ( my $iR = $iRs ; $iR <= $iRe ; $iR++ ) {
for ( my $iC = $iCs ; $iC <= $iCe ; $iC++ ) {
$oWkS->{Cells}[$iR][$iC]->{Merged} = 1
if ( defined $oWkS->{Cells}[$iR][$iC] );
}
}
push @{ $oWkS->{MergedArea} }, [ $iRs, $iCs, $iRe, $iCe ];
}
}
#------------------------------------------------------------------------------
# DecodeBoolErr (for Spreadsheet::ParseExcel) DK: P306
#------------------------------------------------------------------------------
sub DecodeBoolErr {
my ( $iVal, $iFlg ) = @_;
if ( $iFlg ) { # ERROR
if ( $iVal == 0x00 ) {
return "#NULL!";
}
elsif ( $iVal == 0x07 ) {
return "#DIV/0!";
}
elsif ( $iVal == 0x0F ) {
return "#VALUE!";
}
elsif ( $iVal == 0x17 ) {
return "#REF!";
}
elsif ( $iVal == 0x1D ) {
return "#NAME?";
}
elsif ( $iVal == 0x24 ) {
return "#NUM!";
}
elsif ( $iVal == 0x2A ) {
return "#N/A!";
}
else {
return "#ERR";
}
}
else {
return ( $iVal ) ? "TRUE" : "FALSE";
}
}
###############################################################################
#
# _decode_rk_number()
#
# Convert an encoded RK number into a real number. The RK encoding is
# explained in some detail in the MS docs. It is a way of storing applicable
# ints and doubles in 32bits (30 data + 2 info bits) in order to save space.
#
sub _decode_rk_number {
my $rk_number = shift;
my $number;
# Check the main RK type.
if ( $rk_number & 0x02 ) {
# RK Type 2 and 4, a packed integer.
# Shift off the info bits.
$number = $rk_number >> 2;
# Convert from unsigned to signed if required.
$number -= 0x40000000 if $number & 0x20000000;
}
else {
# RK Type 1 and 3, a truncated IEEE Double.
# Pack the RK number into the high 30 bits of an IEEE double.
$number = pack "VV", 0x0000, $rk_number & 0xFFFFFFFC;
# Reverse the packed IEEE double on big-endian machines.
$number = reverse $number if $BIGENDIAN;
# Unpack the number.
$number = unpack "d", $number;
}
# RK Types 3 and 4 were multiplied by 100 prior to encoding.
$number /= 100 if $rk_number & 0x01;
return $number;
}
###############################################################################
#
# _subStrWk()
#
# Extract the workbook strings from the SST (Shared String Table) record and
# any following CONTINUE records.
#
# The workbook strings are initially contained in the SST block but may also
# occupy one or more CONTINUE blocks. Reading the CONTINUE blocks is made a
# little tricky by the fact that they can contain an additional initial byte
# if a string is continued from a previous block.
#
# Parsing is further complicated by the fact that the continued section of the
# string may have a different encoding (ASCII or UTF-8) from the previous
# section. Excel does this to save space.
#
sub _subStrWk {
my ( $self, $biff_data, $is_continue ) = @_;
if ( $is_continue ) {
# We are reading a CONTINUE record.
if ( $self->{_buffer} eq '' ) {
# A CONTINUE block with no previous SST.
$self->{_buffer} .= $biff_data;
}
elsif ( !defined $self->{_string_continued} ) {
# The CONTINUE block starts with a new (non-continued) string.
# Strip the Grbit byte and store the string data.
$self->{_buffer} .= substr $biff_data, 1;
}
else {
# A CONTINUE block that starts with a continued string.
# The first byte (Grbit) of the CONTINUE record indicates if (0)
# the continued string section is single bytes or (1) double bytes.
my $grbit = ord $biff_data;
my ( $str_position, $str_length ) = @{ $self->{_previous_info} };
my $buff_length = length $self->{_buffer};
if ( $buff_length >= ( $str_position + $str_length ) ) {
# Not in a string.
$self->{_buffer} .= $biff_data;
}
elsif ( ( $self->{_string_continued} & 0x01 ) == ( $grbit & 0x01 ) )
{
# Same encoding as the previous block of the string.
$self->{_buffer} .= substr( $biff_data, 1 );
}
else {
# Different encoding to the previous block of the string.
if ( $grbit & 0x01 ) {
# Current block is UTF-16, previous was ASCII.
my ( undef, $cch ) = unpack 'vc', $self->{_buffer};
substr( $self->{_buffer}, 2, 1 ) = pack( 'C', $cch | 0x01 );
# Convert the previous ASCII, single character, portion of
# the string into a double character UTF-16 string by
# inserting zero bytes.
for (
my $i = ( $buff_length - $str_position ) ;
$i >= 1 ;
$i--
)
{
substr( $self->{_buffer}, $str_position + $i, 0 ) =
"\x00";
}
}
else {
# Current block is ASCII, previous was UTF-16.
# Convert the current ASCII, single character, portion of
# the string into a double character UTF-16 string by
# inserting null bytes.
my $change_length =
( $str_position + $str_length ) - $buff_length;
# Length of the current CONTINUE record data.
my $biff_length = length $biff_data;
# Restrict the portion to be changed to the current block
# if the string extends over more than one block.
if ( $change_length > ( $biff_length - 1 ) * 2 ) {
$change_length = ( $biff_length - 1 ) * 2;
}
# Insert the null bytes.
for ( my $i = ( $change_length / 2 ) ; $i >= 1 ; $i-- ) {
substr( $biff_data, $i + 1, 0 ) = "\x00";
}
}
# Strip the Grbit byte and store the string data.
$self->{_buffer} .= substr $biff_data, 1;
}
}
}
else {
# Not a CONTINUE block therefore an SST block.
$self->{_buffer} .= $biff_data;
}
# Reset the state variables.
$self->{_string_continued} = undef;
$self->{_previous_info} = undef;
# Extract out any full strings from the current buffer leaving behind a
# partial string that is continued into the next block, or an empty
# buffer is no string is continued.
while ( length $self->{_buffer} >= 4 ) {
my ( $str_info, $length, $str_position, $str_length ) =
_convBIFF8String( $self, $self->{_buffer}, 1 );
if ( defined $str_info->[0] ) {
push @{ $self->{PkgStr} },
{
Text => $str_info->[0],
Unicode => $str_info->[1],
Rich => $str_info->[2],
Ext => $str_info->[3],
};
$self->{_buffer} = substr( $self->{_buffer}, $length );
}
else {
$self->{_string_continued} = $str_info->[1];
$self->{_previous_info} = [ $str_position, $str_length ];
last;
}
}
}
#------------------------------------------------------------------------------
# _SwapForUnicode (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _SwapForUnicode {
my ( $sObj ) = @_;
# for(my $i = 0; $i<length($$sObj); $i+=2){
for ( my $i = 0 ; $i < ( int( length( $$sObj ) / 2 ) * 2 ) ; $i += 2 ) {
my $sIt = substr( $$sObj, $i, 1 );
substr( $$sObj, $i, 1 ) = substr( $$sObj, $i + 1, 1 );
substr( $$sObj, $i + 1, 1 ) = $sIt;
}
}
#------------------------------------------------------------------------------
# _NewCell (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub _NewCell {
my ( $oBook, $iR, $iC, %rhKey ) = @_;
my ( $sWk, $iLen );
return undef unless ( defined $oBook->{_CurSheet} );
my $FmtClass = $oBook->{FmtClass};
$rhKey{Type} =
$FmtClass->ChkType( $rhKey{Numeric}, $rhKey{Format}{FmtIdx} );
my $FmtStr = $oBook->{FormatStr}{ $rhKey{Format}{FmtIdx} };
# Set "Date" type if required for numbers in a MulRK BIFF block.
if ( defined $FmtStr && $rhKey{Type} eq "Numeric" ) {
# Match a range of possible date formats. Note: this isn't important
# except for reporting. The number will still be converted to a date
# by ExcelFmt() even if 'Type' isn't set to 'Date'.
if ( $FmtStr =~ m{^[dmy][-\\/dmy]*$}i ) {
$rhKey{Type} = "Date";
}
}
my $oCell = Spreadsheet::ParseExcel::Cell->new(
Val => $rhKey{Val},
FormatNo => $rhKey{FormatNo},
Format => $rhKey{Format},
Code => $rhKey{Code},
Type => $rhKey{Type},
);
$oCell->{_Kind} = $rhKey{Kind};
$oCell->{_Value} = $FmtClass->ValFmt( $oCell, $oBook );
if ( $rhKey{Rich} ) {
my @aRich = ();
my $sRich = $rhKey{Rich};
for ( my $iWk = 0 ; $iWk < length( $sRich ) ; $iWk += 4 ) {
my ( $iPos, $iFnt ) = unpack( 'v2', substr( $sRich, $iWk ) );
push @aRich, [ $iPos, $oBook->{Font}[$iFnt] ];
}
$oCell->{Rich} = \@aRich;
}
if ( defined $oBook->{CellHandler} ) {
if ( defined $oBook->{Object} ) {
no strict;
ref( $oBook->{CellHandler} ) eq "CODE"
? $oBook->{CellHandler}->(
$_Object, $oBook, $oBook->{_CurSheet}, $iR, $iC, $oCell
)
: $oBook->{CellHandler}->callback( $_Object, $oBook, $oBook->{_CurSheet},
$iR, $iC, $oCell );
}
else {
$oBook->{CellHandler}->( $oBook, $oBook->{_CurSheet}, $iR, $iC, $oCell );
}
}
unless ( $oBook->{NotSetCell} ) {
$oBook->{Worksheet}[ $oBook->{_CurSheet} ]->{Cells}[$iR][$iC] = $oCell;
}
return $oCell;
}
#------------------------------------------------------------------------------
# ColorIdxToRGB (for Spreadsheet::ParseExcel)
#
# Returns for most recently opened book for compatibility, use
# Workbook::color_idx_to_rgb instead
#
#------------------------------------------------------------------------------
sub ColorIdxToRGB {
my ( $sPkg, $iIdx ) = @_;
unless( defined $currentbook ) {
return ( ( defined $aColor[$iIdx] ) ? $aColor[$iIdx] : $aColor[0] );
}
return $currentbook->color_idx_to_rgb( $iIdx );
}
###############################################################################
#
# error().
#
# Return an error string for a failed parse().
#
sub error {
my $self = shift;
my $parse_error = $self->{_error_status};
if ( exists $error_strings{$parse_error} ) {
return $error_strings{$parse_error};
}
else {
return 'Unknown parse error';
}
}
###############################################################################
#
# error_code().
#
# Return an error code for a failed parse().
#
sub error_code {
my $self = shift;
return $self->{_error_status};
}
###############################################################################
#
# Mapping between legacy method names and new names.
#
{
no warnings; # Ignore warnings about variables used only once.
*Parse = *parse;
}
1;
__END__
=head1 NAME
Spreadsheet::ParseExcel - Read information from an Excel file.
=head1 SYNOPSIS
#!/usr/bin/perl -w
use strict;
use Spreadsheet::ParseExcel;
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
if ( !defined $workbook ) {
die $parser->error(), ".\n";
}
for my $worksheet ( $workbook->worksheets() ) {
my ( $row_min, $row_max ) = $worksheet->row_range();
my ( $col_min, $col_max ) = $worksheet->col_range();
for my $row ( $row_min .. $row_max ) {
for my $col ( $col_min .. $col_max ) {
my $cell = $worksheet->get_cell( $row, $col );
next unless $cell;
print "Row, Col = ($row, $col)\n";
print "Value = ", $cell->value(), "\n";
print "Unformatted = ", $cell->unformatted(), "\n";
print "\n";
}
}
}
=head1 DESCRIPTION
The Spreadsheet::ParseExcel module can be used to read information from Excel 95-2003 binary files.
The module cannot read files in the Excel 2007 Open XML XLSX format. See the L<Spreadsheet::XLSX> module instead.
=head1 Parser
=head2 new()
The C<new()> method is used to create a new C<Spreadsheet::ParseExcel> parser object.
my $parser = Spreadsheet::ParseExcel->new();
It is possible to pass a password to decrypt an encrypted file:
$parser = Spreadsheet::ParseExcel->new( Password => 'secret' );
Only the default Excel encryption scheme is currently supported. See L</Decryption>.
As an advanced feature it is also possible to pass a call-back handler to the parser to control the parsing of the spreadsheet.
$parser = Spreadsheet::ParseExcel->new(
CellHandler => \&cell_handler,
NotSetCell => 1,
);
The call-back can be used to ignore certain cells or to reduce memory usage. See the section L<Reducing the memory usage of Spreadsheet::ParseExcel> for more information.
=head2 parse($filename, $formatter)
The Parser C<parse()> method returns a L</Workbook> object.
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
If an error occurs C<parse()> returns C<undef>. In general, programs should contain a test for failed parsing as follows:
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
if ( !defined $workbook ) {
die $parser->error(), ".\n";
}
The C<$filename> parameter is generally the file to be parsed. However, it can also be a filehandle or a scalar reference.
The optional C<$formatter> parameter can be an reference to a L</Formatter Class> to format the value of cells. This is useful for parsing workbooks with Unicode or Asian characters:
my $parser = Spreadsheet::ParseExcel->new();
my $formatter = Spreadsheet::ParseExcel::FmtJapan->new();
my $workbook = $parser->parse( 'Book1.xls', $formatter );
The L<Spreadsheet::ParseExcel::FmtJapan> formatter also supports Unicode. If you encounter any encoding problems with the default formatter try that instead.
=head2 error()
The Parser C<error()> method returns an error string if a C<parse()> fails:
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
if ( !defined $workbook ) {
die $parser->error(), ".\n";
}
If you wish to generate you own error string you can use the C<error_code()> method instead (see below). The C<error()> and C<error_code()> values are as follows:
error() error_code()
======= ============
'' 0
'File not found' 1
'No Excel data found in file' 2
'File is encrypted' 3
The C<error_code()> method is explained below.
Spreadsheet::ParseExcel will try to decrypt an encrypted Excel file using the default password or a user supplied password passed to C<new()>, see above. If these fail the module will return the C<'File is encrypted'> error. Only the default Excel encryption scheme is currently supported, see L</Decryption>.
=head2 error_code()
The Parser C<error_code()> method returns an error code if a C<parse()> fails:
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
if ( !defined $workbook ) {
die "Got error code ", $parser->error_code, ".\n";
}
This can be useful if you wish to employ you own error strings or error handling methods.
=head1 Workbook
A C<Spreadsheet::ParseExcel::Workbook> is created via the C<Spreadsheet::ParseExcel> C<parse()> method:
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
The main methods of the Workbook class are:
$workbook->worksheets()
$workbook->worksheet()
$workbook->worksheet_count()
$workbook->get_filename()
These more commonly used methods of the Workbook class are outlined below. The other, less commonly used, methods are documented in L<Spreadsheet::ParseExcel::Worksheet>.
=head2 worksheets()
Returns an array of L</Worksheet> objects. This was most commonly used to iterate over the worksheets in a workbook:
for my $worksheet ( $workbook->worksheets() ) {
...
}
=head2 worksheet()
The C<worksheet()> method returns a single C<Worksheet> object using either its name or index:
$worksheet = $workbook->worksheet('Sheet1');
$worksheet = $workbook->worksheet(0);
Returns C<undef> if the sheet name or index doesn't exist.
=head2 worksheet_count()
The C<worksheet_count()> method returns the number of Worksheet objects in the Workbook.
my $worksheet_count = $workbook->worksheet_count();
=head2 get_filename()
The C<get_filename()> method returns the name of the Excel file of C<undef> if the data was read from a filehandle rather than a file.
my $filename = $workbook->get_filename();
=head2 Other Workbook Methods
For full documentation of the methods available via a Workbook object see L<Spreadsheet::ParseExcel::Workbook>.
=head1 Worksheet
The C<Spreadsheet::ParseExcel::Worksheet> class encapsulates the properties of an Excel worksheet.
A Worksheet object is obtained via the L</worksheets()> or L</worksheet()> methods.
for my $worksheet ( $workbook->worksheets() ) {
...
}
# Or:
$worksheet = $workbook->worksheet('Sheet1');
$worksheet = $workbook->worksheet(0);
The most commonly used methods of the Worksheet class are:
$worksheet->get_cell()
$worksheet->row_range()
$worksheet->col_range()
$worksheet->get_name()
The Spreadsheet::ParseExcel::Worksheet class exposes a lot of methods but in general very few are required unless you are writing an advanced filter.
The most commonly used methods are detailed below. The others are documented in L<Spreadsheet::ParseExcel::Worksheet>.
=head2 get_cell($row, $col)
Return the L</Cell> object at row C<$row> and column C<$col> if it is defined. Otherwise returns undef.
my $cell = $worksheet->get_cell($row, $col);
=head2 row_range()
Returns a two-element list C<($min, $max)> containing the minimum and maximum defined rows in the worksheet. If there is no row defined C<$max> is smaller than C<$min>.
my ( $row_min, $row_max ) = $worksheet->row_range();
=head2 col_range()
Returns a two-element list C<($min, $max)> containing the minimum and maximum of defined columns in the worksheet. If there is no column defined C<$max> is smaller than C<$min>.
my ( $col_min, $col_max ) = $worksheet->col_range();
=head2 get_name()
The C<get_name()> method returns the name of the worksheet, such as 'Sheet1'.
my $name = $worksheet->get_name();
=head2 Other Worksheet Methods
For other, less commonly used, Worksheet methods see L<Spreadsheet::ParseExcel::Worksheet>.
=head1 Cell
The C<Spreadsheet::ParseExcel::Cell> class has the following main methods.
$cell->value()
$cell->unformatted()
=head2 value()
The C<value()> method returns the formatted value of the cell.
my $value = $cell->value();
Formatted in this sense refers to the numeric format of the cell value. For example a number such as 40177 might be formatted as 40,117, 40117.000 or even as the date 2009/12/30.
If the cell doesn't contain a numeric format then the formatted and unformatted cell values are the same, see the C<unformatted()> method below.
For a defined C<$cell> the C<value()> method will always return a value.
In the case of a cell with formatting but no numeric or string contents the method will return the empty string C<''>.
=head2 unformatted()
The C<unformatted()> method returns the unformatted value of the cell.
my $unformatted = $cell->unformatted();
Returns the cell value without a numeric format. See the C<value()> method above.
=head2 Other Cell Methods
For other, less commonly used, Worksheet methods see L<Spreadsheet::ParseExcel::Cell>.
=head1 Format
The C<Spreadsheet::ParseExcel::Format> class has the following properties:
=head2 Format properties
$format->{Font}
$format->{AlignH}
$format->{AlignV}
$format->{Indent}
$format->{Wrap}
$format->{Shrink}
$format->{Rotate}
$format->{JustLast}
$format->{ReadDir}
$format->{BdrStyle}
$format->{BdrColor}
$format->{BdrDiag}
$format->{Fill}
$format->{Lock}
$format->{Hidden}
$format->{Style}
These properties are generally only of interest to advanced users. Casual users can skip this section.
=head2 $format->{Font}
Returns the L</Font> object for the Format.
=head2 $format->{AlignH}
Returns the horizontal alignment of the format where the value has the following meaning:
0 => No alignment
1 => Left
2 => Center
3 => Right
4 => Fill
5 => Justify
6 => Center across
7 => Distributed/Equal spaced
=head2 $format->{AlignV}
Returns the vertical alignment of the format where the value has the following meaning:
0 => Top
1 => Center
2 => Bottom
3 => Justify
4 => Distributed/Equal spaced
=head2 $format->{Indent}
Returns the indent level of the C<Left> horizontal alignment.
=head2 $format->{Wrap}
Returns true if textwrap is on.
=head2 $format->{Shrink}
Returns true if "Shrink to fit" is set for the format.
=head2 $format->{Rotate}
Returns the text rotation. In Excel97+, it returns the angle in degrees of the text rotation.
In Excel95 or earlier it returns a value as follows:
0 => No rotation
1 => Top down
2 => 90 degrees anti-clockwise,
3 => 90 clockwise
=head2 $format->{JustLast}
Return true if the "justify last" property is set for the format.
=head2 $format->{ReadDir}
Returns the direction that the text is read from.
=head2 $format->{BdrStyle}
Returns an array ref of border styles as follows:
[ $left, $right, $top, $bottom ]
=head2 $format->{BdrColor}
Returns an array ref of border color indexes as follows:
[ $left, $right, $top, $bottom ]
=head2 $format->{BdrDiag}
Returns an array ref of diagonal border kind, style and color index as follows:
[$kind, $style, $color ]
Where kind is:
0 => None
1 => Right-Down
2 => Right-Up
3 => Both
=head2 $format->{Fill}
Returns an array ref of fill pattern and color indexes as follows:
[ $pattern, $front_color, $back_color ]
=head2 $format->{Lock}
Returns true if the cell is locked.
=head2 $format->{Hidden}
Returns true if the cell is Hidden.
=head2 $format->{Style}
Returns true if the format is a Style format.
=head1 Font
I<Spreadsheet::ParseExcel::Font>
Format class has these properties:
=head1 Font Properties
$font->{Name}
$font->{Bold}
$font->{Italic}
$font->{Height}
$font->{Underline}
$font->{UnderlineStyle}
$font->{Color}
$font->{Strikeout}
$font->{Super}
=head2 $font->{Name}
Returns the name of the font, for example 'Arial'.
=head2 $font->{Bold}
Returns true if the font is bold.
=head2 $font->{Italic}
Returns true if the font is italic.
=head2 $font->{Height}
Returns the size (height) of the font.
=head2 $font->{Underline}
Returns true if the font in underlined.
=head2 $font->{UnderlineStyle}
Returns the style of an underlined font where the value has the following meaning:
0 => None
1 => Single
2 => Double
33 => Single accounting
34 => Double accounting
=head2 $font->{Color}
Returns the color index for the font. The mapping to an RGB color is defined by each workbook.
The index can be converted to a RGB string using the C<$workbook->ColorIdxToRGB()> Parser method.
(Older versions of C<Spreadsheet::ParseExcel> provided the C<ColorIdxToRGB> class method, which is deprecated.)
=head2 $font->{Strikeout}
Returns true if the font has the strikeout property set.
=head2 $font->{Super}
Returns one of the following values if the superscript or subscript property of the font is set:
0 => None
1 => Superscript
2 => Subscript
=head1 Formatter Class
Formatters can be passed to the C<parse()> method to deal with Unicode or Asian formatting.
Spreadsheet::ParseExcel includes 2 formatter classes. C<FmtDefault> and C<FmtJapanese>. It is also possible to create a user defined formatting class.
The formatter class C<Spreadsheet::ParseExcel::Fmt*> should provide the following functions:
=head2 ChkType($self, $is_numeric, $format_index)
Method to check the type of data in the cell. Should return C<Date>, C<Numeric> or C<Text>. It is passed the following parameters:
=over
=item $self
A scalar reference to the Formatter object.
=item $is_numeric
If true, the value seems to be number.
=item $format_index
The index number for the cell Format object.
=back
=head2 TextFmt($self, $string_data, $string_encoding)
Converts the string data in the cell into the correct encoding. It is passed the following parameters:
=over
=item $self
A scalar reference to the Formatter object.
=item $string_data
The original string/text data.
=item $string_encoding
The character encoding of original string/text.
=back
=head2 ValFmt($self, $cell, $workbook)
Convert the original unformatted cell value into the appropriate formatted value. For instance turn a number into a formatted date. It is passed the following parameters:
=over
=item $self
A scalar reference to the Formatter object.
=item $cell
A scalar reference to the Cell object.
=item $workbook
A scalar reference to the Workbook object.
=back
=head2 FmtString($self, $cell, $workbook)
Get the format string for the Cell. It is passed the following parameters:
=over
=item $self
A scalar reference to the Formatter object.
=item $cell
A scalar reference to the Cell object.
=item $workbook
A scalar reference to the Workbook object.
=back
=head1 Reducing the memory usage of Spreadsheet::ParseExcel
In some cases a C<Spreadsheet::ParseExcel> application may consume a lot of memory when processing a large Excel file and, as a result, may fail to complete. The following explains why this can occur and how to resolve it.
C<Spreadsheet::ParseExcel> processes an Excel file in two stages. In the first stage it extracts the Excel binary stream from the OLE container file using C<OLE::Storage_Lite>. In the second stage it parses the binary stream to read workbook, worksheet and cell data which it then stores in memory. The majority of the memory usage is required for storing cell data.
The reason for this is that as the Excel file is parsed and each cell is encountered a cell handling function creates a relatively large nested cell object that contains the cell value and all of the data that relates to the cell formatting. For large files (a 10MB Excel file on a 256MB system) this overhead can cause the system to grind to a halt.
However, in a lot of cases when an Excel file is being processed the only information that is required are the cell values. In these cases it is possible to avoid most of the memory overhead by specifying your own cell handling function and by telling Spreadsheet::ParseExcel not to store the parsed cell data. This is achieved by passing a cell handler function to C<new()> when creating the parse object. Here is an example.
#!/usr/bin/perl -w
use strict;
use Spreadsheet::ParseExcel;
my $parser = Spreadsheet::ParseExcel->new(
CellHandler => \&cell_handler,
NotSetCell => 1
);
my $workbook = $parser->parse('file.xls');
sub cell_handler {
my $workbook = $_[0];
my $sheet_index = $_[1];
my $row = $_[2];
my $col = $_[3];
my $cell = $_[4];
# Do something useful with the formatted cell value
print $cell->value(), "\n";
}
The user specified cell handler is passed as a code reference to C<new()> along with the parameter C<NotSetCell> which tells Spreadsheet::ParseExcel not to store the parsed cell. Note, you don't have to iterate over the rows and columns, this happens automatically as part of the parsing.
The cell handler is passed 5 arguments. The first, C<$workbook>, is a reference to the C<Spreadsheet::ParseExcel::Workbook> object that represent the parsed workbook. This can be used to access any of the C<Spreadsheet::ParseExcel::Workbook> methods, see L</Workbook>. The second C<$sheet_index> is the zero-based index of the worksheet being parsed. The third and fourth, C<$row> and C<$col>, are the zero-based row and column number of the cell. The fifth, C<$cell>, is a reference to the C<Spreadsheet::ParseExcel::Cell> object. This is used to extract the data from the cell. See L</Cell> for more information.
This technique can be useful if you are writing an Excel to database filter since you can put your DB calls in the cell handler.
If you don't want all of the data in the spreadsheet you can add some control logic to the cell handler. For example we can extend the previous example so that it only prints the first 10 rows of the first two worksheets in the parsed workbook by adding some C<if()> statements to the cell handler:
#!/usr/bin/perl -w
use strict;
use Spreadsheet::ParseExcel;
my $parser = Spreadsheet::ParseExcel->new(
CellHandler => \&cell_handler,
NotSetCell => 1
);
my $workbook = $parser->parse('file.xls');
sub cell_handler {
my $workbook = $_[0];
my $sheet_index = $_[1];
my $row = $_[2];
my $col = $_[3];
my $cell = $_[4];
# Skip some worksheets and rows (inefficiently).
return if $sheet_index >= 3;
return if $row >= 10;
# Do something with the formatted cell value
print $cell->value(), "\n";
}
However, this still processes the entire workbook. If you wish to save some additional processing time you can abort the parsing after you have read the data that you want, using the workbook C<ParseAbort> method:
#!/usr/bin/perl -w
use strict;
use Spreadsheet::ParseExcel;
my $parser = Spreadsheet::ParseExcel->new(
CellHandler => \&cell_handler,
NotSetCell => 1
);
my $workbook = $parser->parse('file.xls');
sub cell_handler {
my $workbook = $_[0];
my $sheet_index = $_[1];
my $row = $_[2];
my $col = $_[3];
my $cell = $_[4];
# Skip some worksheets and rows (more efficiently).
if ( $sheet_index >= 1 and $row >= 10 ) {
$workbook->ParseAbort(1);
return;
}
# Do something with the formatted cell value
print $cell->value(), "\n";
}
=head1 Decryption
If a workbook is "protected" then Excel will encrypt the file whether a password is supplied or not. As of version 0.59 Spreadsheet::ParseExcel supports decrypting Excel workbooks using a default or user supplied password. However, only the following encryption scheme is supported:
Office 97/2000 Compatible encryption
The following encryption methods are not supported:
Weak Encryption (XOR)
RC4, Microsoft Base Cryptographic Provider v1.0
RC4, Microsoft Base DSS and Diffie-Hellman Cryptographic Provider
RC4, Microsoft DH SChannel Cryptographic Provider
RC4, Microsoft Enhanced Cryptographic Provider v1.0
RC4, Microsoft Enhanced DSS and Diffie-Hellman Cryptographic Provider
RC4, Microsoft Enhanced RSA and AES Cryptographic Provider
RC4, Microsoft RSA SChannel Cryptographic Provider
RC4, Microsoft Strong Cryptographic Provider
See the following for more information on Excel encryption: L<http://office.microsoft.com/en-us/office-2003-resource-kit/important-aspects-of-password-and-encryption-protection-HA001140311.aspx>.
=head1 KNOWN PROBLEMS
=over
=item * Issues reported by users: L<http://rt.cpan.org/Public/Dist/Display.html?Name=Spreadsheet-ParseExcel>
=item * This module cannot read the values of formulas from files created with Spreadsheet::WriteExcel unless the user specified the values when creating the file (which is generally not the case). The reason for this is that Spreadsheet::WriteExcel writes the formula but not the formula result since it isn't in a position to calculate arbitrary Excel formulas without access to Excel's formula engine.
=item * If Excel has date fields where the specified format is equal to the system-default for the short-date locale, Excel does not store the format, but defaults to an internal format which is system dependent. In these cases ParseExcel uses the date format 'yyyy-mm-dd'.
=back
=head1 REPORTING A BUG
Bugs can be reported via rt.cpan.org. See the following for instructions on bug reporting for Spreadsheet::ParseExcel
L<http://rt.cpan.org/Public/Dist/Display.html?Name=Spreadsheet-ParseExcel>
=head1 SEE ALSO
=over
=item * xls2csv by Ken Prows L<http://search.cpan.org/~ken/xls2csv-1.06/script/xls2csv>.
=item * xls2csv and xlscat by H.Merijn Brand (these utilities are part of Spreadsheet::Read, see below).
=item * excel2txt by Ken Youens-Clark, L<http://search.cpan.org/~kclark/excel2txt/excel2txt>. This is an excellent example of an Excel filter using Spreadsheet::ParseExcel. It can produce CSV, Tab delimited, Html, XML and Yaml.
=item * XLSperl by Jon Allen L<http://search.cpan.org/~jonallen/XLSperl/bin/XLSperl>. This application allows you to use Perl "one-liners" with Microsoft Excel files.
=item * Spreadsheet::XLSX L<http://search.cpan.org/~dmow/Spreadsheet-XLSX/lib/Spreadsheet/XLSX.pm> by Dmitry Ovsyanko. A module with a similar interface to Spreadsheet::ParseExcel for parsing Excel 2007 XLSX OpenXML files.
=item * Spreadsheet::Read L<http://search.cpan.org/~hmbrand/Spreadsheet-Read/Read.pm> by H.Merijn Brand. A single interface for reading several different spreadsheet formats.
=item * Spreadsheet::WriteExcel L<http://search.cpan.org/~jmcnamara/Spreadsheet-WriteExcel/lib/Spreadsheet/WriteExcel.pm>. A perl module for creating new Excel files.
=item * Spreadsheet::ParseExcel::SaveParser L<http://search.cpan.org/~jmcnamara/Spreadsheet-ParseExcel/lib/Spreadsheet/ParseExcel/SaveParser.pm>. This is a combination of Spreadsheet::ParseExcel and Spreadsheet::WriteExcel and it allows you to "rewrite" an Excel file. See the following example L<http://search.cpan.org/~jmcnamara/Spreadsheet-WriteExcel/lib/Spreadsheet/WriteExcel.pm#MODIFYING_AND_REWRITING_EXCEL_FILES>. It is part of the Spreadsheet::ParseExcel distro.
=item * Text::CSV_XS L<http://search.cpan.org/~hmbrand/Text-CSV_XS/CSV_XS.pm> by H.Merijn Brand. A fast and rigorous module for reading and writing CSV data. Don't consider rolling your own CSV handling, use this module instead.
=back
=head1 MAILING LIST
There is a Google group for discussing and asking questions about Spreadsheet::ParseExcel. This is a good place to search to see if your question has been asked before: L<http://groups-beta.google.com/group/spreadsheet-parseexcel/>
=head1 DONATIONS
If you'd care to donate to the Spreadsheet::ParseExcel project, you can do so via PayPal: L<http://tinyurl.com/7ayes>
=head1 TODO
=over
=item * The current maintenance work is directed towards making the documentation more useful, improving and simplifying the API, and improving the maintainability of the code base. After that new features will be added.
=item * Fix open bugs and documentation for SaveParser.
=item * Add Formula support, Hyperlink support, Named Range support.
=item * Improve Spreadsheet::ParseExcel::SaveParser compatibility with Spreadsheet::WriteExcel.
=item * Improve Unicode and other encoding support. This will probably require dropping support for perls prior to 5.8+.
=back
=head1 ACKNOWLEDGEMENTS
From Kawai Takanori:
First of all, I would like to acknowledge the following valuable programs and modules:
XHTML, OLE::Storage and Spreadsheet::WriteExcel.
In no particular order: Yamaji Haruna, Simamoto Takesi, Noguchi Harumi, Ikezawa Kazuhiro, Suwazono Shugo, Hirofumi Morisada, Michael Edwards, Kim Namusk, Slaven Rezic, Grant Stevens, H.Merijn Brand and many many people + Kawai Mikako.
Alexey Mazurin added the decryption facility.
=head1 DISCLAIMER OF WARRANTY
Because this software is licensed free of charge, there is no warranty for the software, to the extent permitted by applicable law. Except when otherwise stated in writing the copyright holders and/or other parties provide the software "as is" without warranty of any kind, either expressed or implied, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. The entire risk as to the quality and performance of the software is with you. Should the software prove defective, you assume the cost of all necessary servicing, repair, or correction.
In no event unless required by applicable law or agreed to in writing will any copyright holder, or any other party who may modify and/or redistribute the software as permitted by the above licence, be liable to you for damages, including any general, special, incidental, or consequential damages arising out of the use or inability to use the software (including but not limited to loss of data or data being rendered inaccurate or losses sustained by you or third parties or a failure of the software to operate with any other software), even if such holder or other party has been advised of the possibility of such damages.
=head1 LICENSE
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php>
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori (Hippo2000) kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved. This is free software. You may distribute under the terms of either the GNU General Public License or the Artistic License.
=cut
SPREADSHEET_XLSX_PARSEEXCEL
$fatpacked{"Spreadsheet/XLSX/ParseExcel/Cell.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_CELL';
package Spreadsheet::ParseExcel::Cell;
###############################################################################
#
# Spreadsheet::ParseExcel::Cell - A class for Cell data and formatting.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
our $VERSION = '0.65';
###############################################################################
#
# new()
#
# Constructor.
#
sub new {
my ( $package, %properties ) = @_;
my $self = \%properties;
bless $self, $package;
}
###############################################################################
#
# value()
#
# Returns the formatted value of the cell.
#
sub value {
my $self = shift;
return $self->{_Value};
}
###############################################################################
#
# unformatted()
#
# Returns the unformatted value of the cell.
#
sub unformatted {
my $self = shift;
return $self->{Val};
}
###############################################################################
#
# get_format()
#
# Returns the Format object for the cell.
#
sub get_format {
my $self = shift;
return $self->{Format};
}
###############################################################################
#
# type()
#
# Returns the type of cell such as Text, Numeric or Date.
#
sub type {
my $self = shift;
return $self->{Type};
}
###############################################################################
#
# encoding()
#
# Returns the character encoding of the cell.
#
sub encoding {
my $self = shift;
if ( !defined $self->{Code} ) {
return 1;
}
elsif ( $self->{Code} eq 'ucs2' ) {
return 2;
}
elsif ( $self->{Code} eq '_native_' ) {
return 3;
}
else {
return 0;
}
return $self->{Code};
}
###############################################################################
#
# is_merged()
#
# Returns true if the cell is merged.
#
sub is_merged {
my $self = shift;
return $self->{Merged};
}
###############################################################################
#
# get_rich_text()
#
# Returns an array ref of font information about each string block in a "rich",
# i.e. multi-format, string.
#
sub get_rich_text {
my $self = shift;
return $self->{Rich};
}
###############################################################################
#
# get_hyperlink {
#
# Returns an array ref of hyperlink information if the cell contains a hyperlink.
# Returns undef otherwise
#
# [0] : Description of link (You may want $cell->value, as it will have rich text)
# [1] : URL - the link expressed as a URL. N.B. relative URLs will be defaulted to
# the directory of the input file, if the input file name is known. Otherwise
# %REL% will be inserted as a place-holder. Depending on your application,
# you should either remove %REL% or replace it with the appropriate path.
# [2] : Target frame (or undef if none)
sub get_hyperlink {
my $self = shift;
return $self->{Hyperlink} if exists $self->{Hyperlink};
return undef;
}
#
###############################################################################
#
# Mapping between legacy method names and new names.
#
{
no warnings; # Ignore warnings about variables used only once.
*Value = \&value;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::Cell - A class for Cell data and formatting.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 Methods
The following Cell methods are available:
$cell->value()
$cell->unformatted()
$cell->get_format()
$cell->type()
$cell->encoding()
$cell->is_merged()
$cell->get_rich_text()
$cell->get_hyperlink()
=head2 value()
The C<value()> method returns the formatted value of the cell.
my $value = $cell->value();
Formatted in this sense refers to the numeric format of the cell value. For example a number such as 40177 might be formatted as 40,117, 40117.000 or even as the date 2009/12/30.
If the cell doesn't contain a numeric format then the formatted and unformatted cell values are the same, see the C<unformatted()> method below.
For a defined C<$cell> the C<value()> method will always return a value.
In the case of a cell with formatting but no numeric or string contents the method will return the empty string C<''>.
=head2 unformatted()
The C<unformatted()> method returns the unformatted value of the cell.
my $unformatted = $cell->unformatted();
Returns the cell value without a numeric format. See the C<value()> method above.
=head2 get_format()
The C<get_format()> method returns the L<Spreadsheet::ParseExcel::Format> object for the cell.
my $format = $cell->get_format();
If a user defined format hasn't been applied to the cell then the default cell format is returned.
=head2 type()
The C<type()> method returns the type of cell such as Text, Numeric or Date. If the type was detected as Numeric, and the Cell Format matches C<m{^[dmy][-\\/dmy]*$}i>, it will be treated as a Date type.
my $type = $cell->type();
See also L<Dates and Time in Excel>.
=head2 encoding()
The C<encoding()> method returns the character encoding of the cell.
my $encoding = $cell->encoding();
This method is only of interest to developers. In general Spreadsheet::ParseExcel will return all character strings in UTF-8 regardless of the encoding used by Excel.
The C<encoding()> method returns one of the following values:
=over
=item * 0: Unknown format. This shouldn't happen. In the default case the format should be 1.
=item * 1: 8bit ASCII or single byte UTF-16. This indicates that the characters are encoded in a single byte. In Excel 95 and earlier This usually meant ASCII or an international variant. In Excel 97 it refers to a compressed UTF-16 character string where all of the high order bytes are 0 and are omitted to save space.
=item * 2: UTF-16BE.
=item * 3: Native encoding. In Excel 95 and earlier this encoding was used to represent multi-byte character encodings such as SJIS.
=back
=head2 is_merged()
The C<is_merged()> method returns true if the cell is merged.
my $is_merged = $cell->is_merged();
Returns C<undef> if the property isn't set.
=head2 get_rich_text()
The C<get_rich_text()> method returns an array ref of font information about each string block in a "rich", i.e. multi-format, string.
my $rich_text = $cell->get_rich_text();
The return value is an arrayref of arrayrefs in the form:
[
[ $start_position, $font_object ],
...,
]
Returns undef if the property isn't set.
=head2 get_hyperlink()
If a cell contains a hyperlink, the C<get_hyperlink()> method returns an array ref of information about it.
A cell can contain at most one hyperlink. If it does, it contains no other value.
Otherwise, it returns undef;
The array contains:
=over
=item * 0: Description (what's displayed); undef if not present
=item * 1: Link, converted to an appropriate URL - Note: Relative links are based on the input file. %REL% is used if the input file is unknown (e.g. a file handle or scalar)
=item * 2: Target - target frame (or undef if none)
=back
=head1 Dates and Time in Excel
Dates and times in Excel are represented by real numbers, for example "Jan 1 2001 12:30 PM" is represented by the number 36892.521.
The integer part of the number stores the number of days since the epoch and the fractional part stores the percentage of the day.
A date or time in Excel is just like any other number. The way in which it is displayed is controlled by the number format:
Number format $cell->value() $cell->unformatted()
============= ============== ==============
'dd/mm/yy' '28/02/08' 39506.5
'mm/dd/yy' '02/28/08' 39506.5
'd-m-yyyy' '28-2-2008' 39506.5
'dd/mm/yy hh:mm' '28/02/08 12:00' 39506.5
'd mmm yyyy' '28 Feb 2008' 39506.5
'mmm d yyyy hh:mm AM/PM' 'Feb 28 2008 12:00 PM' 39506.5
The L<Spreadsheet::ParseExcel::Utility> module contains a function called C<ExcelLocaltime> which will convert between an unformatted Excel date/time number and a C<localtime()> like array.
For date conversions using the CPAN C<DateTime> framework see L<DateTime::Format::Excel> http://search.cpan.org/search?dist=DateTime-Format-Excel
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_XLSX_PARSEEXCEL_CELL
$fatpacked{"Spreadsheet/XLSX/ParseExcel/Dump.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_DUMP';
package Spreadsheet::ParseExcel::Dump;
###############################################################################
#
# Spreadsheet::ParseExcel::Dump - A class for dumping Excel records.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
our $VERSION = '0.65';
my %NameTbl = (
#P291
0x0A => 'EOF',
0x0C => 'CALCCOUNT',
0x0D => 'CALCMODE',
0x0E => 'PRECISION',
0x0F => 'REFMODE',
0x10 => 'DELTA',
0x11 => 'ITERATION',
0x12 => 'PROTECT',
0x13 => 'PASSWORD',
0x14 => 'HEADER',
0x15 => 'FOOTER',
0x16 => 'EXTERNCOUNT',
0x17 => 'EXTERNSHEET',
0x19 => 'WINDOWPROTECT',
0x1A => 'VERTICALPAGEBREAKS',
0x1B => 'HORIZONTALPAGEBREAKS',
0x1C => 'NOTE',
0x1D => 'SELECTION',
0x22 => '1904',
0x26 => 'LEFTMARGIN',
0x27 => 'RIGHTMARGIN',
0x28 => 'TOPMARGIN',
0x29 => 'BOTTOMMARGIN',
0x2A => 'PRINTHEADERS',
0x2B => 'PRINTGRIDLINES',
0x2F => 'FILEPASS',
0x3C => 'COUNTINUE',
0x3D => 'WINDOW1',
0x40 => 'BACKUP',
0x41 => 'PANE',
0x42 => 'CODEPAGE',
0x4D => 'PLS',
0x50 => 'DCON',
0x51 => 'DCONREF',
#P292
0x52 => 'DCONNAME',
0x55 => 'DEFCOLWIDTH',
0x59 => 'XCT',
0x5A => 'CRN',
0x5B => 'FILESHARING',
0x5C => 'WRITEACCES',
0x5D => 'OBJ',
0x5E => 'UNCALCED',
0x5F => 'SAVERECALC',
0x60 => 'TEMPLATE',
0x63 => 'OBJPROTECT',
0x7D => 'COLINFO',
0x7E => 'RK',
0x7F => 'IMDATA',
0x80 => 'GUTS',
0x81 => 'WSBOOL',
0x82 => 'GRIDSET',
0x83 => 'HCENTER',
0x84 => 'VCENTER',
0x85 => 'BOUNDSHEET',
0x86 => 'WRITEPROT',
0x87 => 'ADDIN',
0x88 => 'EDG',
0x89 => 'PUB',
0x8C => 'COUNTRY',
0x8D => 'HIDEOBJ',
0x90 => 'SORT',
0x91 => 'SUB',
0x92 => 'PALETTE',
0x94 => 'LHRECORD',
0x95 => 'LHNGRAPH',
0x96 => 'SOUND',
0x98 => 'LPR',
0x99 => 'STANDARDWIDTH',
0x9A => 'FNGROUPNAME',
0x9B => 'FILTERMODE',
0x9C => 'FNGROUPCOUNT',
#P293
0x9D => 'AUTOFILTERINFO',
0x9E => 'AUTOFILTER',
0xA0 => 'SCL',
0xA1 => 'SETUP',
0xA9 => 'COORDLIST',
0xAB => 'GCW',
0xAE => 'SCENMAN',
0xAF => 'SCENARIO',
0xB0 => 'SXVIEW',
0xB1 => 'SXVD',
0xB2 => 'SXV',
0xB4 => 'SXIVD',
0xB5 => 'SXLI',
0xB6 => 'SXPI',
0xB8 => 'DOCROUTE',
0xB9 => 'RECIPNAME',
0xBC => 'SHRFMLA',
0xBD => 'MULRK',
0xBE => 'MULBLANK',
0xBF => 'TOOLBARHDR',
0xC0 => 'TOOLBAREND',
0xC1 => 'MMS',
0xC2 => 'ADDMENU',
0xC3 => 'DELMENU',
0xC5 => 'SXDI',
0xC6 => 'SXDB',
0xCD => 'SXSTRING',
0xD0 => 'SXTBL',
0xD1 => 'SXTBRGIITM',
0xD2 => 'SXTBPG',
0xD3 => 'OBPROJ',
0xD5 => 'SXISDTM',
0xD6 => 'RSTRING',
0xD7 => 'DBCELL',
0xDA => 'BOOKBOOL',
0xDC => 'PARAMQRY',
0xDC => 'SXEXT',
0xDD => 'SCENPROTECT',
0xDE => 'OLESIZE',
#P294
0xDF => 'UDDESC',
0xE0 => 'XF',
0xE1 => 'INTERFACEHDR',
0xE2 => 'INTERFACEEND',
0xE3 => 'SXVS',
0xEA => 'TABIDCONF',
0xEB => 'MSODRAWINGGROUP',
0xEC => 'MSODRAWING',
0xED => 'MSODRAWINGSELECTION',
0xEF => 'PHONETICINFO',
0xF0 => 'SXRULE',
0xF1 => 'SXEXT',
0xF2 => 'SXFILT',
0xF6 => 'SXNAME',
0xF7 => 'SXSELECT',
0xF8 => 'SXPAIR',
0xF9 => 'SXFMLA',
0xFB => 'SXFORMAT',
0xFC => 'SST',
0xFD => 'LABELSST',
0xFF => 'EXTSST',
0x100 => 'SXVDEX',
0x103 => 'SXFORMULA',
0x122 => 'SXDBEX',
0x13D => 'TABID',
0x160 => 'USESELFS',
0x161 => 'DSF',
0x162 => 'XL5MODIFY',
0x1A5 => 'FILESHARING2',
0x1A9 => 'USERBVIEW',
0x1AA => 'USERVIEWBEGIN',
0x1AB => 'USERSVIEWEND',
0x1AD => 'QSI',
0x1AE => 'SUPBOOK',
0x1AF => 'PROT4REV',
0x1B0 => 'CONDFMT',
0x1B1 => 'CF',
0x1B2 => 'DVAL',
#P295
0x1B5 => 'DCONBIN',
0x1B6 => 'TXO',
0x1B7 => 'REFRESHALL',
0x1B8 => 'HLINK',
0x1BA => 'CODENAME',
0x1BB => 'SXFDBTYPE',
0x1BC => 'PROT4REVPASS',
0x1BE => 'DV',
0x200 => 'DIMENSIONS',
0x201 => 'BLANK',
0x202 => 'Integer', #Not Documented
0x203 => 'NUMBER',
0x204 => 'LABEL',
0x205 => 'BOOLERR',
0x207 => 'STRING',
0x208 => 'ROW',
0x20B => 'INDEX',
0x218 => 'NAME',
0x221 => 'ARRAY',
0x223 => 'EXTERNNAME',
0x225 => 'DEFAULTROWHEIGHT',
0x231 => 'FONT',
0x236 => 'TABLE',
0x23E => 'WINDOW2',
0x293 => 'STYLE',
0x406 => 'FORMULA',
0x41E => 'FORMAT',
0x18 => 'NAME',
0x06 => 'FORMULA',
0x09 => 'BOF(BIFF2)',
0x209 => 'BOF(BIFF3)',
0x409 => 'BOF(BIFF4)',
0x809 => 'BOF(BIFF5-7)',
0x31 => 'FONT', 0x27E => 'RK',
#Chart/Graph
0x1001 => 'UNITS',
0x1002 => 'CHART',
0x1003 => 'SERISES',
0x1006 => 'DATAFORMAT',
0x1007 => 'LINEFORMAT',
0x1009 => 'MAKERFORMAT',
0x100A => 'AREAFORMAT',
0x100B => 'PIEFORMAT',
0x100C => 'ATTACHEDLABEL',
0x100D => 'SERIESTEXT',
0x1014 => 'CHARTFORMAT',
0x1015 => 'LEGEND',
0x1016 => 'SERIESLIST',
0x1017 => 'BAR',
0x1018 => 'LINE',
0x1019 => 'PIE',
0x101A => 'AREA',
0x101B => 'SCATTER',
0x101C => 'CHARTLINE',
0x101D => 'AXIS',
0x101E => 'TICK',
0x101F => 'VALUERANGE',
0x1020 => 'CATSERRANGE',
0x1021 => 'AXISLINEFORMAT',
0x1022 => 'CHARTFORMATLINK',
0x1024 => 'DEFAULTTEXT',
0x1025 => 'TEXT',
0x1026 => 'FONTX',
0x1027 => 'OBJECTLINK',
0x1032 => 'FRAME',
0x1033 => 'BEGIN',
0x1034 => 'END',
0x1035 => 'PLOTAREA',
0x103A => '3D',
0x103C => 'PICF',
0x103D => 'DROPBAR',
0x103E => 'RADAR',
0x103F => 'SURFACE',
0x1040 => 'RADARAREA',
0x1041 => 'AXISPARENT',
0x1043 => 'LEGENDXN',
0x1044 => 'SHTPROPS',
0x1045 => 'SERTOCRT',
0x1046 => 'AXESUSED',
0x1048 => 'SBASEREF',
0x104A => 'SERPARENT',
0x104B => 'SERAUXTREND',
0x104E => 'IFMT',
0x104F => 'POS',
0x1050 => 'ALRUNS',
0x1051 => 'AI',
0x105B => 'SERAUXERRBAR',
0x105D => 'SERFMT',
0x1060 => 'FBI',
0x1061 => 'BOPPOP',
0x1062 => 'AXCEXT',
0x1063 => 'DAT',
0x1064 => 'PLOTGROWTH',
0x1065 => 'SINDEX',
0x1066 => 'GELFRAME',
0x1067 => 'BPOPPOPCUSTOM',
);
#------------------------------------------------------------------------------
# subDUMP (for Spreadsheet::ParseExcel)
#------------------------------------------------------------------------------
sub subDUMP {
my ( $oBook, $bOp, $bLen, $sWk ) = @_;
printf "%04X:%-23s (Len:%3d) : %s\n",
$bOp, OpName($bOp), $bLen, unpack( "H40", $sWk );
}
#------------------------------------------------------------------------------
# Spreadsheet::ParseExcel->OpName
#------------------------------------------------------------------------------
sub OpName {
my ($bOp) = @_;
return ( defined $NameTbl{$bOp} ) ? $NameTbl{$bOp} : 'undef';
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::Dump - A class for dumping Excel records.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_XLSX_PARSEEXCEL_DUMP
$fatpacked{"Spreadsheet/XLSX/ParseExcel/FmtDefault.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FMTDEFAULT';
package Spreadsheet::ParseExcel::FmtDefault;
###############################################################################
#
# Spreadsheet::ParseExcel::FmtDefault - A class for Cell formats.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use Spreadsheet::ParseExcel::Utility qw(ExcelFmt);
our $VERSION = '0.65';
my %hFmtDefault = (
0x00 => 'General',
0x01 => '0',
0x02 => '0.00',
0x03 => '#,##0',
0x04 => '#,##0.00',
0x05 => '($#,##0_);($#,##0)',
0x06 => '($#,##0_);[Red]($#,##0)',
0x07 => '($#,##0.00_);($#,##0.00_)',
0x08 => '($#,##0.00_);[Red]($#,##0.00_)',
0x09 => '0%',
0x0A => '0.00%',
0x0B => '0.00E+00',
0x0C => '# ?/?',
0x0D => '# ??/??',
0x0E => 'yyyy-mm-dd', # Was 'm-d-yy', which is bad as system default
0x0F => 'd-mmm-yy',
0x10 => 'd-mmm',
0x11 => 'mmm-yy',
0x12 => 'h:mm AM/PM',
0x13 => 'h:mm:ss AM/PM',
0x14 => 'h:mm',
0x15 => 'h:mm:ss',
0x16 => 'm-d-yy h:mm',
#0x17-0x24 -- Differs in Natinal
0x25 => '(#,##0_);(#,##0)',
0x26 => '(#,##0_);[Red](#,##0)',
0x27 => '(#,##0.00);(#,##0.00)',
0x28 => '(#,##0.00);[Red](#,##0.00)',
0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)',
0x2A => '_($*#,##0_);_($*(#,##0);_(*"-"_);_(@_)',
0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)',
0x2C => '_($*#,##0.00_);_($*(#,##0.00);_(*"-"??_);_(@_)',
0x2D => 'mm:ss',
0x2E => '[h]:mm:ss',
0x2F => 'mm:ss.0',
0x30 => '##0.0E+0',
0x31 => '@',
);
#------------------------------------------------------------------------------
# new (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub new {
my ( $sPkg, %hKey ) = @_;
my $oThis = {};
bless $oThis;
return $oThis;
}
#------------------------------------------------------------------------------
# TextFmt (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub TextFmt {
my ( $oThis, $sTxt, $sCode ) = @_;
return $sTxt if ( ( !defined($sCode) ) || ( $sCode eq '_native_' ) );
return pack( 'U*', unpack( 'n*', $sTxt ) );
}
#------------------------------------------------------------------------------
# FmtStringDef (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub FmtStringDef {
my ( $oThis, $iFmtIdx, $oBook, $rhFmt ) = @_;
my $sFmtStr = $oBook->{FormatStr}->{$iFmtIdx};
if ( !( defined($sFmtStr) ) && defined($rhFmt) ) {
$sFmtStr = $rhFmt->{$iFmtIdx};
}
$sFmtStr = $hFmtDefault{$iFmtIdx} unless ($sFmtStr);
return $sFmtStr;
}
#------------------------------------------------------------------------------
# FmtString (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub FmtString {
my ( $oThis, $oCell, $oBook ) = @_;
my $sFmtStr =
$oThis->FmtStringDef( $oBook->{Format}[ $oCell->{FormatNo} ]->{FmtIdx},
$oBook );
# Special case for cells that use Lotus123 style leading
# apostrophe to designate text formatting.
if ( $oBook->{Format}[ $oCell->{FormatNo} ]->{Key123} ) {
$sFmtStr = '@';
}
unless ( defined($sFmtStr) ) {
if ( $oCell->{Type} eq 'Numeric' ) {
if ( int( $oCell->{Val} ) != $oCell->{Val} ) {
$sFmtStr = '0.00';
}
else {
$sFmtStr = '0';
}
}
elsif ( $oCell->{Type} eq 'Date' ) {
if ( int( $oCell->{Val} ) <= 0 ) {
$sFmtStr = 'h:mm:ss';
}
else {
$sFmtStr = 'yyyy-mm-dd';
}
}
else {
$sFmtStr = '@';
}
}
return $sFmtStr;
}
#------------------------------------------------------------------------------
# ValFmt (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub ValFmt {
my ( $oThis, $oCell, $oBook ) = @_;
my ( $Dt, $iFmtIdx, $iNumeric, $Flg1904 );
if ( $oCell->{Type} eq 'Text' ) {
$Dt =
( ( defined $oCell->{Val} ) && ( $oCell->{Val} ne '' ) )
? $oThis->TextFmt( $oCell->{Val}, $oCell->{Code} )
: '';
return $Dt;
}
else {
$Dt = $oCell->{Val};
$Flg1904 = $oBook->{Flg1904};
my $sFmtStr = $oThis->FmtString( $oCell, $oBook );
return ExcelFmt( $sFmtStr, $Dt, $Flg1904, $oCell->{Type} );
}
}
#------------------------------------------------------------------------------
# ChkType (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub ChkType {
my ( $oPkg, $iNumeric, $iFmtIdx ) = @_;
if ($iNumeric) {
if ( ( ( $iFmtIdx >= 0x0E ) && ( $iFmtIdx <= 0x16 ) )
|| ( ( $iFmtIdx >= 0x2D ) && ( $iFmtIdx <= 0x2F ) ) )
{
return "Date";
}
else {
return "Numeric";
}
}
else {
return "Text";
}
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::FmtDefault - A class for Cell formats.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_XLSX_PARSEEXCEL_FMTDEFAULT
$fatpacked{"Spreadsheet/XLSX/ParseExcel/FmtJapan.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FMTJAPAN';
package Spreadsheet::ParseExcel::FmtJapan;
use utf8;
###############################################################################
#
# Spreadsheet::ParseExcel::FmtJapan - A class for Cell formats.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use Encode qw(find_encoding decode);
use base 'Spreadsheet::ParseExcel::FmtDefault';
our $VERSION = '0.65';
my %FormatTable = (
0x00 => 'General',
0x01 => '0',
0x02 => '0.00',
0x03 => '#,##0',
0x04 => '#,##0.00',
0x05 => '(\\#,##0_);(\\#,##0)',
0x06 => '(\\#,##0_);[Red](\\#,##0)',
0x07 => '(\\#,##0.00_);(\\#,##0.00_)',
0x08 => '(\\#,##0.00_);[Red](\\#,##0.00_)',
0x09 => '0%',
0x0A => '0.00%',
0x0B => '0.00E+00',
0x0C => '# ?/?',
0x0D => '# ??/??',
# 0x0E => 'm/d/yy',
0x0E => 'yyyy/m/d',
0x0F => 'd-mmm-yy',
0x10 => 'd-mmm',
0x11 => 'mmm-yy',
0x12 => 'h:mm AM/PM',
0x13 => 'h:mm:ss AM/PM',
0x14 => 'h:mm',
0x15 => 'h:mm:ss',
# 0x16 => 'm/d/yy h:mm',
0x16 => 'yyyy/m/d h:mm',
#0x17-0x24 -- Differs in Natinal
0x1E => 'm/d/yy',
0x1F => 'yyyy"年"m"月"d"日"',
0x20 => 'h"時"mm"分"',
0x21 => 'h"時"mm"分"ss"秒"',
#0x17-0x24 -- Differs in Natinal
0x25 => '(#,##0_);(#,##0)',
0x26 => '(#,##0_);[Red](#,##0)',
0x27 => '(#,##0.00);(#,##0.00)',
0x28 => '(#,##0.00);[Red](#,##0.00)',
0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)',
0x2A => '_(\\*#,##0_);_(\\*(#,##0);_(*"-"_);_(@_)',
0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)',
0x2C => '_(\\*#,##0.00_);_(\\*(#,##0.00);_(*"-"??_);_(@_)',
0x2D => 'mm:ss',
0x2E => '[h]:mm:ss',
0x2F => 'mm:ss.0',
0x30 => '##0.0E+0',
0x31 => '@',
0x37 => 'yyyy"年"m"月"',
0x38 => 'm"月"d"日"',
0x39 => 'ge.m.d',
0x3A => 'ggge"年"m"月"d"日"',
);
#------------------------------------------------------------------------------
# new (for Spreadsheet::ParseExcel::FmtJapan)
#------------------------------------------------------------------------------
sub new {
my ( $class, %args ) = @_;
my $encoding = $args{Code} || $args{encoding};
my $self = { Code => $encoding };
if($encoding){
$self->{encoding} = find_encoding($encoding eq 'sjis' ? 'cp932' : $encoding)
or do{
require Carp;
Carp::croak(qq{Unknown encoding '$encoding'});
};
}
return bless $self, $class;
}
#------------------------------------------------------------------------------
# TextFmt (for Spreadsheet::ParseExcel::FmtJapan)
#------------------------------------------------------------------------------
sub TextFmt {
my ( $self, $text, $input_encoding ) = @_;
if(!defined $input_encoding){
$input_encoding = 'utf8';
}
elsif($input_encoding eq '_native_'){
$input_encoding = 'cp932'; # Shift_JIS in Microsoft products
}
$text = decode($input_encoding, $text);
return $self->{Code} ? $self->{encoding}->encode($text) : $text;
}
#------------------------------------------------------------------------------
# FmtStringDef (for Spreadsheet::ParseExcel::FmtJapan)
#------------------------------------------------------------------------------
sub FmtStringDef {
my ( $self, $format_index, $book ) = @_;
return $self->SUPER::FmtStringDef( $format_index, $book, \%FormatTable );
}
#------------------------------------------------------------------------------
# CnvNengo (for Spreadsheet::ParseExcel::FmtJapan)
#------------------------------------------------------------------------------
# Convert A.D. into Japanese Nengo (aka Gengo)
my @Nengo = (
{
name => '平成', # Heisei
abbr_name => 'H',
base => 1988,
start => 19890108,
},
{
name => '昭和', # Showa
abbr_name => 'S',
base => 1925,
start => 19261225,
},
{
name => '大正', # Taisho
abbr_name => 'T',
base => 1911,
start => 19120730,
},
{
name => '明治', # Meiji
abbr_name => 'M',
base => 1867,
start => 18680908,
},
);
# Usage: CnvNengo(name => @tm) or CnvNeng(abbr_name => @tm)
sub CnvNengo {
my ( $kind, @tm ) = @_;
my $year = $tm[5] + 1900;
my $wk = ($year * 10000) + ($tm[4] * 100) + ($tm[3] * 1);
#my $wk = sprintf( '%04d%02d%02d', $year, $tm[4], $tm[3] );
foreach my $nengo(@Nengo){
if( $wk >= $nengo->{start} ){
return $nengo->{$kind} . ($year - $nengo->{base});
}
}
return $year;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::FmtJapan - A class for Cell formats.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_XLSX_PARSEEXCEL_FMTJAPAN
$fatpacked{"Spreadsheet/XLSX/ParseExcel/FmtJapan2.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FMTJAPAN2';
package Spreadsheet::ParseExcel::FmtJapan2;
###############################################################################
#
# Spreadsheet::ParseExcel::FmtJapan2 - A class for Cell formats.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use Jcode;
use Unicode::Map;
use base 'Spreadsheet::ParseExcel::FmtJapan';
our $VERSION = '0.65';
#------------------------------------------------------------------------------
# new (for Spreadsheet::ParseExcel::FmtJapan2)
#------------------------------------------------------------------------------
sub new {
my ( $sPkg, %hKey ) = @_;
my $oMap = Unicode::Map->new('CP932Excel');
die "NO MAP FILE CP932Excel!!"
unless ( -r Unicode::Map->mapping("CP932Excel") );
my $oThis = {
Code => $hKey{Code},
_UniMap => $oMap,
};
bless $oThis;
$oThis->SUPER::new(%hKey);
return $oThis;
}
#------------------------------------------------------------------------------
# TextFmt (for Spreadsheet::ParseExcel::FmtJapan2)
#------------------------------------------------------------------------------
sub TextFmt {
my ( $oThis, $sTxt, $sCode ) = @_;
# $sCode = 'sjis' if((! defined($sCode)) || ($sCode eq '_native_'));
if ( $oThis->{Code} ) {
if ( !defined($sCode) ) {
$sTxt =~ s/(.)/\x00$1/sg;
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt);
}
elsif ( $sCode eq 'ucs2' ) {
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt);
}
return Jcode::convert( $sTxt, $oThis->{Code}, 'sjis' );
}
else {
return $sTxt;
}
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::FmtJapan2 - A class for Cell formats.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_XLSX_PARSEEXCEL_FMTJAPAN2
$fatpacked{"Spreadsheet/XLSX/ParseExcel/FmtUnicode.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FMTUNICODE';
package Spreadsheet::ParseExcel::FmtUnicode;
###############################################################################
#
# Spreadsheet::ParseExcel::FmtUnicode - A class for Cell formats.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use Unicode::Map;
use base 'Spreadsheet::ParseExcel::FmtDefault';
our $VERSION = '0.65';
#------------------------------------------------------------------------------
# new (for Spreadsheet::ParseExcel::FmtUnicode)
#------------------------------------------------------------------------------
sub new {
my ( $sPkg, %hKey ) = @_;
my $sMap = $hKey{Unicode_Map};
my $oMap;
$oMap = Unicode::Map->new($sMap) if $sMap;
my $oThis = {
Unicode_Map => $sMap,
_UniMap => $oMap,
};
bless $oThis;
return $oThis;
}
#------------------------------------------------------------------------------
# TextFmt (for Spreadsheet::ParseExcel::FmtUnicode)
#------------------------------------------------------------------------------
sub TextFmt {
my ( $oThis, $sTxt, $sCode ) = @_;
if ( $oThis->{_UniMap} ) {
if ( !defined($sCode) ) {
my $sSv = $sTxt;
$sTxt =~ s/(.)/\x00$1/sg;
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt);
$sTxt = $sSv unless ($sTxt);
}
elsif ( $sCode eq 'ucs2' ) {
$sTxt = $oThis->{_UniMap}->from_unicode($sTxt);
}
# $sTxt = $oThis->{_UniMap}->from_unicode($sTxt)
# if(defined($sCode) && $sCode eq 'ucs2');
return $sTxt;
}
else {
return $sTxt;
}
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::FmtUnicode - A class for Cell formats.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_XLSX_PARSEEXCEL_FMTUNICODE
$fatpacked{"Spreadsheet/XLSX/ParseExcel/Font.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FONT';
package Spreadsheet::ParseExcel::Font;
###############################################################################
#
# Spreadsheet::ParseExcel::Font - A class for Cell fonts.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
our $VERSION = '0.65';
sub new {
my ( $class, %rhIni ) = @_;
my $self = \%rhIni;
bless $self, $class;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::Font - A class for Cell fonts.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_XLSX_PARSEEXCEL_FONT
$fatpacked{"Spreadsheet/XLSX/ParseExcel/Format.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_FORMAT';
package Spreadsheet::ParseExcel::Format;
###############################################################################
#
# Spreadsheet::ParseExcel::Format - A class for Cell formats.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
our $VERSION = '0.65';
sub new {
my ( $class, %rhIni ) = @_;
my $self = \%rhIni;
bless $self, $class;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::Format - A class for Cell formats.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_XLSX_PARSEEXCEL_FORMAT
$fatpacked{"Spreadsheet/XLSX/ParseExcel/SaveParser.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER';
package Spreadsheet::ParseExcel::SaveParser;
###############################################################################
#
# Spreadsheet::ParseExcel::SaveParser - Rewrite an existing Excel file.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use Spreadsheet::ParseExcel;
use Spreadsheet::ParseExcel::SaveParser::Workbook;
use Spreadsheet::ParseExcel::SaveParser::Worksheet;
use Spreadsheet::WriteExcel;
use base 'Spreadsheet::ParseExcel';
our $VERSION = '0.65';
###############################################################################
#
# new()
#
sub new {
my ( $package, %params ) = @_;
$package->SUPER::new(%params);
}
###############################################################################
#
# Create()
#
sub Create {
my ( $self, $formatter ) = @_;
#0. New $workbook
my $workbook = Spreadsheet::ParseExcel::Workbook->new();
$workbook->{SheetCount} = 0;
# User specified formatter class.
if ($formatter) {
$workbook->{FmtClass} = $formatter;
}
else {
$workbook->{FmtClass} = Spreadsheet::ParseExcel::FmtDefault->new();
}
return Spreadsheet::ParseExcel::SaveParser::Workbook->new($workbook);
}
###############################################################################
#
# Parse()
#
sub Parse {
my ( $self, $sFile, $formatter ) = @_;
my $workbook = $self->SUPER::Parse( $sFile, $formatter );
return undef unless defined $workbook;
return Spreadsheet::ParseExcel::SaveParser::Workbook->new($workbook);
}
###############################################################################
#
# SaveAs()
#
sub SaveAs {
my ( $self, $workbook, $filename ) = @_;
$workbook->SaveAs($filename);
}
1;
__END__
=head1 NAME
Spreadsheet::ParseExcel::SaveParser - Rewrite an existing Excel file.
=head1 SYNOPSIS
Say we start with an Excel file that looks like this:
-----------------------------------------------------
| | A | B | C |
-----------------------------------------------------
| 1 | Hello | ... | ... | ...
| 2 | World | ... | ... | ...
| 3 | *Bold text* | ... | ... | ...
| 4 | ... | ... | ... | ...
| 5 | ... | ... | ... | ...
Then we process it with the following program:
#!/usr/bin/perl
use strict;
use warnings;
use Spreadsheet::ParseExcel;
use Spreadsheet::ParseExcel::SaveParser;
# Open an existing file with SaveParser
my $parser = Spreadsheet::ParseExcel::SaveParser->new();
my $template = $parser->Parse('template.xls');
# Get the first worksheet.
my $worksheet = $template->worksheet(0);
my $row = 0;
my $col = 0;
# Overwrite the string in cell A1
$worksheet->AddCell( $row, $col, 'New string' );
# Add a new string in cell B1
$worksheet->AddCell( $row, $col + 1, 'Newer' );
# Add a new string in cell C1 with the format from cell A3.
my $cell = $worksheet->get_cell( $row + 2, $col );
my $format_number = $cell->{FormatNo};
$worksheet->AddCell( $row, $col + 2, 'Newest', $format_number );
# Write over the existing file or write a new file.
$template->SaveAs('newfile.xls');
We should now have an Excel file that looks like this:
-----------------------------------------------------
| | A | B | C |
-----------------------------------------------------
| 1 | New string | Newer | *Newest* | ...
| 2 | World | ... | ... | ...
| 3 | *Bold text* | ... | ... | ...
| 4 | ... | ... | ... | ...
| 5 | ... | ... | ... | ...
=head1 DESCRIPTION
The C<Spreadsheet::ParseExcel::SaveParser> module rewrite an existing Excel file by reading it with C<Spreadsheet::ParseExcel> and rewriting it with C<Spreadsheet::WriteExcel>.
=head1 METHODS
=head1 Parser
=head2 new()
$parse = new Spreadsheet::ParseExcel::SaveParser();
Constructor.
=head2 Parse()
$workbook = $parse->Parse($sFileName);
$workbook = $parse->Parse($sFileName , $formatter);
Returns a L</Workbook> object. If an error occurs, returns undef.
The optional C<$formatter> is a Formatter Class to format the value of cells.
=head1 Workbook
The C<Parse()> method returns a C<Spreadsheet::ParseExcel::SaveParser::Workbook> object.
This is a subclass of the L<Spreadsheet::ParseExcel::Workbook> and has the following methods:
=head2 worksheets()
Returns an array of L</Worksheet> objects. This was most commonly used to iterate over the worksheets in a workbook:
for my $worksheet ( $workbook->worksheets() ) {
...
}
=head2 worksheet()
The C<worksheet()> method returns a single C<Worksheet> object using either its name or index:
$worksheet = $workbook->worksheet('Sheet1');
$worksheet = $workbook->worksheet(0);
Returns C<undef> if the sheet name or index doesn't exist.
=head2 AddWorksheet()
$workbook = $workbook->AddWorksheet($name, %properties);
Create a new Worksheet object of type C<Spreadsheet::ParseExcel::Worksheet>.
The C<%properties> hash contains the properties of new Worksheet.
=head2 AddFont
$workbook = $workbook->AddFont(%properties);
Create new Font object of type C<Spreadsheet::ParseExcel::Font>.
The C<%properties> hash contains the properties of new Font.
=head2 AddFormat
$workbook = $workbook->AddFormat(%properties);
The C<%properties> hash contains the properties of new Font.
=head1 Worksheet
Spreadsheet::ParseExcel::SaveParser::Worksheet
Worksheet is a subclass of Spreadsheet::ParseExcel::Worksheet.
And has these methods :
The C<Worksbook::worksheet()> method returns a C<Spreadsheet::ParseExcel::SaveParser::Worksheet> object.
This is a subclass of the L<Spreadsheet::ParseExcel::Worksheet> and has the following methods:
=head1 AddCell
$workbook = $worksheet->AddCell($row, $col, $value, $format [$encoding]);
Create new Cell object of type C<Spreadsheet::ParseExcel::Cell>.
The C<$format> parameter is the format number rather than a full format object.
To specify just same as another cell,
you can set it like below:
$row = 0;
$col = 0;
$worksheet = $template->worksheet(0);
$cell = $worksheet->get_cell( $row, $col );
$format_number = $cell->{FormatNo};
$worksheet->AddCell($row +1, $coll, 'New data', $format_number);
=head1 TODO
Please note that this module is currently (versions 0.50-0.60) undergoing a major
restructuring and rewriting.
=head1 Known Problems
You can only rewrite the features that Spreadsheet::WriteExcel supports so
macros, graphs and some other features in the original Excel file will be lost.
Also, formulas aren't rewritten, only the result of a formula is written.
Only last print area will remain. (Others will be removed)
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2002 Kawai Takanori and Nippon-RAD Co. OP Division
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER
$fatpacked{"Spreadsheet/XLSX/ParseExcel/SaveParser/Workbook.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER_WORKBOOK';
package Spreadsheet::ParseExcel::SaveParser::Workbook;
###############################################################################
#
# Spreadsheet::ParseExcel::SaveParser::Workbook - A class for SaveParser Workbooks.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
use base 'Spreadsheet::ParseExcel::Workbook';
our $VERSION = '0.65';
#==============================================================================
# Spreadsheet::ParseExcel::SaveParser::Workbook
#==============================================================================
sub new {
my ( $sPkg, $oBook ) = @_;
return undef unless ( defined $oBook );
my %oThis = %$oBook;
bless \%oThis, $sPkg;
# re-bless worksheets (and set their _Book properties !!!)
my $sWkP = ref($sPkg) || "$sPkg";
$sWkP =~ s/Workbook$/Worksheet/;
map { bless( $_, $sWkP ); } @{ $oThis{Worksheet} };
map { $_->{_Book} = \%oThis; } @{ $oThis{Worksheet} };
return \%oThis;
}
#------------------------------------------------------------------------------
# Parse (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub Parse {
my ( $sClass, $sFile, $oWkFmt ) = @_;
my $oBook = Spreadsheet::ParseExcel::Workbook->Parse( $sFile, $oWkFmt );
bless $oBook, $sClass;
# re-bless worksheets (and set their _Book properties !!!)
my $sWkP = ref($sClass) || "$sClass";
$sWkP =~ s/Workbook$/Worksheet/;
map { bless( $_, $sWkP ); } @{ $oBook->{Worksheet} };
map { $_->{_Book} = $oBook; } @{ $oBook->{Worksheet} };
return $oBook;
}
#------------------------------------------------------------------------------
# SaveAs (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub SaveAs {
my ( $oBook, $sName ) = @_;
# Create a new Excel workbook
my $oWrEx = Spreadsheet::WriteExcel->new($sName);
$oWrEx->compatibility_mode();
my %hFmt;
my $iNo = 0;
my @aAlH = (
'left', 'left', 'center', 'right',
'fill', 'justify', 'merge', 'equal_space'
);
my @aAlV = ( 'top', 'vcenter', 'bottom', 'vjustify', 'vequal_space' );
foreach my $pFmt ( @{ $oBook->{Format} } ) {
my $oFmt = $oWrEx->addformat(); # Add Formats
unless ( $pFmt->{Style} ) {
$hFmt{$iNo} = $oFmt;
my $rFont = $pFmt->{Font};
$oFmt->set_font( $rFont->{Name} );
$oFmt->set_size( $rFont->{Height} );
$oFmt->set_color( $rFont->{Color} );
$oFmt->set_bold( $rFont->{Bold} );
$oFmt->set_italic( $rFont->{Italic} );
$oFmt->set_underline( $rFont->{Underline} );
$oFmt->set_font_strikeout( $rFont->{Strikeout} );
$oFmt->set_font_script( $rFont->{Super} );
$oFmt->set_hidden( $rFont->{Hidden} ); #Add
$oFmt->set_locked( $pFmt->{Lock} );
$oFmt->set_align( $aAlH[ $pFmt->{AlignH} ] );
$oFmt->set_align( $aAlV[ $pFmt->{AlignV} ] );
$oFmt->set_rotation( $pFmt->{Rotate} );
$oFmt->set_num_format(
$oBook->{FmtClass}->FmtStringDef( $pFmt->{FmtIdx}, $oBook ) );
$oFmt->set_text_wrap( $pFmt->{Wrap} );
$oFmt->set_pattern( $pFmt->{Fill}->[0] );
$oFmt->set_fg_color( $pFmt->{Fill}->[1] )
if ( ( $pFmt->{Fill}->[1] >= 8 )
&& ( $pFmt->{Fill}->[1] <= 63 ) );
$oFmt->set_bg_color( $pFmt->{Fill}->[2] )
if ( ( $pFmt->{Fill}->[2] >= 8 )
&& ( $pFmt->{Fill}->[2] <= 63 ) );
$oFmt->set_left(
( $pFmt->{BdrStyle}->[0] > 7 ) ? 3 : $pFmt->{BdrStyle}->[0] );
$oFmt->set_right(
( $pFmt->{BdrStyle}->[1] > 7 ) ? 3 : $pFmt->{BdrStyle}->[1] );
$oFmt->set_top(
( $pFmt->{BdrStyle}->[2] > 7 ) ? 3 : $pFmt->{BdrStyle}->[2] );
$oFmt->set_bottom(
( $pFmt->{BdrStyle}->[3] > 7 ) ? 3 : $pFmt->{BdrStyle}->[3] );
$oFmt->set_left_color( $pFmt->{BdrColor}->[0] )
if ( ( $pFmt->{BdrColor}->[0] >= 8 )
&& ( $pFmt->{BdrColor}->[0] <= 63 ) );
$oFmt->set_right_color( $pFmt->{BdrColor}->[1] )
if ( ( $pFmt->{BdrColor}->[1] >= 8 )
&& ( $pFmt->{BdrColor}->[1] <= 63 ) );
$oFmt->set_top_color( $pFmt->{BdrColor}->[2] )
if ( ( $pFmt->{BdrColor}->[2] >= 8 )
&& ( $pFmt->{BdrColor}->[2] <= 63 ) );
$oFmt->set_bottom_color( $pFmt->{BdrColor}->[3] )
if ( ( $pFmt->{BdrColor}->[3] >= 8 )
&& ( $pFmt->{BdrColor}->[3] <= 63 ) );
}
$iNo++;
}
for ( my $iSheet = 0 ; $iSheet < $oBook->{SheetCount} ; $iSheet++ ) {
my $oWkS = $oBook->{Worksheet}[$iSheet];
my $oWrS = $oWrEx->addworksheet( $oWkS->{Name} );
#Landscape
if ( !$oWkS->{Landscape} ) { # Landscape (0:Horizontal, 1:Vertical)
$oWrS->set_landscape();
}
else {
$oWrS->set_portrait();
}
#Protect
if ( defined $oWkS->{Protect} )
{ # Protect ('':NoPassword, Password:Password)
if ( $oWkS->{Protect} ne '' ) {
$oWrS->protect( $oWkS->{Protect} );
}
else {
$oWrS->protect();
}
}
if ( $oWkS->{Scale} != 100 ) {
# Pages on fit with width and Heigt
$oWrS->fit_to_pages( $oWkS->{FitWidth}, $oWkS->{FitHeight} );
#Print Scale and reset FitWidth/FitHeight
$oWrS->set_print_scale( $oWkS->{Scale} );
}
else {
#Print Scale
$oWrS->set_print_scale( $oWkS->{Scale} );
# Pages on fit with width and Heigt
$oWrS->fit_to_pages( $oWkS->{FitWidth}, $oWkS->{FitHeight} );
}
# Paper Size
$oWrS->set_paper( $oWkS->{PaperSize} );
# Margin
$oWrS->set_margin_left( $oWkS->{LeftMargin} );
$oWrS->set_margin_right( $oWkS->{RightMargin} );
$oWrS->set_margin_top( $oWkS->{TopMargin} );
$oWrS->set_margin_bottom( $oWkS->{BottomMargin} );
# HCenter
$oWrS->center_horizontally() if ( $oWkS->{HCenter} );
# VCenter
$oWrS->center_vertically() if ( $oWkS->{VCenter} );
# Header, Footer
$oWrS->set_header( $oWkS->{Header}, $oWkS->{HeaderMargin} );
$oWrS->set_footer( $oWkS->{Footer}, $oWkS->{FooterMargin} );
# Print Area
if ( ref( $oBook->{PrintArea}[$iSheet] ) eq 'ARRAY' ) {
my $raP;
for $raP ( @{ $oBook->{PrintArea}[$iSheet] } ) {
$oWrS->print_area(@$raP);
}
}
# Print Title
my $raW;
foreach $raW ( @{ $oBook->{PrintTitle}[$iSheet]->{Row} } ) {
$oWrS->repeat_rows(@$raW);
}
foreach $raW ( @{ $oBook->{PrintTitle}[$iSheet]->{Column} } ) {
$oWrS->repeat_columns(@$raW);
}
# Print Gridlines
if ( $oWkS->{PrintGrid} == 1 ) {
$oWrS->hide_gridlines(0);
}
else {
$oWrS->hide_gridlines(1);
}
# Print Headings
if ( $oWkS->{PrintHeaders} ) {
$oWrS->print_row_col_headers();
}
# Horizontal Page Breaks
$oWrS->set_h_pagebreaks( @{ $oWkS->{HPageBreak} } );
# Veritical Page Breaks
$oWrS->set_v_pagebreaks( @{ $oWkS->{VPageBreak} } );
# PageStart => $oWkS->{PageStart}, # Page number for start
# UsePage => $oWkS->{UsePage}, # Use own start page number
# NoColor => $oWkS->{NoColor}, # Print in black-white
# Draft => $oWkS->{Draft}, # Print in draft mode
# Notes => $oWkS->{Notes}, # Print notes
# LeftToRight => $oWkS->{LeftToRight}, # Left to Right
for (
my $iC = $oWkS->{MinCol} ;
defined $oWkS->{MaxCol} && $iC <= $oWkS->{MaxCol} ;
$iC++
)
{
if ( defined $oWkS->{ColWidth}[$iC] ) {
if ( $oWkS->{ColWidth}[$iC] > 0 ) {
$oWrS->set_column( $iC, $iC, $oWkS->{ColWidth}[$iC] )
; #, undef, 1) ;
}
else {
$oWrS->set_column( $iC, $iC, 0, undef, 1 );
}
}
}
my $merged_areas = $oWkS->get_merged_areas();
my $merged_areas_h = {};
if ($merged_areas) {
foreach my $range (@$merged_areas) {
$merged_areas_h->{$range->[0]}{$range->[1]} = $range;
}
}
for (
my $iR = $oWkS->{MinRow} ;
defined $oWkS->{MaxRow} && $iR <= $oWkS->{MaxRow} ;
$iR++
)
{
$oWrS->set_row( $iR, $oWkS->{RowHeight}[$iR] );
for (
my $iC = $oWkS->{MinCol} ;
defined $oWkS->{MaxCol} && $iC <= $oWkS->{MaxCol} ;
$iC++
)
{
my $oWkC = $oWkS->{Cells}[$iR][$iC];
if ($oWkC) {
if ( $oWkC->{Merged} and exists $merged_areas_h->{$iR}{$iC} ) {
my $oFmtN = $oWrEx->addformat();
$oFmtN->copy( $hFmt{ $oWkC->{FormatNo} } );
$oWrS->merge_range (
@{$merged_areas_h->{$iR}{$iC}},
$oBook->{FmtClass}
->TextFmt( $oWkC->{Val}, $oWkC->{Code} ),
$oFmtN
);
}
else {
$oWrS->write(
$iR,
$iC,
$oBook->{FmtClass}
->TextFmt( $oWkC->{Val}, $oWkC->{Code} ),
$hFmt{ $oWkC->{FormatNo} }
);
}
}
}
}
}
return $oWrEx;
}
#------------------------------------------------------------------------------
# AddWorksheet (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub AddWorksheet {
my ( $oBook, $sName, %hAttr ) = @_;
$oBook->AddFormat if ( $#{ $oBook->{Format} } < 0 );
$hAttr{Name} ||= $sName;
$hAttr{LeftMargin} ||= 0;
$hAttr{RightMargin} ||= 0;
$hAttr{TopMargin} ||= 0;
$hAttr{BottomMargin} ||= 0;
$hAttr{HeaderMargin} ||= 0;
$hAttr{FooterMargin} ||= 0;
$hAttr{FitWidth} ||= 0;
$hAttr{FitHeight} ||= 0;
$hAttr{PrintGrid} ||= 0;
my $oWkS = Spreadsheet::ParseExcel::SaveParser::Worksheet->new(%hAttr);
$oWkS->{_Book} = $oBook;
$oWkS->{_SheetNo} = $oBook->{SheetCount};
$oBook->{Worksheet}[ $oBook->{SheetCount} ] = $oWkS;
$oBook->{SheetCount}++;
return $oWkS; #$oBook->{SheetCount} - 1;
}
#------------------------------------------------------------------------------
# AddFont (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub AddFont {
my ( $oBook, %hAttr ) = @_;
$hAttr{Name} ||= 'Arial';
$hAttr{Height} ||= 10;
$hAttr{Bold} ||= 0;
$hAttr{Italic} ||= 0;
$hAttr{Underline} ||= 0;
$hAttr{Strikeout} ||= 0;
$hAttr{Super} ||= 0;
push @{ $oBook->{Font} }, Spreadsheet::ParseExcel::Font->new(%hAttr);
return $#{ $oBook->{Font} };
}
#------------------------------------------------------------------------------
# AddFormat (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub AddFormat {
my ( $oBook, %hAttr ) = @_;
$hAttr{Fill} ||= [ 0, 0, 0 ];
$hAttr{BdrStyle} ||= [ 0, 0, 0, 0 ];
$hAttr{BdrColor} ||= [ 0, 0, 0, 0 ];
$hAttr{AlignH} ||= 0;
$hAttr{AlignV} ||= 0;
$hAttr{Rotate} ||= 0;
$hAttr{Landscape} ||= 0;
$hAttr{FmtIdx} ||= 0;
if ( !defined( $hAttr{Font} ) ) {
my $oFont;
if ( defined $hAttr{FontNo} ) {
$oFont = $oBook->{Font}[ $hAttr{FontNo} ];
}
elsif ( !defined $oFont ) {
if ( $#{ $oBook->{Font} } >= 0 ) {
$oFont = $oBook->{Font}[0];
}
else {
my $iNo = $oBook->AddFont;
$oFont = $oBook->{Font}[$iNo];
}
}
$hAttr{Font} = $oFont;
}
push @{ $oBook->{Format} }, Spreadsheet::ParseExcel::Format->new(%hAttr);
return $#{ $oBook->{Format} };
}
#------------------------------------------------------------------------------
# AddCell (for Spreadsheet::ParseExcel::SaveParser::Workbook)
#------------------------------------------------------------------------------
sub AddCell {
my ( $oBook, $iSheet, $iR, $iC, $sVal, $oCell, $sCode ) = @_;
my %rhKey;
$oCell ||= $oBook->{Worksheet}[$iSheet]
->{Cells}[$iR][$iC]->{FormatNo} || 0;
my $iFmt =
( UNIVERSAL::isa( $oCell, 'Spreadsheet::ParseExcel::Cell' ) )
? $oCell->{FormatNo}
: ( ref($oCell) ) ? 0
: $oCell + 0;
$rhKey{FormatNo} = $iFmt;
$rhKey{Format} = $oBook->{Format}[$iFmt];
$rhKey{Val} = $sVal;
$rhKey{Code} = $sCode || '_native_';
$oBook->{_CurSheet} = $iSheet;
my $merged_areas = $oBook->{Worksheet}[$iSheet]->get_merged_areas();
my $merged_areas_h = {};
if ($merged_areas) {
foreach my $range (@$merged_areas) {
$merged_areas_h->{$range->[0]}{$range->[1]} = $range;
}
}
my $oNewCell =
Spreadsheet::ParseExcel::_NewCell( $oBook, $iR, $iC, %rhKey );
Spreadsheet::ParseExcel::_SetDimension( $oBook, $iR, $iC, $iC );
$oNewCell->{Merged} = 1
if exists $merged_areas_h->{$iR}{$iC};
return $oNewCell;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::SaveParser::Workbook - A class for SaveParser Workbooks.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER_WORKBOOK
$fatpacked{"Spreadsheet/XLSX/ParseExcel/SaveParser/Worksheet.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER_WORKSHEET';
package Spreadsheet::ParseExcel::SaveParser::Worksheet;
###############################################################################
#
# Spreadsheet::ParseExcel::SaveParser::Worksheet - A class for SaveParser Worksheets.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
#==============================================================================
# Spreadsheet::ParseExcel::SaveParser::Worksheet
#==============================================================================
use base 'Spreadsheet::ParseExcel::Worksheet';
our $VERSION = '0.65';
sub new {
my ( $sClass, %rhIni ) = @_;
$sClass->SUPER::new(%rhIni); # returns object
}
#------------------------------------------------------------------------------
# AddCell (for Spreadsheet::ParseExcel::SaveParser::Worksheet)
#------------------------------------------------------------------------------
sub AddCell {
my ( $oSelf, $iR, $iC, $sVal, $oCell, $sCode ) = @_;
$oSelf->{_Book}
->AddCell( $oSelf->{_SheetNo}, $iR, $iC, $sVal, $oCell, $sCode );
}
#------------------------------------------------------------------------------
# Protect (for Spreadsheet::ParseExcel::SaveParser::Worksheet)
# - Password = undef -> No protect
# - Password = '' -> Protected. No password
# - Password = $pwd -> Protected. Password = $pwd
#------------------------------------------------------------------------------
sub Protect {
my ( $oSelf, $sPassword ) = @_;
$oSelf->{Protect} = $sPassword;
}
1;
__END__
=pod
=head1 NAME
Spreadsheet::ParseExcel::SaveParser::Worksheet - A class for SaveParser Worksheets.
=head1 SYNOPSIS
See the documentation for Spreadsheet::ParseExcel.
=head1 DESCRIPTION
This module is used in conjunction with Spreadsheet::ParseExcel. See the documentation for Spreadsheet::ParseExcel.
=head1 AUTHOR
Current maintainer 0.60+: Douglas Wilson dougw@cpan.org
Maintainer 0.40-0.59: John McNamara jmcnamara@cpan.org
Maintainer 0.27-0.33: Gabor Szabo szabgab@cpan.org
Original author: Kawai Takanori kwitknr@cpan.org
=head1 COPYRIGHT
Copyright (c) 2014 Douglas Wilson
Copyright (c) 2009-2013 John McNamara
Copyright (c) 2006-2008 Gabor Szabo
Copyright (c) 2000-2006 Kawai Takanori
All rights reserved.
You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.
=cut
SPREADSHEET_XLSX_PARSEEXCEL_SAVEPARSER_WORKSHEET
$fatpacked{"Spreadsheet/XLSX/ParseExcel/Utility.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'SPREADSHEET_XLSX_PARSEEXCEL_UTILITY';
package Spreadsheet::ParseExcel::Utility;
###############################################################################
#
# Spreadsheet::ParseExcel::Utility - Utility functions for ParseExcel.
#
# Used in conjunction with Spreadsheet::ParseExcel.
#
# Copyright (c) 2014 Douglas Wilson
# Copyright (c) 2009-2013 John McNamara
# Copyright (c) 2006-2008 Gabor Szabo
# Copyright (c) 2000-2006 Kawai Takanori
#
# perltidy with standard settings.
#
# Documentation after __END__
#
use strict;
use warnings;
require Exporter;
use vars qw(@ISA @EXPORT_OK);
@ISA = qw(Exporter);
@EXPORT_OK = qw(ExcelFmt LocaltimeExcel ExcelLocaltime
col2int int2col sheetRef xls2csv);
our $VERSION = '0.65';
my $qrNUMBER = qr/(^[+-]?\d+(\.\d+)?$)|(^[+-]?\d+\.?(\d*)[eE][+-](\d+))$/;
###############################################################################
#
# ExcelFmt()
#
# This function takes an Excel style number format and converts a number into
# that format. for example: 'hh:mm:ss AM/PM' + 0.01023148 = '12:14:44 AM'.
#
# It does this with a type of templating mechanism. The format string is parsed
# to identify tokens that need to be replaced and their position within the
# string is recorded. These can be thought of as placeholders. The number is
# then converted to the required formats and substituted into the placeholders.
#
# Interested parties should refer to the Excel documentation on cell formats for
# more information: http://office.microsoft.com/en-us/excel/HP051995001033.aspx
# The Microsoft documentation for the Excel Binary File Format, [MS-XLS].pdf,
# also contains a ABNF grammar for number format strings.
#
# Maintainers notes:
# ==================
#
# Note on format subsections:
# A format string can contain 4 possible sub-sections separated by semi-colons:
# Positive numbers, negative numbers, zero values, and text.
# For example: _(* #,##0_);_(* (#,##0);_(* "-"_);_(@_)
#
# Note on conditional formats.
# A number format in Excel can have a conditional expression such as:
# [>9999999](000)000-0000;000-0000
# This is equivalent to the following in Perl:
# $format = $number > 9999999 ? '(000)000-0000' : '000-0000';
# Nested conditionals are also possible but we don't support them.
#
# Efficiency: The excessive use of substr() isn't very efficient. However,
# it probably doesn't merit rewriting this function with a parser or regular
# expressions and \G.
#
# TODO: I think the single quote handling may not be required. Check.
#
sub ExcelFmt {
my ( $format_str, $number, $is_1904, $number_type, $want_subformats ) = @_;
# Return text strings without further formatting.
return $number unless $number =~ $qrNUMBER;
# Handle OpenOffice.org GENERAL format.
$format_str = '@' if uc($format_str) eq "GENERAL";
# Check for a conditional at the start of the format. See notes above.
my $conditional;
if ( $format_str =~ /^\[([<>=][^\]]+)\](.*)$/ ) {
$conditional = $1;
$format_str = $2;
}
# Ignore the underscore token which is used to indicate a padding space.
$format_str =~ s/_/ /g;
# Split the format string into 4 possible sub-sections: positive numbers,
# negative numbers, zero values, and text. See notes above.
my @formats;
my $section = 0;
my $double_quote = 0;
my $single_quote = 0;
# Initial parsing of the format string to remove escape characters. This
# also handles quoted strings. See note about single quotes above.
CHARACTER:
for my $char ( split //, $format_str ) {
if ( $double_quote or $single_quote ) {
$formats[$section] .= $char;
$double_quote = 0 if $char eq '"';
$single_quote = 0;
next CHARACTER;
}
if ( $char eq ';' ) {
$section++;
next CHARACTER;
}
elsif ( $char eq '"' ) {
$double_quote = 1;
}
elsif ( $char eq '!' ) {
$single_quote = 1;
}
elsif ( $char eq '\\' ) {
$single_quote = 1;
}
elsif ( $char eq '(' ) {
next CHARACTER; # Ignore.
}
elsif ( $char eq ')' ) {
next CHARACTER; # Ignore.
}
# Convert upper case OpenOffice.org date/time formats to lowercase..
$char = lc($char) if $char =~ /[DMYHS]/;
$formats[$section] .= $char;
}
# Select the appropriate format from the 4 possible sub-sections:
# positive numbers, negative numbers, zero values, and text.
# We ignore the Text section since non-numeric values are returned
# unformatted at the start of the function.
my $format;
$section = 0;
if ( @formats == 1 ) {
$section = 0;
}
elsif ( @formats == 2 ) {
if ( $number < 0 ) {
$section = 1;
}
else {
$section = 0;
}
}
elsif ( @formats == 3 ) {
if ( $number == 0 ) {
$section = 2;
}
elsif ( $number < 0 ) {
$section = 1;
}
else {
$section = 0;
}
}
else {
$section = 0;
}
# Override the previous choice if the format is conditional.
if ($conditional) {
# TODO. Replace string eval with a function.
$section = eval "$number $conditional" ? 0 : 1;
}
# We now have the required format.
$format = $formats[$section];
# The format string can contain one of the following colours:
# [Black] [Blue] [Cyan] [Green] [Magenta] [Red] [White] [Yellow]
# or the string [ColorX] where x is a colour index from 1 to 56.
# We don't use the colour but we return it to the caller.
#
my $color = '';
if ( $format =~ s/^(\[[A-Za-z]{3,}(\d{1,2})?\])// ) {
$color = $1;
}
# Remove the locale, such as [$-409], from the format string.
my $locale = '';
if ( $format =~ s/^(\[\$?-F?\d+\])// ) {
$locale = $1;
}
# Replace currency locale, such as [$$-409], with $ in the format string.
# See the RT#60547 test cases in 21_number_format_user.t.
if ( $format =~ s/(\[\$([^-]+)(-\d+)?\])/$2/s ) {
$locale = $1;
}
# Remove leading # from '# ?/?', '# ??/??' fraction formats.
$format =~ s{# \?}{?}g;
# Parse the format string and create an AoA of placeholders that contain
# the parts of the string to be replaced. The format of the information
# stored is: [ $token, $start_pos, $end_pos, $option_info ].
#
my $format_mode = ''; # Either: '', 'number', 'date'
my $pos = 0; # Character position within format string.
my @placeholders = (); # Arefs with parts of the format to be replaced.
my $token = ''; # The actual format extracted from the total str.
my $start_pos; # A position variable. Initial parser position.
my $token_start = -1; # A position variable.
my $decimal_pos = -1; # Position of the punctuation char "." or ",".
my $comma_count = 0; # Count of the commas in the format.
my $is_fraction = 0; # Number format is a fraction.
my $is_currency = 0; # Number format is a currency.
my $is_percent = 0; # Number format is a percentage.
my $is_12_hour = 0; # Time format is using 12 hour clock.
my $seen_dot = 0; # Treat only the first "." as the decimal point.
# Parse the format.
PARSER:
while ( $pos < length $format ) {
$start_pos = $pos;
my $char = substr( $format, $pos, 1 );
# Ignore control format characters such as '#0+-.?eE,%'. However,
# only ignore '.' if it is the first one encountered. RT 45502.
if ( ( !$seen_dot && $char !~ /[#0\+\-\.\?eE\,\%]/ )
|| $char !~ /[#0\+\-\?eE\,\%]/ )
{
if ( $token_start != -1 ) {
push @placeholders,
[
substr( $format, $token_start, $pos - $token_start ),
$decimal_pos, $pos - $token_start
];
$token_start = -1;
}
}
# Processing for quoted strings within the format. See notes above.
if ( $char eq '"' ) {
$double_quote = $double_quote ? 0 : 1;
$pos++;
next PARSER;
}
elsif ( $char eq '!' ) {
$single_quote = 1;
$pos++;
next PARSER;
}
elsif ( $char eq '\\' ) {
if ( $single_quote != 1 ) {
$single_quote = 1;
$pos++;
next PARSER;
}
}
if ( ( defined($double_quote) and ($double_quote) )
or ( defined($single_quote) and ($single_quote) )
or ( $seen_dot && $char eq '.' ) )
{
$single_quote = 0;
if (
( $format_mode ne 'date' )
and ( ( substr( $format, $pos, 2 ) eq "\x81\xA2" )
|| ( substr( $format, $pos, 2 ) eq "\x81\xA3" )
|| ( substr( $format, $pos, 2 ) eq "\xA2\xA4" )
|| ( substr( $format, $pos, 2 ) eq "\xA2\xA5" ) )
)
{
# The above matches are currency symbols.
push @placeholders,
[ substr( $format, $pos, 2 ), length($token), 2 ];
$is_currency = 1;
$pos += 2;
}
else {
$pos++;
}
}
elsif (
( $char =~ /[#0\+\.\?eE\,\%]/ )
|| ( ( $format_mode ne 'date' )
and ( ( $char eq '-' ) || ( $char eq '(' ) || ( $char eq ')' ) )
)
)
{
$format_mode = 'number' unless $format_mode;
if ( substr( $format, $pos, 1 ) =~ /[#0]/ ) {
if (
substr( $format, $pos ) =~
/^([#0]+[\.]?[0#]*[eE][\+\-][0#]+)/ )
{
push @placeholders, [ $1, $pos, length($1) ];
$pos += length($1);
}
else {
if ( $token_start == -1 ) {
$token_start = $pos;
$decimal_pos = length($token);
}
}
}
elsif ( substr( $format, $pos, 1 ) eq '?' ) {
# Look for a fraction format like ?/? or ??/??
if ( $token_start != -1 ) {
push @placeholders,
[
substr(
$format, $token_start, $pos - $token_start + 1
),
$decimal_pos,
$pos - $token_start + 1
];
}
$token_start = $pos;
# Find the end of the fraction format.
FRACTION:
while ( $pos < length($format) ) {
if ( substr( $format, $pos, 1 ) eq '/' ) {
$is_fraction = 1;
}
elsif ( substr( $format, $pos, 1 ) eq '?' ) {
$pos++;
next FRACTION;
}
else {
if ( $is_fraction
&& ( substr( $format, $pos, 1 ) =~ /[0-9]/ ) )
{
# TODO: Could invert if() logic and remove this.
$pos++;
next FRACTION;
}
else {
last FRACTION;
}
}
$pos++;
}
$pos--;
push @placeholders,
[
substr( $format, $token_start, $pos - $token_start + 1 ),
length($token), $pos - $token_start + 1
];
$token_start = -1;
}
elsif ( substr( $format, $pos, 3 ) =~ /^[eE][\+\-][0#]$/ ) {
if ( substr( $format, $pos ) =~ /([eE][\+\-][0#]+)/ ) {
push @placeholders, [ $1, $pos, length($1) ];
$pos += length($1);
}
$token_start = -1;
}
else {
if ( $token_start != -1 ) {
push @placeholders,
[
substr( $format, $token_start, $pos - $token_start ),
$decimal_pos, $pos - $token_start
];
$token_start = -1;
}
if ( substr( $format, $pos, 1 ) =~ /[\+\-]/ ) {
push @placeholders,
[ substr( $format, $pos, 1 ), length($token), 1 ];
$is_currency = 1;
}
elsif ( substr( $format, $pos, 1 ) eq '.' ) {
push @placeholders,
[ substr( $format, $pos, 1 ), length($token), 1 ];
$seen_dot = 1;
}
elsif ( substr( $format, $pos, 1 ) eq ',' ) {
$comma_count++;
push @placeholders,
[ substr( $format, $pos, 1 ), length($token), 1 ];
}
elsif ( substr( $format, $pos, 1 ) eq '%' ) {
$is_percent = 1;
}
elsif (( substr( $format, $pos, 1 ) eq '(' )
|| ( substr( $format, $pos, 1 ) eq ')' ) )
{
push @placeholders,
[ substr( $format, $pos, 1 ), length($token), 1 ];
$is_currency = 1;
}
}
$pos++;
}
elsif ( $char =~ /[ymdhsapg]/i ) {
$format_mode = 'date' unless $format_mode;
if ( substr( $format, $pos, 5 ) =~ /am\/pm/i ) {
push @placeholders, [ 'am/pm', length($token), 5 ];
$is_12_hour = 1;
$pos += 5;
}
elsif ( substr( $format, $pos, 3 ) =~ /a\/p/i ) {
push @placeholders, [ 'a/p', length($token), 3 ];
$is_12_hour = 1;
$pos += 3;
}
elsif ( substr( $format, $pos, 5 ) eq 'mmmmm' ) {
push @placeholders, [ 'mmmmm', length($token), 5 ];
$pos += 5;
}
elsif (( substr( $format, $pos, 4 ) eq 'mmmm' )
|| ( substr( $format, $pos, 4 ) eq 'dddd' )
|| ( substr( $format, $pos, 4 ) eq 'yyyy' )
|| ( substr( $format, $pos, 4 ) eq 'ggge' ) )
{
push @placeholders,
[ substr( $format, $pos, 4 ), length($token), 4 ];
$pos += 4;
}
elsif (( substr( $format, $pos, 3 ) eq 'ddd' )
|| ( substr( $format, $pos, 3 ) eq 'mmm' )
|| ( substr( $format, $pos, 3 ) eq 'yyy' ) )
{
push @placeholders,
[ substr( $format, $pos, 3 ), length($token), 3 ];
$pos += 3;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment