TT#69950 Unbundle Excel::Reader::XLSX modules

This is a separate upstream distribution which we should not be
bundling.

Change-Id: I71c504725dfcc4caffa141895a8472355d493160
changes/89/34889/1
Guillem Jover 6 years ago
parent 8b934eb8b2
commit 0bb15f41f3

@ -21,12 +21,6 @@ my $builder = Module::Build->new(
},
runtime => {
requires => {
# Needed by Excel::Reader::XLSX
'Archive::Zip' => 0,
'OLE::Storage_Lite' => 0,
'XML::LibXML::Reader' => 0,
# Needed by NGCP::BulkProcessor
'Config::Any' => 0,
'DBD::CSV' => '0.26',
'DBD::SQLite' => '1.29',
@ -45,7 +39,7 @@ my $builder = Module::Build->new(
'Digest::MD5' => 0,
'Email::MIME' => 0,
'Email::MIME::Attachment::Stripper' => 0,
#'Excel::Reader::XLSX' => 0,
'Excel::Reader::XLSX' => 0,
'Gearman::Client' => 0,
'Gearman::Task' => 0,
'Gearman::Worker' => 0,
@ -90,7 +84,7 @@ my $builder = Module::Build->new(
},
},
},
add_to_cleanup => ['NGCP-BulkProcessor-*', 'Excel-Reader-*'],
add_to_cleanup => [ 'NGCP-BulkProcessor-*' ],
);
$builder->add_build_element('pl');
$builder->add_build_element('cfg');

15
debian/control vendored

@ -15,21 +15,6 @@ Build-Depends:
Standards-Version: 3.9.8
Homepage: https://www.sipwise.com/
Package: libexcel-reader-xlsx-perl
Architecture: all
Replaces:
ngcp-bulk-processor,
Depends:
${misc:Depends},
${perl:Depends},
libarchive-zip-perl,
libole-storage-lite-perl,
libxml-libxml-perl,
Description: module to parse Excel spreadsheets in xlsx format
The Excel::Reader::XLSX module is used to parse an Excel file in
the 2007+ XLSX format. The XLSX format is the Office Open XML (OOXML)
format used by Excel 2007 and later.
Package: libngcp-bulkprocessor-perl
Architecture: all
Replaces:

22
debian/copyright vendored

@ -22,25 +22,3 @@ License: GPL-3+
Comment:
On Debian systems, the full text of the GNU General Public License
version 3 can be found in the file '/usr/share/common-licenses/GPL-3'.
Files:
lib/Excel/*
Copyright:
Copyright © 2012 John McNamara <jmcnamara@cpan.org>
License: Artistic or GPL-1+
License: Artistic
This program is free software; you can redistribute it and/or modify
it under the terms of the Artistic License, which comes with Perl.
Comment:
On Debian systems, the complete text of the Artistic License can be
found in '/usr/share/common-licenses/Artistic'.
License: GPL-1+
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
any later version.
Comment:
On Debian systems, the complete text of version 1 of the GNU General
Public License can be found in '/usr/share/common-licenses/GPL-1'.

@ -1 +0,0 @@
lib/Excel/ usr/share/perl5/

@ -1 +0,0 @@
debian/tmp/usr/share/man/man3/Excel*

@ -1,788 +0,0 @@
package Excel::Reader::XLSX;
###############################################################################
#
# WriteExcelXML.
#
# Excel::Reader::XLSX - Efficient data reader for the Excel XLSX file format.
#
# Copyright 2012, John McNamara, jmcnamara@cpan.org
#
# Documentation after __END__
#
use 5.008002;
use strict;
use warnings;
use Exporter;
use Archive::Zip;
use OLE::Storage_Lite;
use File::Temp;
use Excel::Reader::XLSX::Workbook;
use Excel::Reader::XLSX::Package::ContentTypes;
use Excel::Reader::XLSX::Package::SharedStrings;
# Modify Archive::Zip error handling reporting so we can catch errors.
# The handler re-throws the message it is given with die(), which lets
# read_file() trap zip failures with eval and record them as an error status.
# NOTE(review): this is installed through a package-level function, so it
# presumably affects every Archive::Zip user in the process - confirm.
Archive::Zip::setErrorHandler( sub { die shift } );
our @ISA = qw(Exporter);
our $VERSION = '0.00';
# Error codes for some common errors.
# These are the values stored in $self->{_error_status} and returned by
# error_code(). Each value is also the index of the matching message in
# @error_strings below, so the two lists must be kept in step.
our $ERROR_none = 0;
our $ERROR_file_not_found = 1;
our $ERROR_file_is_xls = 2;
our $ERROR_file_is_encrypted = 3;
our $ERROR_file_is_unknown_ole = 4;
our $ERROR_file_zip_error = 5;
our $ERROR_file_missing_subfile = 6;
our $ERROR_file_has_no_content_types = 7;
our $ERROR_file_has_no_workbook = 8;
# Human readable messages returned by error(), indexed by error code.
our @error_strings = (
'', # 0
'File not found', # 1
'File is xls not xlsx', # 2
'File is encrypted xlsx', # 3
'File is unknown OLE doc type', # 4
'File has zip error', # 5
'File is missing subfile', # 6
'File has no [Content_Types].xml', # 7
'File has no workbook.xml', # 8
);
###############################################################################
#
# new()
#
sub new {
    my ($class) = @_;

    # Initial reader state: nothing has been read and no error is recorded.
    # _tempdir is handed to File::Temp as the parent directory in read_file().
    my %state = (
        _reader           => undef,
        _files            => {},
        _tempdir          => undef,
        _error_status     => 0,
        _error_extra_text => '',
    );

    return bless \%state, $class;
}
###############################################################################
#
# read_file()
#
# Unzip the XLSX file and read the [Content_Types].xml file to get the
# structure of the contained XML files.
#
# Return a valid Workbook object if successful. If not return undef and set
# the error status.
#
sub read_file {

    # Unzip the XLSX file named by $filename, read its [Content_Types].xml and
    # workbook.xml, and return an Excel::Reader::XLSX::Workbook object.
    #
    # On failure a bare return is used (undef in scalar context) and the
    # reason is stored in _error_status/_error_extra_text for error() and
    # error_code().
    my $self     = shift;
    my $filename = shift;

    # Start from a clean slate so that a reader which is reused after a
    # failed read does not keep reporting the previous failure.
    $self->{_error_status}     = $ERROR_none;
    $self->{_error_extra_text} = '';

    # Check that a filename was given and that the file exists.
    if ( !defined $filename || !-e $filename ) {
        $self->{_error_status}     = $ERROR_file_not_found;
        $self->{_error_extra_text} = defined $filename ? $filename : '';
        return;
    }

    # Check for xls or encrypted OLE files.
    my $ole_file = $self->_check_if_ole_file( $filename );

    if ( $ole_file ) {
        $self->{_error_status}     = $ole_file;
        $self->{_error_extra_text} = $filename;
        return;
    }

    # Create a temp dir to unzip the XLSX file into.
    my $tempdir = File::Temp->newdir( DIR => $self->{_tempdir} );

    # Archive::Zip requires a Unix directory separator to the end.
    # NOTE(review): the .= below replaces the File::Temp::Dir object with a
    # plain string, so the object (and presumably the directory it created)
    # is released here and extractTree() re-creates a directory that nothing
    # cleans up afterwards. Left unchanged because tying cleanup to the
    # reader's lifetime could break callers that outlive it - confirm and
    # decide on an ownership model.
    $tempdir .= '/' if $tempdir !~ m{/$};

    # Create an Archive::Zip object to unzip the XLSX file.
    my $zipfile = Archive::Zip->new();

    # Read the XLSX zip file and extract the XML files from it. Both steps
    # run inside the eval because the module-level Archive::Zip error handler
    # dies on any zip error, and read_file() is documented to report failures
    # through the error status rather than by throwing.
    my $unzipped = eval {
        $zipfile->read( $filename );
        $zipfile->extractTree( '', $tempdir );
        1;
    };

    # Store the zip error and return.
    if ( !$unzipped ) {
        my $error_text = $@;
        chomp $error_text;
        $self->{_error_status}     = $ERROR_file_zip_error;
        $self->{_error_extra_text} = $error_text;
        return;
    }

    # The [Content_Types] is required as the root of the other files.
    my $content_types_file = $tempdir . '[Content_Types].xml';

    if ( !-e $content_types_file ) {
        $self->{_error_status} = $ERROR_file_has_no_content_types;
        return;
    }

    # Create a reader object to read the [Content_Types].
    my $content_types = Excel::Reader::XLSX::Package::ContentTypes->new();
    $content_types->_parse_file( $content_types_file );

    # Read the filenames from the [Content_Types].
    my %files = $content_types->_get_files();

    # Check that the listed files actually exist. On failure the name of the
    # missing file has already been stored by _check_files_exist().
    my $files_exist = $self->_check_files_exist( $tempdir, %files );

    if ( !$files_exist ) {
        $self->{_error_status} = $ERROR_file_missing_subfile;
        return;
    }

    # Verify that the workbook.xml file is listed.
    if ( !$files{_workbook} ) {
        $self->{_error_status} = $ERROR_file_has_no_workbook;
        return;
    }

    # Create a reader object to read the sharedStrings.xml file.
    my $shared_strings = Excel::Reader::XLSX::Package::SharedStrings->new();

    # Read the sharedStrings if present. Only files with strings have one.
    if ( $files{_shared_strings} ) {
        $shared_strings->_parse_file( $tempdir . $files{_shared_strings} );
    }

    # Create a reader object for the workbook.xml file.
    my $workbook = Excel::Reader::XLSX::Workbook->new(
        $tempdir,
        $shared_strings,
        %files
    );

    # Read data from the workbook.xml file.
    $workbook->_parse_file( $tempdir . $files{_workbook} );

    # Store information in the reader object.
    $self->{_files}          = \%files;
    $self->{_shared_strings} = $shared_strings;
    $self->{_package_dir}    = $tempdir;
    $self->{_zipfile}        = $zipfile;

    return $workbook;
}
###############################################################################
#
# _check_files_exist()
#
# Verify that the subfiles read from the Content_Types actually exist.
#
sub _check_files_exist {
    my ( $self, $tempdir, %files ) = @_;

    # Flatten the hash values into a single list of relative paths. Most
    # entries are plain strings; the worksheets entry is an array ref.
    my @paths = map { ref $_ ? @{$_} : $_ } values %files;

    for my $path ( @paths ) {
        next if -e $tempdir . $path;

        # Record the first missing file so that error() can report it.
        $self->{_error_extra_text} = $path;
        return;
    }

    return 1;
}
###############################################################################
#
# _check_if_ole_file()
#
# Check if the file is an OLE compound doc. This can happen in a few cases.
# The first is when the file is xls and not xlsx. The second is when the
# file is an encrypted xlsx file. We also handle the case of unknown OLE
# file types.
#
# Porting note. As a lightweight test you can check for OLE files by looking
# for the magic number 0xD0CF11E0 (docfile0) at the start of the file.
#
sub _check_if_ole_file {
    my ( $self, $filename ) = @_;

    my $ole      = OLE::Storage_Lite->new( $filename );
    my $root_pps = $ole->getPpsTree();

    # Without a PPS tree the file isn't an OLE document: nothing to report.
    return unless $root_pps;

    # Names of child entries that identify a known document type, mapped to
    # the error code reported for that type.
    my %error_for = (
        Workbook         => $ERROR_file_is_xls,
        Book             => $ERROR_file_is_xls,
        EncryptedPackage => $ERROR_file_is_encrypted,
    );

    # The first child below the root with a recognised name decides the result.
    for my $child ( @{ $root_pps->{Child} } ) {
        my $name = OLE::Storage_Lite::Ucs2Asc( $child->{Name} );
        return $error_for{$name} if exists $error_for{$name};
    }

    # An OLE file, but not one of the types we know about.
    return $ERROR_file_is_unknown_ole;
}
###############################################################################
#
# error().
#
# Return an error string for a failed read.
#
sub error {
    my ($self) = @_;

    # The error status doubles as the index into the message table.
    my $message = $error_strings[ $self->{_error_status} ];

    # Append the detail (usually a filename) when one was recorded.
    my $detail = $self->{_error_extra_text};
    $message .= ': ' . $detail if $detail;

    return $message;
}
###############################################################################
#
# error_code().
#
# Return an error code for a failed read.
#
sub error_code {
    my ($self) = @_;

    # Numeric status recorded by the most recent read_file() call.
    my $code = $self->{_error_status};
    return $code;
}
1;
__END__
=head1 NAME
Excel::Reader::XLSX - Efficient data reader for the Excel XLSX file format.
=head1 SYNOPSIS
The following is a simple Excel XLSX file reader using C<Excel::Reader::XLSX>:
use strict;
use warnings;
use Excel::Reader::XLSX;
my $reader = Excel::Reader::XLSX->new();
my $workbook = $reader->read_file( 'Book1.xlsx' );
if ( !defined $workbook ) {
die $reader->error(), "\n";
}
for my $worksheet ( $workbook->worksheets() ) {
my $sheetname = $worksheet->name();
print "Sheet = $sheetname\n";
while ( my $row = $worksheet->next_row() ) {
while ( my $cell = $row->next_cell() ) {
my $row = $cell->row();
my $col = $cell->col();
my $value = $cell->value();
print " Cell ($row, $col) = $value\n";
}
}
}
__END__
=head1 DESCRIPTION
C<Excel::Reader::XLSX> is a fast and lightweight parser for Excel XLSX files. XLSX is the Office Open XML, OOXML, format used by Excel 2007 and later.
B<Note: This software is designated as alpha quality until this notice is removed.> The API shouldn't change but functionality is currently limited.
=head1 Reader
The C<Excel::Reader::XLSX> constructor returns a Reader object that is used to read an Excel XLSX file:
my $reader = Excel::Reader::XLSX->new();
my $workbook = $reader->read_file( 'Book1.xlsx' );
die $reader->error() if !defined $workbook;
for my $worksheet ( $workbook->worksheets() ) {
while ( my $row = $worksheet->next_row() ) {
while ( my $cell = $row->next_cell() ) {
my $value = $cell->value();
...
}
}
}
The C<Excel::Reader::XLSX> object is used to return sub-objects that represent the functional parts of an Excel spreadsheet, L</Workbook>, L</Worksheet>, L</Row> and L</Cell>:
Reader
+- Workbook
+- Worksheet
+- Row
+- Cell
The C<Reader> object has the following methods:
read_file()
error()
error_code()
=head2 read_file()
The C<read_file> Reader method is used to read an Excel XLSX file and return a C<Workbook> object:
my $reader = Excel::Reader::XLSX->new();
my $workbook = $reader->read_file( 'Book1.xlsx' );
...
It is recommended that the success of the C<read_file()> method is always checked using one of the error checking methods below.
=head2 error()
The C<error()> Reader method returns an error string if C<read_file()> fails:
my $reader = Excel::Reader::XLSX->new();
my $workbook = $reader->read_file( 'Book1.xlsx' );
if ( !defined $workbook ) {
die $reader->error(), "\n";
}
...
The C<error()> strings and associated C<error_code()> numbers are:
error() error_code()
======= ============
'' 0
'File not found' 1
'File is xls not xlsx' 2
'File is encrypted xlsx' 3
'File is unknown OLE doc type' 4
'File has zip error' 5
'File is missing subfile' 6
'File has no [Content_Types].xml' 7
'File has no workbook.xml' 8
=head2 error_code()
The C<error_code()> Reader method returns an error code if C<read_file()> fails:
my $reader = Excel::Reader::XLSX->new();
my $workbook = $reader->read_file( 'Book1.xlsx' );
if ( !defined $workbook ) {
die "Got error code ", $reader->error_code, "\n";
}
This method is useful if you wish to use your own error strings or error handling methods.
=head1 Workbook
=head2 Workbook Methods
An C<Excel::Reader::XLSX> C<Workbook> object is returned by the Reader C<read_file()> method:
my $reader = Excel::Reader::XLSX->new();
my $workbook = $reader->read_file( 'Book1.xlsx' );
...
The C<Workbook> object has the following methods:
worksheets()
worksheet()
=head2 worksheets()
The Workbook C<worksheets()> method returns an array of
C<Worksheet> objects. This method is generally used to iterate through
all the worksheets in an Excel workbook and read the data:
for my $worksheet ( $workbook->worksheets() ) {
...
}
=head2 worksheet()
The Workbook C<worksheet()> method returns a single C<Worksheet>
object using the sheetname or the zero based index.
my $worksheet = $workbook->worksheet( 'Sheet1' );
# Or via the index.
my $worksheet = $workbook->worksheet( 0 );
=head1 Worksheet
=head2 Worksheet Methods
The C<Worksheet> object is returned from a L</Workbook> object and is used to access row data.
my $reader = Excel::Reader::XLSX->new();
my $workbook = $reader->read_file( 'Book1.xlsx' );
die $reader->error() if !defined $workbook;
for my $worksheet ( $workbook->worksheets() ) {
...
}
The C<Worksheet> object has the following methods:
next_row()
name()
index()
=head2 next_row()
The C<next_row()> method returns a L</Row> object representing the next
row in the worksheet.
my $row = $worksheet->next_row();
It returns C<undef> if there are no more rows containing data or formatting in the worksheet. This allows you to iterate over all the rows in a worksheet as follows:
while ( my $row = $worksheet->next_row() ) { ... }
Note, for efficiency the C<next_row()> method returns the next row in the file. This may not be the next sequential row. An option to read sequential rows, whether they contain data or not, will be added in a later release.
=head2 name()
The C<name()> method returns the name of the Worksheet object.
my $sheetname = $worksheet->name();
=head2 index()
The C<index()> method returns the zero-based index of the Worksheet
object.
my $sheet_index = $worksheet->index();
=head1 Row
=head2 Row Methods
The C<Row> object is returned from a L</Worksheet> object and is used to access cells in the worksheet.
my $reader = Excel::Reader::XLSX->new();
my $workbook = $reader->read_file( 'Book1.xlsx' );
die $reader->error() if !defined $workbook;
for my $worksheet ( $workbook->worksheets() ) {
while ( my $row = $worksheet->next_row() ) {
...
}
}
The C<Row> object has the following methods:
values()
next_cell()
row_number()
=head2 values()
The C<values()> method returns an array of values for a row from the first column up to the last column containing data. Cells with no data value return an empty string C<''>.
my @values = $row->values();
For example if we extracted data for the first row of the following spreadsheet we would get the values shown below:
-----------------------------------------------------------
| | A | B | C | D | ...
-----------------------------------------------------------
| 1 | | Foo | | Bar | ...
| 2 | | | | | ...
| 3 | | | | | ...
# Code:
...
my $row = $worksheet->next_row();
my @values = $row->values();
...
# @values contains ( '', 'Foo', '', 'Bar' )
=head2 next_cell()
The C<next_cell> method returns the next, non-blank cell in the current row.
my $cell = $row->next_cell();
It is usually used with a while loop. For example if we extracted data for the first row of the following spreadsheet we would get the values shown below:
-----------------------------------------------------------
| | A | B | C | D | ...
-----------------------------------------------------------
| 1 | | Foo | | Bar | ...
| 2 | | | | | ...
| 3 | | | | | ...
# Code:
...
while ( my $cell = $row->next_cell() ) {
my $value = $cell->value();
print $value, "\n";
}
...
# Output:
Foo
Bar
Note, for efficiency the C<next_cell()> method returns the next cell in the row. This may not be the next sequential cell. An option to read sequential cells, whether they contain data or not, will be added in a later release.
=head2 row_number()
The C<row_number()> method returns the zero-indexed row number for the current row:
my $row = $worksheet->next_row();
print $row->row_number(), "\n";
=head1 Cell
=head2 Cell Methods
The C<Cell> object is used to extract data from Excel cells:
my $reader = Excel::Reader::XLSX->new();
my $workbook = $reader->read_file( 'Book1.xlsx' );
die $reader->error() if !defined $workbook;
for my $worksheet ( $workbook->worksheets() ) {
while ( my $row = $worksheet->next_row() ) {
while ( my $cell = $row->next_cell() ) {
my $value = $cell->value();
...
}
}
}
The C<Cell> object has the following methods:
value()
row()
col()
For example if we extracted the data for the cells in the first row of the following spreadsheet we would get the values shown below:
-----------------------------------------------------------
| | A | B | C | D | ...
-----------------------------------------------------------
| 1 | | Foo | | Bar | ...
| 2 | | | | | ...
| 3 | | | | | ...
# Code:
...
while ( my $row = $worksheet->next_row() ) {
while ( my $cell = $row->next_cell() ) {
my $row = $cell->row();
my $col = $cell->col();
my $value = $cell->value();
print "Cell ($row, $col) = $value\n";
}
}
...
# Output:
Cell (0, 1) = Foo
Cell (0, 3) = Bar
=head2 value()
The Cell C<value()> method returns the unformatted value from the cell.
my $value = $cell->value();
The "value" of the cell can be a string or a number. In the case of a formula it returns the result of the formula and not the formula string. For dates it returns the numeric serial date.
=head2 row()
The Cell C<row()> method returns the zero-indexed row number of the cell.
my $row = $cell->row();
=head2 col()
The Cell C<col()> method returns the zero-indexed column number of the cell.
my $col = $cell->col();
=head1 EXAMPLE
Simple example of iterating through all worksheets in a workbook and printing out values from cells that contain data.
use strict;
use warnings;
use Excel::Reader::XLSX;
my $reader = Excel::Reader::XLSX->new();
my $workbook = $reader->read_file( 'Book1.xlsx' );
if ( !defined $workbook ) {
die $reader->error(), "\n";
}
for my $worksheet ( $workbook->worksheets() ) {
my $sheetname = $worksheet->name();
print "Sheet = $sheetname\n";
while ( my $row = $worksheet->next_row() ) {
while ( my $cell = $row->next_cell() ) {
my $row = $cell->row();
my $col = $cell->col();
my $value = $cell->value();
print " Cell ($row, $col) = $value\n";
}
}
}
=head1 RATIONALE
The rationale for this module is to have a fast memory efficient module for reading XLSX files. This is based on my experience of user requirements as the maintainer of Spreadsheet::ParseExcel.
=head1 SEE ALSO
Spreadsheet::XLSX, an XLSX reader using the old Spreadsheet::ParseExcel hash based interface: L<http://search.cpan.org/dist/Spreadsheet-XLSX/>.
SimpleXlsx, a "rudimentary extension to allow parsing of information stored in Microsoft Excel XLSX spreadsheets": L<http://search.cpan.org/dist/SimpleXlsx/>.
Excel::Writer::XLSX, an XLSX file writer based on the Spreadsheet::WriteExcel interface: L<http://search.cpan.org/dist/Excel-Writer-XLSX/>.
=head1 TODO
There are a lot of features still to be added. This module is very much a work in progress.
=over
=item * Reading from filehandles.
=item * Option to read sequential rows via C<next_row()>.
=item * Option to read dates instead of raw serial style numbers. This is actually harder than it would seem due to the XLSX format.
=item * Option to read formulas, urls, comments, images.
=item * Spreadsheet::ParseExcel style interface.
=item * Direct cell access.
=item * Cell format data.
=back
=head1 LICENSE
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php>.
=head1 AUTHOR
John McNamara jmcnamara@cpan.org
=head1 COPYRIGHT
Copyright MMXII, John McNamara.
All Rights Reserved. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself.
=head1 DISCLAIMER OF WARRANTY
Because this software is licensed free of charge, there is no warranty for the software, to the extent permitted by applicable law. Except when otherwise stated in writing the copyright holders and/or other parties provide the software "as is" without warranty of any kind, either expressed or implied, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. The entire risk as to the quality and performance of the software is with you. Should the software prove defective, you assume the cost of all necessary servicing, repair, or correction.
In no event unless required by applicable law or agreed to in writing will any copyright holder, or any other party who may modify and/or redistribute the software as permitted by the above licence, be liable to you for damages, including any general, special, incidental, or consequential damages arising out of the use or inability to use the software (including but not limited to loss of data or data being rendered inaccurate or losses sustained by you or third parties or a failure of the software to operate with any other software), even if such holder or other party has been advised of the possibility of such damages.

@ -1,152 +0,0 @@
package Excel::Reader::XLSX::Cell;
###############################################################################
#
# Cell - A class for reading the Excel XLSX cells.
#
# Used in conjunction with Excel::Reader::XLSX
#
# Copyright 2012, John McNamara, jmcnamara@cpan.org
#
# Documentation after __END__
#
# perltidy with the following options: -mbl=2 -pt=0 -nola
use 5.008002;
use strict;
use warnings;
use Carp;
use XML::LibXML::Reader;
use Excel::Reader::XLSX::Package::XMLreader;
our @ISA = qw(Excel::Reader::XLSX::Package::XMLreader);
our $VERSION = '0.00';
###############################################################################
#
# new()
#
# Constructor.
#
sub new {
    my ( $class, $shared_strings ) = @_;

    # Start from a generic XML reader object and add the Cell specific state.
    my $self = Excel::Reader::XLSX::Package::XMLreader->new();

    $self->{_value}          = '';
    $self->{_shared_strings} = $shared_strings;

    # Re-bless the base object into the Cell class.
    return bless $self, $class;
}
###############################################################################
#
# _init()
#
# Initialise a Cell object.
#
sub _init {
    my ($self) = @_;

    # Reset the per-cell state: no value yet and no shared string lookup done.
    @{$self}{qw(_value _converted_string)} = ( '', 0 );

    # Kept as the final statement so the sub's implicit return value is the
    # same as before.
    $self->{_has_formula} = 0;
}
###############################################################################
#
# value()
#
# Return the cell value.
#
sub value {

    # Return the cell value. For cells of type 's' the stored value is an
    # index into the shared strings table, which is resolved to the string on
    # the first call and cached in the cell.
    my $self = shift;
    my $type = $self->{_type};

    # _type is not initialised by new() or _init(), so guard against undef
    # to avoid an "uninitialized value" warning for cells without a type.
    if ( defined $type && $type eq 's' && !$self->{_converted_string} ) {
        $self->{_value} =
          $self->{_shared_strings}->_get_string( $self->{_value} );

        # State variable so that multiple calls to value() don't need lookups.
        $self->{_converted_string} = 1;
    }

    return $self->{_value};
}
###############################################################################
#
# row()
#
# Return the cell row number, zero-indexed.
#
sub row {
    my ($self) = @_;

    # Row number as stored on the cell in the _row slot.
    my $row_number = $self->{_row};
    return $row_number;
}
###############################################################################
#
# col()
#
# Return the cell column number, zero indexed.
#
sub col {
    my ($self) = @_;

    # Column number as stored on the cell in the _col slot.
    my $col_number = $self->{_col};
    return $col_number;
}
1;
__END__
=pod
=head1 NAME
Cell - A class for reading the Excel XLSX cells.
=head1 SYNOPSIS
See the documentation for L<Excel::Reader::XLSX>.
=head1 DESCRIPTION
This module is used in conjunction with L<Excel::Reader::XLSX>.
=head1 AUTHOR
John McNamara jmcnamara@cpan.org
=head1 COPYRIGHT
Copyright MMXII, John McNamara.
All Rights Reserved. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself.
=head1 LICENSE
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php>.
=head1 DISCLAIMER OF WARRANTY
See the documentation for L<Excel::Reader::XLSX>.
=cut

@ -1,161 +0,0 @@
package Excel::Reader::XLSX::Package::ContentTypes;
###############################################################################
#
# ContentTypes - A class for reading the Excel XLSX ContentTypes.xml file.
#
# Used in conjunction with Excel::Reader::XLSX
#
# Copyright 2012, John McNamara, jmcnamara@cpan.org
#
# Documentation after __END__
#
# perltidy with the following options: -mbl=2 -pt=0 -nola
use 5.008002;
use strict;
use warnings;
use Exporter;
use Carp;
use XML::LibXML::Reader qw(:types);
use Excel::Reader::XLSX::Package::XMLreader;
our @ISA = qw(Excel::Reader::XLSX::Package::XMLreader);
our $VERSION = '0.00';
###############################################################################
#
# new()
#
# Constructor.
#
sub new {
    my ($class) = @_;

    # Start from a generic XML reader object and add the map of file names
    # that _read_node() fills in.
    my $self = Excel::Reader::XLSX::Package::XMLreader->new();
    $self->{_files} = {};

    # Re-bless the base object into the ContentTypes class.
    return bless $self, $class;
}
##############################################################################
#
# _read_node()
#
# Callback function to read the <Types> attributes of the ContentTypes file.
# We currently only read files/types that we are interested in.
#
sub _read_node {

    # Callback that records the part names we are interested in from the
    # <Override> elements of the ContentTypes file into $self->{_files}.
    #
    # $node must provide name() and getAttribute(). Nothing useful is
    # returned; nodes that are not relevant are silently ignored.
    my ( $self, $node ) = @_;

    # Only read the Override nodes.
    return unless $node->name eq 'Override';

    # An Override without a PartName carries nothing we can use. Skipping it
    # also avoids undef warnings in the matches below.
    my $part_name = $node->getAttribute( 'PartName' );
    return unless defined $part_name;

    # Strip leading directory separator from filename.
    $part_name =~ s{^/}{};

    # Parts that occur once and are stored under a fixed key.
    my @single_parts = (
        [ qr/app\.xml$/,           '_app' ],
        [ qr/core\.xml$/,          '_core' ],
        [ qr/sharedStrings\.xml$/, '_shared_strings' ],
        [ qr/styles\.xml$/,        '_styles' ],
    );

    for my $part ( @single_parts ) {
        my ( $pattern, $key ) = @{$part};
        next unless $part_name =~ $pattern;

        $self->{_files}->{$key} = $part_name;
        return;
    }

    if ( $part_name =~ /workbook\.xml$/ ) {

        # The workbook.xml.rels file isn't included in the ContentTypes but
        # it is usually in the _rels dir at the same level as the
        # workbook.xml. The pattern is escaped and anchored so that only the
        # trailing file name is rewritten, never an earlier path segment.
        my $workbook_rels = $part_name;
        $workbook_rels =~ s{(workbook\.xml)$}{_rels/$1.rels};

        $self->{_files}->{_workbook}      = $part_name;
        $self->{_files}->{_workbook_rels} = $workbook_rels;
        return;
    }

    # Sheets can occur many times, so they are collected in an array ref.
    # NOTE(review): this matches on the file name only, so any part named
    # sheetN.xml is collected regardless of its ContentType - confirm that
    # is intended.
    if ( $part_name =~ /sheet\d+\.xml$/ ) {
        push @{ $self->{_files}->{_worksheets} }, $part_name;
        return;
    }

    return;
}
###############################################################################
#
# _get_files()
#
# Get a hash of the files read from the ContentTypes file.
#
sub _get_files {
    my ($self) = @_;

    # Hand back a flattened copy of the key/value pairs, not the hash ref.
    my $files = $self->{_files};
    return %{$files};
}
1;
__END__
=pod
=head1 NAME
ContentTypes - A class for reading the Excel XLSX ContentTypes.xml file.
=head1 SYNOPSIS
See the documentation for L<Excel::Reader::XLSX>.
=head1 DESCRIPTION
This module is used in conjunction with L<Excel::Reader::XLSX>.
=head1 AUTHOR
John McNamara jmcnamara@cpan.org
=head1 COPYRIGHT
Copyright MMXII, John McNamara.
All Rights Reserved. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself.
=head1 LICENSE
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php>.
=head1 DISCLAIMER OF WARRANTY
See the documentation for L<Excel::Reader::XLSX>.
=cut

@ -1,129 +0,0 @@
## no critic
#package name differs from filename!
package Excel::Reader::XLSX::Package::Relationship;
###############################################################################
#
# Relationship - A class for reading the Excel XLSX Rels file.
#
# Used in conjunction with Excel::Reader::XLSX
#
# Copyright 2012, John McNamara, jmcnamara@cpan.org
#
# Documentation after __END__
#
# perltidy with the following options: -mbl=2 -pt=0 -nola
use 5.008002;
use strict;
use warnings;
use Exporter;
use Carp;
use XML::LibXML::Reader qw(:types);
use Excel::Reader::XLSX::Package::XMLreader;
our @ISA = qw(Excel::Reader::XLSX::Package::XMLreader);
our $VERSION = '0.00';
###############################################################################
#
# new()
#
# Constructor.
#
sub new {
    my ($class) = @_;

    # Start from a generic XML reader object and add the relationship map
    # that _read_node() fills in, keyed by relationship Id.
    my $self = Excel::Reader::XLSX::Package::XMLreader->new();
    $self->{_rels} = {};

    # Re-bless the base object into the Relationship class.
    return bless $self, $class;
}
##############################################################################
#
# _read_node()
#
# Callback function to read the <Relationship> attributes of the Relationship file.
#
sub _read_node {
    my ( $self, $node ) = @_;

    # Only <Relationship> elements are of interest.
    return if $node->name ne 'Relationship';

    my $rel_id = $node->getAttribute( 'Id' );

    # Collect the remaining attributes under the keys used by callers.
    my %details = (
        _type        => scalar $node->getAttribute( 'Type' ),
        _target      => scalar $node->getAttribute( 'Target' ),
        _target_mode => scalar $node->getAttribute( 'TargetMode' ),
    );

    # Store the relationship under its Id. Kept as the final statement so
    # the sub's implicit return value stays the stored hash ref.
    $self->{_rels}->{$rel_id} = \%details;
}
###############################################################################
#
# _get_relationships()
#
# Return a hash to the relationships.
#
sub _get_relationships {
    my ($self) = @_;

    # Hand back a flattened copy of the Id => details pairs, not the hash ref.
    my $rels = $self->{_rels};
    return %{$rels};
}
1;
__END__
=pod
=head1 NAME
Relationship - A class for reading the Excel XLSX Rels file.
=head1 SYNOPSIS
See the documentation for L<Excel::Reader::XLSX>.
=head1 DESCRIPTION
This module is used in conjunction with L<Excel::Reader::XLSX>.
=head1 AUTHOR
John McNamara jmcnamara@cpan.org
=head1 COPYRIGHT
Copyright MMXII, John McNamara.
All Rights Reserved. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself.
=head1 LICENSE
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php>.
=head1 DISCLAIMER OF WARRANTY
See the documentation for L<Excel::Reader::XLSX>.
=cut

@ -1,180 +0,0 @@
package Excel::Reader::XLSX::Package::SharedStrings;
###############################################################################
#
# SharedStrings - A class for reading the Excel XLSX sharedStrings.xml file.
#
# Used in conjunction with Excel::Reader::XLSX
#
# Copyright 2012, John McNamara, jmcnamara@cpan.org
#
# Documentation after __END__
#
# perltidy with the following options: -mbl=2 -pt=0 -nola
use 5.008002;
use strict;
use warnings;
use Exporter;
use Carp;
use XML::LibXML::Reader qw(:types);
use Excel::Reader::XLSX::Package::XMLreader;
our @ISA = qw(Excel::Reader::XLSX::Package::XMLreader);
our $VERSION = '0.00';
our $FULL_DEPTH = 1;
our $RICH_STRING = 1;
###############################################################################
#
# new()
#
# Constructor. Builds on an XMLreader object, re-blessed into the requested
# class, with zeroed counters and an empty shared string list.
#
sub new {

    my ( $class ) = @_;

    my $self = bless Excel::Reader::XLSX::Package::XMLreader->new(), $class;

    $self->{_strings}      = [];
    $self->{_unique_count} = 0;
    $self->{_count}        = 0;

    return $self;
}
##############################################################################
#
# _read_all_nodes()
#
# Overrides the XMLreader method of the same name. Reads the count and
# uniqueCount attributes of the <sst> element, then appends the text content
# of every <si> element to $self->{_strings}. TODO rename.
#
sub _read_all_nodes {

    my ( $self ) = @_;
    my $xml = $self->{_reader};

    # The "shared string table" <sst> element carries the count attributes.
    if ( $xml->nextElement( 'sst' ) ) {
        @{$self}{qw( _count _unique_count )} =
          map { scalar $xml->getAttribute( $_ ) } qw( count uniqueCount );
    }

    # Each "string item" <si> element contributes one string, stored as the
    # text content of a full-depth copy of the node.
    # TODO: rich strings could instead be stored as
    # [ $RICH_STRING, $string, $rich_string ].
    while ( $xml->nextElement( 'si' ) ) {
        my $text = $xml->copyCurrentNode( 1 )->textContent();
        push @{ $self->{_strings} }, $text;
    }
}
##############################################################################
#
# _read_rich_string()
#
# Read a rich string from an <si> node. A rich string is stored as a series
# of text "runs", the <r> child elements, each holding its text in <t>
# children. Returns a two element list: the concatenated text of the runs
# without formatting, and the concatenated XML of the runs with formatting.
#
sub _read_rich_string {

    my ( $self, $node ) = @_;

    my ( $plain, $markup ) = ( '', '' );

    # Only the text run <r> children contribute to the result.
    my @runs = grep { $_->nodeName eq 'r' } $node->childNodes();

    for my $run ( @runs ) {
        $markup .= $run->toString();

        # Within a run only the <t> children hold text.
        for my $child ( $run->childNodes() ) {
            if ( $child->nodeName eq 't' ) {
                $plain .= $child->textContent();
            }
        }
    }

    return ( $plain, $markup );
}
###############################################################################
#
# _get_string()
#
# Get the shared string at the indexed value. Returns an empty string if the
# index is undefined or out of bounds.
#
sub _get_string {

    my $self  = shift;
    my $index = shift;

    my $strings = $self->{_strings} || [];

    # Bound the index by the number of strings actually read rather than by
    # the uniqueCount attribute: that attribute is read from the file and is
    # undefined when the <sst> element does not carry it, which would make
    # every lookup return an empty string.
    return '' if !defined $index;
    return '' if $index < 0;
    return '' if $index >= scalar @{$strings};

    my $string = $strings->[$index];

    # For rich strings return the unformatted part of the string.
    if ( ref $string && $string->[0] == 1 ) {
        $string = $string->[1];
    }

    return $string;
}
1;
__END__
=pod
=head1 NAME
SharedStrings - A class for reading the Excel XLSX sharedStrings.xml file.
=head1 SYNOPSIS
See the documentation for L<Excel::Reader::XLSX>.
=head1 DESCRIPTION
This module is used in conjunction with L<Excel::Reader::XLSX>.
=head1 AUTHOR
John McNamara jmcnamara@cpan.org
=head1 COPYRIGHT
Copyright MMXII, John McNamara.
All Rights Reserved. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself.
=head1 LICENSE
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php>.
=head1 DISCLAIMER OF WARRANTY
See the documentation for L<Excel::Reader::XLSX>.
=cut

@ -1,185 +0,0 @@
package Excel::Reader::XLSX::Package::XMLreader;
###############################################################################
#
# XMLreader - A class for reading Excel XLSX XML files.
#
# Used in conjunction with Excel::Reader::XLSX
#
# Copyright 2012, John McNamara, jmcnamara@cpan.org
#
# Documentation after __END__
#
# perltidy with the following options: -mbl=2 -pt=0 -nola
use 5.008002;
use strict;
use warnings;
use Exporter;
use Carp;
use XML::LibXML::Reader qw(:types);
our @ISA = qw(Exporter);
our $VERSION = '0.00';
###############################################################################
#
# new()
#
# Constructor. Returns an object of the given class whose only member is an
# undefined _reader slot, to be filled in by one of the _read_*() methods.
#
sub new {

    my ( $class ) = @_;

    return bless { _reader => undef }, $class;
}
##############################################################################
#
# _read_file()
#
# Create an XML::LibXML::Reader instance for the file at $filename, with the
# no_blanks option enabled. The reader is stored in $self->{_reader} and
# also returned.
#
sub _read_file {

    my ( $self, $filename ) = @_;

    my @options = ( location => $filename, no_blanks => 1 );

    $self->{_reader} = XML::LibXML::Reader->new( @options );

    return $self->{_reader};
}
##############################################################################
#
# _read_string()
#
# Create an XML::LibXML::Reader instance from a string of XML, with the
# no_blanks option enabled. The reader is stored in $self->{_reader} and
# also returned. Used mainly for testing.
#
sub _read_string {

    my ( $self, $string ) = @_;

    my @options = ( string => $string, no_blanks => 1 );

    $self->{_reader} = XML::LibXML::Reader->new( @options );

    return $self->{_reader};
}
##############################################################################
#
# _read_filehandle()
#
# Create an XML::LibXML::Reader instance from a filehandle, with the
# no_blanks option enabled. The reader is stored in $self->{_reader} and
# also returned. Used mainly for testing.
#
sub _read_filehandle {

    my ( $self, $filehandle ) = @_;

    my @options = ( IO => $filehandle, no_blanks => 1 );

    $self->{_reader} = XML::LibXML::Reader->new( @options );

    return $self->{_reader};
}
##############################################################################
#
# _read_all_nodes()
#
# Step through the reader stored in $self->{_reader} and pass it to the
# _read_node() method once per successful read(). Sub-classes provide the
# _read_node() method.
#
sub _read_all_nodes {

    my ( $self ) = @_;

    # The reader is looked up on every pass, exactly as it is handed on.
    $self->_read_node( $self->{_reader} ) while $self->{_reader}->read();
}
##############################################################################
#
# _parse_file()
#
# Convenience wrapper for the most common use case: open $filename with
# _read_file(), process it with _read_all_nodes(), and return the reader
# that _read_file() created.
#
sub _parse_file {

    my ( $self, $filename ) = @_;

    my $reader = $self->_read_file( $filename );

    $self->_read_all_nodes();

    return $reader;
}
1;
__END__
=pod
=head1 NAME
XMLreader - A class for reading Excel XLSX XML files.
=head1 SYNOPSIS
See the documentation for L<Excel::Reader::XLSX>.
=head1 DESCRIPTION
This module is used in conjunction with L<Excel::Reader::XLSX>.
=head1 AUTHOR
John McNamara jmcnamara@cpan.org
=head1 COPYRIGHT
Copyright MMXII, John McNamara.
All Rights Reserved. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself.
=head1 LICENSE
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php>.
=head1 DISCLAIMER OF WARRANTY
See the documentation for L<Excel::Reader::XLSX>.
=cut

@ -1,284 +0,0 @@
package Excel::Reader::XLSX::Row;
###############################################################################
#
# Row - A class for reading Excel XLSX rows.
#
# Used in conjunction with Excel::Reader::XLSX
#
# Copyright 2012, John McNamara, jmcnamara@cpan.org
#
# Documentation after __END__
#
# perltidy with the following options: -mbl=2 -pt=0 -nola
use 5.008002;
use strict;
use warnings;
## no critic
use Carp;
use XML::LibXML::Reader;
use Excel::Reader::XLSX::Cell;
use Excel::Reader::XLSX::Package::XMLreader;
our @ISA = qw(Excel::Reader::XLSX::Package::XMLreader);
our $VERSION = '0.00';
our $FULL_DEPTH = 1;
###############################################################################
#
# new()
#
# Constructor. Takes the reader, the shared strings object and a reusable
# Cell object, in that order, and stores them on an XMLreader based object
# blessed into the requested class.
#
sub new {

    my ( $class, $reader, $shared_strings, $cell ) = @_;

    my $self = Excel::Reader::XLSX::Package::XMLreader->new();

    @{$self}{qw( _reader _shared_strings _cell )} =
      ( $reader, $shared_strings, $cell );

    return bless $self, $class;
}
###############################################################################
#
# _init()
#
# Reset the (reusable) Row object for the row the reader is positioned on.
# Takes the zero-indexed row number and the previous row number, clears the
# cached values and loads the row's <c> child nodes.
#
sub _init {

    my ( $self, $row_number, $previous_row_number ) = @_;

    $self->{_row_number}          = $row_number;
    $self->{_previous_row_number} = $previous_row_number;
    $self->{_row_is_empty}        = $self->{_reader}->isEmptyElement();
    $self->{_values}              = undef;

    # TODO. Make the cell initialisation a lazy load.

    # Copy the row node together with its children and keep the cell nodes.
    my $row_copy = $self->{_reader}->copyCurrentNode( $FULL_DEPTH );

    $self->{_cells} = [ $row_copy->getChildrenByTagName( 'c' ) ];

    # Note: despite its name this holds the number of cells in the row.
    $self->{_max_cell_index}  = scalar @{ $self->{_cells} };
    $self->{_next_cell_index} = 0;
}
###############################################################################
#
# next_cell()
#
# Get the next cell in the current row. Returns nothing when the row is
# empty, when all cells have been returned, or when the cell node has no 'r'
# (range) attribute. The same Cell object is re-initialised and returned on
# every call.
#
sub next_cell {

    my ( $self ) = @_;

    return if $self->{_row_is_empty};

    my $position = $self->{_next_cell_index};
    return if $position >= $self->{_max_cell_index};

    my $cell_node = $self->{_cells}->[$position];
    my $range     = $cell_node->getAttribute( 'r' );
    return unless $range;

    # Re-use (for efficiency) the shared Cell object.
    my $cell = $self->{_cell};
    $cell->_init();

    @{$cell}{qw( _row _col )} = _range_to_rowcol( $range );
    $cell->{_type} = $cell_node->getAttribute( 't' ) || '';

    # Read the cell <c> child nodes: <v> and <is> carry a value, <f> a
    # formula.
    for my $child ( $cell_node->childNodes() ) {
        my $tag = $child->nodeName();

        if ( $tag eq 'v' || $tag eq 'is' ) {
            $cell->{_value}     = $child->textContent();
            $cell->{_has_value} = 1;
        }
        elsif ( $tag eq 'f' ) {
            $cell->{_formula}     = $child->textContent();
            $cell->{_has_formula} = 1;
        }
    }

    $self->{_next_cell_index}++;

    return $cell;
}
###############################################################################
#
# values()
#
# Return an array of values for a row, indexed by column, covering the
# columns up to the last cell read. Columns without a cell value are
# returned as ''. The result is cached so repeated calls return the same
# data.
#
sub values {

    my ( $self ) = @_;

    # Serve repeated calls from the cache.
    return @{ $self->{_values} } if defined $self->{_values};

    # Otherwise read the remaining cells and slot each value into its column.
    my @row_values;

    while ( my $cell = $self->next_cell() ) {
        my $col   = $cell->col();
        my $value = $cell->value();

        $row_values[$col] = $value;
    }

    # Gaps between cells are undef at this point; turn them into ''.
    @row_values = map { defined $_ ? $_ : '' } @row_values;

    $self->{_values} = \@row_values;

    return @row_values;
}
###############################################################################
#
# row_number()
#
# Accessor for the row number stored by _init(). Zero-indexed.
#
sub row_number {

    my ( $self ) = @_;

    return $self->{_row_number};
}
###############################################################################
#
# previous_number()
#
# Accessor for the zero-indexed row number of the previously found row, as
# stored by _init(). Returns -1 if there was no previous row.
#
sub previous_number {

    my ( $self ) = @_;

    return $self->{_previous_row_number};
}
#
# Internal methods.
#
###############################################################################
#
# _range_to_rowcol($range)
#
# Convert an Excel A1 style ref to a zero indexed row and column. Returns
# the list ( $row, $col ), so 'A1' gives ( 0, 0 ) and 'AA3' gives ( 2, 26 ).
#
sub _range_to_rowcol {

    # Split the ref into its column letters and row digits.
    my ( $letters, $row ) = split /(\d+)/, shift;

    $row--;

    # The column letters are a base 26 number with the digits A = 1 to
    # Z = 26 (ord('A') is 65). Folding over the letters handles column names
    # of any length rather than special casing one, two and three letters.
    my $col = 0;

    for my $letter ( split //, $letters ) {
        $col = $col * 26 + ord( $letter ) - 64;
    }

    # Zero index the column.
    $col--;

    return $row, $col;
}
1;
__END__
=pod
=head1 NAME
Row - A class for reading Excel XLSX rows.
=head1 SYNOPSIS
See the documentation for L<Excel::Reader::XLSX>.
=head1 DESCRIPTION
This module is used in conjunction with L<Excel::Reader::XLSX>.
=head1 AUTHOR
John McNamara jmcnamara@cpan.org
=head1 COPYRIGHT
Copyright MMXII, John McNamara.
All Rights Reserved. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself.
=head1 LICENSE
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php>.
=head1 DISCLAIMER OF WARRANTY
See the documentation for L<Excel::Reader::XLSX>.
=cut

@ -1,265 +0,0 @@
package Excel::Reader::XLSX::Workbook;
###############################################################################
#
# Workbook - A class for reading the Excel XLSX workbook.xml file.
#
# Used in conjunction with Excel::Reader::XLSX
#
# Copyright 2012, John McNamara, jmcnamara@cpan.org
#
# Documentation after __END__
#
# perltidy with the following options: -mbl=2 -pt=0 -nola
use 5.008002;
use strict;
use warnings;
use Exporter;
use Carp;
use XML::LibXML::Reader qw(:types);
use Excel::Reader::XLSX::Worksheet;
use Excel::Reader::XLSX::Package::Relationships;
our @ISA = qw(Excel::Reader::XLSX::Package::XMLreader);
our $VERSION = '0.00';
###############################################################################
#
# Public and private API methods.
#
###############################################################################
###############################################################################
#
# new()
#
# Constructor. Takes the package directory, the shared strings object and a
# hash of package file names (the _workbook and _workbook_rels keys are
# used), then loads the workbook relationships via _set_relationships().
#
sub new {

    my $class          = shift;
    my $package_dir    = shift;
    my $shared_strings = shift;
    my %files          = @_;

    my $self = Excel::Reader::XLSX::Package::XMLreader->new();

    $self->{_package_dir}          = $package_dir;
    $self->{_shared_strings}       = $shared_strings;
    $self->{_files}                = \%files;
    $self->{_worksheets}           = undef;
    $self->{_worksheet_properties} = [];
    $self->{_worksheet_indices}    = {};

    # Set the root dir for the workbook and worksheets. Usually 'xl/'. It is
    # the directory part of the workbook file name, so strip the last path
    # segment whatever the workbook file is called, not just a literal
    # "workbook.xml".
    my $workbook_root = $self->{_files}->{_workbook};
    $workbook_root = '' if !defined $workbook_root;
    $workbook_root =~ s{[^/]*\z}{};
    $self->{_workbook_root} = $workbook_root;

    bless $self, $class;

    $self->_set_relationships();

    return $self;
}
###############################################################################
#
# _set_relationships()
#
# Set up the Excel relationship links between package files and the
# internal ids. Parses the workbook relationships file named by
# $self->{_files}->{_workbook_rels}, relative to the package directory, and
# stores the result as a hash ref in $self->{_rels}.
#
sub _set_relationships {

    my $self = shift;

    # NOTE(review): the module loaded at the top of this file is
    # Excel::Reader::XLSX::Package::Relationships (plural) while the class
    # instantiated here is ...::Relationship (singular). Confirm this matches
    # the package name declared in that module.
    my $rels_file = Excel::Reader::XLSX::Package::Relationship->new();

    $rels_file->_parse_file(
        $self->{_package_dir} . $self->{_files}->{_workbook_rels} );

    my %rels = $rels_file->_get_relationships();
    $self->{_rels} = \%rels;
}
##############################################################################
#
# _read_node()
#
# Callback function to read the nodes of the Workbook.xml file. For each
# <sheet> start element it appends a hash of worksheet properties (name,
# sheet id, index, relationship id and file name) to
# $self->{_worksheet_properties}.
#
sub _read_node {

    my $self = shift;
    my $node = shift;

    # Only process the start elements of the <sheet> nodes.
    return unless $node->nodeType() == XML_READER_TYPE_ELEMENT;
    return unless $node->name eq 'sheet';

    my $name     = $node->getAttribute( 'name' );
    my $sheet_id = $node->getAttribute( 'sheetId' );
    my $rel_id   = $node->getAttribute( 'r:id' );

    # Use the package relationship data to convert the r:id to a filename.
    my $filename = $self->{_rels}->{$rel_id}->{_target};

    my $properties = $self->{_worksheet_properties} ||= [];

    # Store the properties to set up a Worksheet reader object. The index is
    # the zero-based position of the sheet in the workbook, which is how the
    # Worksheet objects are later stored and looked up. The sheetId is only
    # an identifier: it is not guaranteed to be sequential, so it cannot be
    # used to derive the position.
    push @{$properties},
      {
        _name     => $name,
        _sheet_id => $sheet_id,
        _index    => scalar @{$properties},
        _rel_id   => $rel_id,
        _filename => $filename,
      };
}
###############################################################################
#
# worksheets()
#
# Return an array of Worksheet objects, reading the worksheet data first if
# that has not been done yet. Returns an empty list if the workbook has no
# worksheets.
#
sub worksheets {

    my $self = shift;

    # Read the worksheet data if it hasn't already been read.
    if ( !defined $self->{_worksheets} ) {
        $self->_read_worksheets();
    }

    # The list can still be undefined if no worksheets were found, so fall
    # back to an empty list instead of dereferencing undef.
    return @{ $self->{_worksheets} || [] };
}
###############################################################################
#
# worksheet()
#
# Return a Worksheet object based on its sheetname or index. Negative
# indices count from the end. Unknown sheetnames, out of range indices and
# a missing parameter return an undef object.
#
sub worksheet {

    my $self  = shift;
    my $index = shift;
    my $name  = $index;

    # Ensure some parameter was passed.
    return unless defined $index;

    # Read the worksheet data if it hasn't already been read.
    if ( !defined $self->{_worksheets} ) {
        $self->_read_worksheets();
    }

    # Convert a valid sheetname to an index.
    if ( exists $self->{_worksheet_indices}->{$name} ) {
        $index = $self->{_worksheet_indices}->{$name};
    }

    # Check if it is a valid index: an integer with an optional leading
    # minus sign. Strings such as '1-1' or '--' are unknown sheetnames, not
    # indices.
    return if $index !~ /\A-?\d+\z/;

    return $self->{_worksheets}->[$index];
}
###############################################################################
#
# Internal methods.
#
###############################################################################
###############################################################################
#
# _read_worksheets()
#
# Set up a Worksheet object for each entry in $self->{_worksheet_properties}
# and record the name to index mapping. Does nothing if the worksheets have
# already been set up.
#
sub _read_worksheets {

    my $self = shift;

    # Return if the worksheet data has already been read.
    return if defined $self->{_worksheets};

    # Start from an empty list so that a workbook without worksheets is
    # still marked as read and callers always get an array reference.
    $self->{_worksheets} = [];

    # Iterate through the worksheet properties and set up a Worksheet object.
    for my $sheet ( @{ $self->{_worksheet_properties} } ) {

        # Create a new Worksheet reader.
        my $worksheet = Excel::Reader::XLSX::Worksheet->new(
            $self->{_shared_strings},
            $sheet->{_name},
            $sheet->{_index},
        );

        # Set up the file to read. We don't read data until it is required.
        $worksheet->_read_file(
            $self->{_package_dir}
              . $self->{_workbook_root}
              . $sheet->{_filename}
        );

        # Store the Worksheet reader objects.
        push @{ $self->{_worksheets} }, $worksheet;

        # Store the Worksheet index so it can be looked up by name.
        $self->{_worksheet_indices}->{ $sheet->{_name} } = $sheet->{_index};
    }
}
1;
__END__
=pod
=head1 NAME
Workbook - A class for reading the Excel XLSX workbook.xml file.
=head1 SYNOPSIS
See the documentation for L<Excel::Reader::XLSX>.
=head1 DESCRIPTION
This module is used in conjunction with L<Excel::Reader::XLSX>.
=head1 AUTHOR
John McNamara jmcnamara@cpan.org
=head1 COPYRIGHT
Copyright MMXII, John McNamara.
All Rights Reserved. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself.
=head1 LICENSE
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php>.
=head1 DISCLAIMER OF WARRANTY
See the documentation for L<Excel::Reader::XLSX>.
=cut

@ -1,199 +0,0 @@
package Excel::Reader::XLSX::Worksheet;
###############################################################################
#
# Worksheet - A class for reading the Excel XLSX sheet.xml file.
#
# Used in conjunction with Excel::Reader::XLSX
#
# Copyright 2012, John McNamara, jmcnamara@cpan.org
#
# Documentation after __END__
#
# perltidy with the following options: -mbl=2 -pt=0 -nola
use 5.008002;
use strict;
use warnings;
## no critic
use Carp;
use Excel::Reader::XLSX::Package::XMLreader;
use Excel::Reader::XLSX::Row;
our @ISA = qw(Excel::Reader::XLSX::Package::XMLreader);
our $VERSION = '0.00';
###############################################################################
#
# Public and private API methods.
#
###############################################################################
###############################################################################
#
# new()
#
# Constructor. Takes the shared strings object, the worksheet name and the
# worksheet index, in that order. The previous row number starts at -1,
# meaning no row has been read yet.
#
sub new {

    my ( $class, $shared_strings, $name, $index ) = @_;

    my $self = Excel::Reader::XLSX::Package::XMLreader->new();

    @{$self}{qw( _shared_strings _name _index )} =
      ( $shared_strings, $name, $index );
    $self->{_previous_row_number} = -1;

    return bless $self, $class;
}
###############################################################################
#
# _init_row()
#
# Create the Cell and Row objects that are re-used for every row of the
# worksheet, and flag that this has been done.
#
sub _init_row {

    my ( $self ) = @_;

    my $shared_strings = $self->{_shared_strings};

    # One Cell object is shared to avoid repeated calls to Cell::new().
    my $cell = Excel::Reader::XLSX::Cell->new( $shared_strings );
    $self->{_cell} = $cell;

    # Likewise one Row object avoids repeated calls to Row::new().
    $self->{_row} =
      Excel::Reader::XLSX::Row->new( $self->{_reader}, $shared_strings,
        $cell, );

    $self->{_row_initialised} = 1;
}
###############################################################################
#
# next_row()
#
# Read the next available row in the worksheet. Returns the (re-used) Row
# object initialised for that row, or nothing if there are no more "row"
# elements.
#
sub next_row {

    my ( $self ) = @_;

    my $reader = $self->{_reader};

    # Advance to the next "row" element in the file, if there is one.
    return unless $reader->nextElement( 'row' );

    # The 'r' attribute is one-indexed, so zero index it. If there is no 'r'
    # attribute assume the row is one more than the previous.
    my $row_number = $reader->getAttribute( 'r' );
    $row_number =
      defined $row_number
      ? $row_number - 1
      : $self->{_previous_row_number} + 1;

    # Set up the reusable Row object on first use.
    $self->_init_row() if !$self->{_row_initialised};

    my $row = $self->{_row};
    $row->_init( $row_number, $self->{_previous_row_number}, );

    $self->{_previous_row_number} = $row_number;

    return $row;
}
###############################################################################
#
# name()
#
# Accessor for the worksheet name passed to the constructor.
#
sub name {

    my ( $self ) = @_;

    return $self->{_name};
}
###############################################################################
#
# index()
#
# Accessor for the worksheet index passed to the constructor.
#
sub index {

    my ( $self ) = @_;

    return $self->{_index};
}
###############################################################################
#
# Internal methods.
#
###############################################################################
1;
__END__
=pod
=head1 NAME
Worksheet - A class for reading the Excel XLSX sheet.xml file.
=head1 SYNOPSIS
See the documentation for L<Excel::Reader::XLSX>.
=head1 DESCRIPTION
This module is used in conjunction with L<Excel::Reader::XLSX>.
=head1 AUTHOR
John McNamara jmcnamara@cpan.org
=head1 COPYRIGHT
Copyright MMXII, John McNamara.
All Rights Reserved. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself.
=head1 LICENSE
Either the Perl Artistic Licence L<http://dev.perl.org/licenses/artistic.html> or the GPL L<http://www.opensource.org/licenses/gpl-license.php>.
=head1 DISCLAIMER OF WARRANTY
See the documentation for L<Excel::Reader::XLSX>.
=cut
Loading…
Cancel
Save