You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
bulk-processor/lib/NGCP/BulkProcessor/Table.pm

235 lines
3.7 KiB

package NGCP::BulkProcessor::Table;
use strict;
## no critic
use Digest::MD5;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(get_rowhash);
sub new {
my $class = shift;
my $self = bless {}, $class;
$self->_set_data($_[0],$_[1]);
return $self;
}
sub _set_data {
my $self = shift;
my ($data,$dupecheck) = @_;
$self->clear();
if (defined $data and ref $data eq 'ARRAY') {
if ($dupecheck) {
foreach my $row (@$data) {
$self->addrow_ref_nodupe($row);
}
} else {
foreach my $row (@$data) {
$self->addrow_ref($row);
}
}
}
}
sub clear {
my $self = shift;
$self->{data} = [];
$self->{rowhashes} = {};
}
sub data_ref {
my $self = shift;
if ($_[0]) {
#if argument, set the value
$self->_set_data($_[0],$_[1]);
} else {
return $self->{data};
}
}
sub addrow {
my $self = shift;
#my @row = @_;
return $self->addrow_ref(\@_);
}
sub addrow_nodupe {
my $self = shift;
#my @row = @_;
return $self->addrow_ref_nodupe(\@_);
}
sub addrow_ref {
my $self = shift;
my $row_ref = shift;
my $rowhash = get_rowhash($row_ref);
my $itemcount = 0;
if (defined $rowhash) {
if (not exists $self->{rowhashes}->{$rowhash}) {
$self->{rowhashes}->{$rowhash} = 0;
}
$itemcount = $self->{rowhashes}->{$rowhash} + 1;
$self->{rowhashes}->{$rowhash} = $itemcount;
push @{$self->{data}},$row_ref;
}
return $itemcount;
}
sub addrow_ref_nodupe {
my $self = shift;
my $row_ref = shift;
my $rowhash = get_rowhash($row_ref);
my $itemcount = 0;
if (defined $rowhash) {
if (not exists $self->{rowhashes}->{$rowhash}) {
$self->{rowhashes}->{$rowhash} = 1;
$itemcount = 1;
push @{$self->{data}},$row_ref;
} else {
$itemcount = $self->{rowhashes}->{$rowhash};
}
}
return $itemcount;
}
sub rowexists {
my $self = shift;
#my @row = @_;
return $self->rowexists_ref(\@_);
}
sub rowexists_ref {
my $self = shift;
my $row_ref = shift;
my $rowhash = get_rowhash($row_ref);
my $itemcount = 0;
if (defined $rowhash) {
if (exists $self->{rowhashes}->{$rowhash}) {
return 1;
}
}
return 0;
}
sub get_rowhash {
my $row_ref = shift;
if (defined $row_ref and ref $row_ref eq 'ARRAY') {
my $md5 = Digest::MD5->new;
foreach my $element (@$row_ref) {
$md5->add($element);
}
return $md5->hexdigest;
} else {
return undef;
}
}
sub rowcount {
my $self = shift;
#my @rows = @{$self->{data}};
return scalar @{$self->{data}}; # + 1;
}
sub element {
my $self = shift;
return $self->{data}->[$_[0]]->[$_[1]];
}
sub getrow {
my $self = shift;
my $row_ref = $self->{data}->[$_[0]];
if ($row_ref) {
return @$row_ref;
} else {
return ();
}
}
sub getrow_ref {
my $self = shift;
my $row_ref = $self->{data}->[$_[0]];
if ($row_ref) {
return $row_ref;
} else {
return [];
}
}
sub getcol {
my $self = shift;
my @col = ();
for (my $i = 0; $i < $self->rowcount(); $i++) {
push(@col,$self->{data}->[$i]->[$_[0]]);
}
return @col;
}
sub getcol_ref {
my $self = shift;
my @col = $self->getcol($_[0]);
return \@col;
}
sub sortrows {
my $self = shift;
my $sortfunction = shift;
my @new_rows = sort $sortfunction @{$self->{data}};
#$self->_set_data(\@new_rows);
# since sorting can not affect uniqueness of rows and rowhashes, we just set:
$self->{data} = \@new_rows;
}
sub tostring {
my $self = shift;
my @rows = @{$self->{data}};
my $result = '';
my $row_ref;
for (my $i = 0; $i < scalar @rows; $i++) {
$row_ref = $rows[$i];
$result .= join($_[0],@$row_ref) . $_[1];
}
return substr($result,0,length($result) - length($_[1]));
}
1;