bulk-processor/lib/NGCP/BulkProcessor/Table.pm

package NGCP::BulkProcessor::Table;
use strict;

## no critic

use Digest::MD5;

require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(get_rowhash);

# Constructor.
#   $data      - optional array reference of rows (each row an array reference)
#   $dupecheck - optional flag; when true, rows are loaded via the
#                duplicate-suppressing add method
# Returns the new table object.
sub new {

  my ($class, $data, $dupecheck) = @_;
  my $self = bless({}, $class);
  # _set_data resets the internal state first, so the object is always initialized
  $self->_set_data($data, $dupecheck);
  return $self;

}

# Replaces the table content.
#   $data      - array reference of rows; anything else just leaves the table empty
#   $dupecheck - when true, rows are added with addrow_ref_nodupe,
#                otherwise with addrow_ref
# The table is always cleared first.
sub _set_data {

  my ($self, $data, $dupecheck) = @_;
  $self->clear();
  if (defined $data and ref $data eq 'ARRAY') {
    # decide once which add method applies to every row
    my $add_method = ($dupecheck ? 'addrow_ref_nodupe' : 'addrow_ref');
    foreach my $row (@$data) {
      $self->$add_method($row);
    }
  }

}

# Empties the table: drops all rows and all recorded row digests.
# Returns the fresh (empty) digest hash reference, which is what the
# final assignment evaluates to.
sub clear {

  my ($self) = @_;
  $self->{data} = [];               # list of row array references
  return $self->{rowhashes} = {};   # row digest => number of times added

}

# Combined getter/setter for the row storage.
#   without a (true) first argument: returns the reference to the internal
#     array of rows (not a copy)
#   with a first argument: replaces the table content via _set_data; the
#     second argument is passed on as the duplicate-check flag
sub data_ref {

  my ($self, $data, $dupecheck) = @_;
  # getter: nothing to set
  return $self->{data} unless $data;
  # setter: hand over to _set_data
  return $self->_set_data($data, $dupecheck);

}

# Appends a row given as a list of elements.
# Returns what addrow_ref returns for the new row.
sub addrow {

  my $self = shift;
  # Store a copy of the arguments: the elements of @_ are aliases of the
  # caller's variables, so keeping \@_ itself would let later modifications
  # of those variables change the stored row behind the table's back
  # (and invalidate its recorded digest).
  return $self->addrow_ref([ @_ ]);

}

# Appends a row given as a list of elements, unless the table already
# holds a row with the same digest.
# Returns what addrow_ref_nodupe returns for the row.
sub addrow_nodupe {

  my $self = shift;
  # Store a copy of the arguments: the elements of @_ are aliases of the
  # caller's variables, so keeping \@_ itself would let later modifications
  # of those variables change the stored row behind the table's back
  # (and invalidate its recorded digest).
  return $self->addrow_ref_nodupe([ @_ ]);

}

# Appends the row referenced by $row_ref; duplicates are allowed.
# The reference itself is stored (no copy is made).
# Returns how many times a row with this digest has been added so far,
# or 0 if $row_ref has no digest (get_rowhash returned undef) and
# nothing was added.
sub addrow_ref {

  my ($self, $row_ref) = @_;
  my $digest = get_rowhash($row_ref);
  return 0 unless defined $digest;

  # previous number of occurrences (0 for a digest not seen yet), plus this one
  my $occurrences =
    (exists $self->{rowhashes}->{$digest} ? $self->{rowhashes}->{$digest} : 0) + 1;
  $self->{rowhashes}->{$digest} = $occurrences;
  push(@{$self->{data}}, $row_ref);
  return $occurrences;

}

# Appends the row referenced by $row_ref only if no row with the same
# digest is recorded yet. The reference itself is stored (no copy is made).
# Returns 0 if $row_ref has no digest (get_rowhash returned undef),
# 1 if the row was added, or the recorded count of the already
# known digest if the row was skipped.
sub addrow_ref_nodupe {

  my ($self, $row_ref) = @_;
  my $digest = get_rowhash($row_ref);
  return 0 unless defined $digest;

  # known digest: leave the table untouched and report the recorded count
  if (exists $self->{rowhashes}->{$digest}) {
    return $self->{rowhashes}->{$digest};
  }

  # first occurrence: record it and store the row
  $self->{rowhashes}->{$digest} = 1;
  push(@{$self->{data}}, $row_ref);
  return 1;

}

# Checks whether a row, given as a list of elements, is recorded in the table.
# Returns 1 if so, 0 otherwise (see rowexists_ref).
sub rowexists {

  my ($self, @row) = @_;
  return $self->rowexists_ref(\@row);

}

# Checks whether the digest of the row referenced by $row_ref is
# recorded in the table.
# Returns 1 if it is, 0 if it is not or if $row_ref has no digest.
sub rowexists_ref {

  my ($self, $row_ref) = @_;
  my $digest = get_rowhash($row_ref);
  my $known = (defined $digest and exists $self->{rowhashes}->{$digest});
  return ($known ? 1 : 0);

}

# Computes an MD5 hex digest identifying the content of a row.
#   $row_ref - array reference holding the row elements
# Returns the 32 character lowercase hex digest, or undef if $row_ref is
# not a plain array reference.
#
# Each element is fed to the digest in a self-delimiting form, so that
#   - element boundaries matter: ('ab','c') and ('a','bc') differ,
#   - undef and the empty string differ,
#   - elements with wide characters are digested as UTF-8 bytes instead of
#     making Digest::MD5 croak.
# Within this module the digest is only used as a lookup key for
# duplicate detection.
sub get_rowhash {

  my $row_ref = shift;
  # "return undef" (not a bare return) keeps the result a single value
  # in list context as well
  return undef unless (defined $row_ref and ref $row_ref eq 'ARRAY');

  my $md5 = Digest::MD5->new;
  foreach my $element (@$row_ref) {
    if (defined $element) {
      # work on a stringified copy, the row element itself must stay untouched
      my $bytes = "$element";
      utf8::encode($bytes);
      # length prefix makes the concatenation of elements unambiguous
      $md5->add('d' . length($bytes) . ':' . $bytes);
    } else {
      $md5->add('u;');
    }
  }
  return $md5->hexdigest;

}

# Returns the number of rows currently stored in the table.
sub rowcount {

  my ($self) = @_;
  my $rows = $self->{data};
  return scalar(@$rows);

}

# Returns a single element of the table.
#   first argument  - row index
#   second argument - column index
# Returns undef if the row or the element does not exist.
sub element {

  my $self = shift;
  my ($rowindex, $colindex) = @_;
  # Fetch the row first instead of chaining both subscripts: a chained
  # lookup would autovivify a missing row, i.e. merely reading an
  # out-of-range element would grow the table and change the row count.
  my $row_ref = $self->{data}->[$rowindex];
  return undef unless defined $row_ref;
  return $row_ref->[$colindex];

}

# Returns the elements of the row at the given index as a list,
# or an empty list if there is no such row.
sub getrow {

  my ($self, $rowindex) = @_;
  my $row = $self->{data}->[$rowindex];
  return ($row ? @$row : ());

}

# Returns the stored array reference of the row at the given index
# (not a copy), or a reference to a new empty array if there is no such row.
sub getrow_ref {

  my ($self, $rowindex) = @_;
  my $row = $self->{data}->[$rowindex];
  return $row || [];

}

# Returns the elements of the column at the given index as a list,
# one element per row, in row order.
sub getcol {

  my ($self, $colindex) = @_;
  my @column = map { $self->{data}->[$_]->[$colindex] } (0 .. $self->rowcount() - 1);
  return @column;

}

# Returns a reference to a new array holding the elements of the column
# at the given index (see getcol).
sub getcol_ref {

  my ($self, $colindex) = @_;
  return [ $self->getcol($colindex) ];

}

# Reorders the rows of the table.
#   $sortfunction - comparator handed to Perl's sort, which receives
#                   the row array references to compare
# NOTE(review): per the perlfunc documentation of sort, a comparator
# using $a/$b gets them set as package variables of the package sort is
# called from (this one) - confirm that callers' comparators account
# for that.
sub sortrows {

  my ($self, $sortfunction) = @_;
  # reordering affects neither the uniqueness of rows nor the recorded
  # digests, so only the row list is replaced
  $self->{data} = [ sort $sortfunction @{$self->{data}} ];

}

# Renders the table as a single string.
#   first argument  - separator placed between the elements of a row
#   second argument - separator placed between rows (not appended
#                     after the last row)
# Returns the empty string for a table without rows.
sub tostring {

  my ($self, $colsep, $rowsep) = @_;
  my @lines = map { join($colsep, @$_) } @{$self->{data}};
  return join($rowsep, @lines);

}

1;