You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
268 lines
7.9 KiB
268 lines
7.9 KiB
# retrieve files from emails
|
|
|
|
## no critic
|
|
|
|
package NGCP::BulkProcessor::AttachmentDownloader;
|
|
use strict;

use NGCP::BulkProcessor::Logging qw(
    getlogger
    attachmentdownloaderdebug
    attachmentdownloaderinfo
);
use NGCP::BulkProcessor::LogError qw(
    fileerror
    attachmentdownloadererror
    attachmentdownloaderwarn
    notimplementederror
);

use Email::MIME;
use Email::MIME::Attachment::Stripper;
use URI::Find;
#use File::Fetch;
#use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
#use HTTP::Cookies;

use NGCP::BulkProcessor::Utils qw(humanize_bytes changemod);
|
|
|
|
# Exporter wiring: the three match result codes can be imported on request.
require Exporter;
our @ISA = ('Exporter');
our @EXPORT_OK = (
    '$attachment_no_match',
    '$attachment_match',
    '$attachment_found',
);

# Result codes a checkfilenamecode callback is compared against:
#   $attachment_no_match - the attachment is skipped (not saved)
#   $attachment_match    - the attachment is saved
#   $attachment_found    - the attachment is saved and the message is reported as "found"
our ($attachment_no_match, $attachment_match, $attachment_found) = (0, 1, 2);
|
|
|
|
#my $logger = getlogger(__PACKAGE__);
|
|
|
|
# Constructor shared by the downloader implementations.
#
# Arguments:
#   $class         - the invocant; it is not used for blessing
#   $derived_class - package name the new object is blessed into
#   @params        - handed unchanged to the object's setup() method
#
# The download_urls flag is initialised to 0 before setup() runs.
# Returns the blessed object.
sub new {

    my $class = shift;
    my $derived_class = shift;
    my @params = @_;

    my $self = bless { download_urls => 0 }, $derived_class;
    $self->setup(@params);

    return $self;

}
|
|
|
|
# Abstract initialisation hook. new() calls it on the freshly blessed object
# with the constructor's remaining arguments.
#
# This base version only reports, via notimplementederror(), that the method
# named "<class>: <sub> not implemented" is missing; the class name is taken
# from the object, the sub name from caller(0). Concrete downloader classes are
# presumably expected to override it.
sub setup {

    my ($self) = @_;

    notimplementederror(ref($self) . ': ' . (caller(0))[3] . ' not implemented',getlogger(__PACKAGE__));

}
|
|
|
|
# Abstract teardown hook. DESTROY calls it without arguments when the object
# is destroyed.
#
# This base version only reports, via notimplementederror(), that the method
# named "<class>: <sub> not implemented" is missing; the class name is taken
# from the object, the sub name from caller(0). Concrete downloader classes are
# presumably expected to override it.
sub logout {

    my ($self) = @_;

    notimplementederror(ref($self) . ': ' . (caller(0))[3] . ' not implemented',getlogger(__PACKAGE__));

}
|
|
|
|
# Abstract download entry point.
#
# Arguments (by the original signature):
#   $filedir - presumably the directory prefix files are saved under, as it is
#              used by the _process_* helpers; confirm against implementations
#
# This base version ignores its arguments and only reports, via
# notimplementederror(), that the method named "<class>: <sub> not implemented"
# is missing. Concrete downloader classes are presumably expected to override it.
sub download {

    my ($self) = @_;

    notimplementederror(ref($self) . ': ' . (caller(0))[3] . ' not implemented',getlogger(__PACKAGE__));

}
|
|
|
|
# Parse one raw message and extract files from it.
#
# Arguments:
#   $subject        - message subject; used for logging and passed on to the helpers
#   $message_string - raw message text, parsed with Email::MIME
#   $filedir        - passed on to the helpers as the save location prefix
#   $files_saved    - array ref passed on to the helpers, which record saved files in it
#
# When the object's download_urls flag is set, the message bodies are scanned
# for download links first. Attachments are processed whenever that scan was
# skipped or did not report a find.
#
# Returns the "found" indicator from whichever step ran last.
sub _process_message {

    my ($self, $subject, $message_string, $filedir, $files_saved) = @_;

    attachmentdownloaderinfo('processing message "' . $subject . '"',getlogger(__PACKAGE__));

    my $parsed = Email::MIME->new($message_string);

    my $found = 0;
    if ($self->{download_urls}) {
        $found = $self->_process_bodies($parsed,$subject,$filedir,$files_saved);
    }
    unless ($found) {
        $found = $self->_process_attachments($parsed,$subject,$filedir,$files_saved);
    }

    return $found;

}
|
|
|
|
# Save the attachments of a parsed message, optionally filtered by the
# object's checkfilenamecode callback.
#
# Arguments:
#   $parsed       - Email::MIME object; only used when no attachments are passed in
#   $subject      - stored on every attachment hash as {subject}
#   $filedir      - save location prefix handed to _save_file()
#   $files_saved  - array ref handed to _save_file()
#   @attachments  - optional pre-extracted attachment hashes; when empty they are
#                   stripped from $parsed (with force_filename enabled)
#
# Each attachment gets {subject}, {size} (payload length) and {match} (undef)
# set. If checkfilenamecode is a code ref it is called with the attachment and
# its result is compared with the $attachment_* codes:
#   no_match / unknown value -> logged and skipped
#   match                    -> saved
#   found                    -> saved, and the return value becomes 1
# Without a callback every attachment is saved.
#
# Returns 1 if at least one attachment was reported as found, otherwise 0.
sub _process_attachments {

    my ($self,$parsed,$subject,$filedir,$files_saved,@attachments) = @_;

    my $found = 0;

    unless (scalar @attachments) {
        my $stripper = Email::MIME::Attachment::Stripper->new($parsed, (force_filename => 1));
        @attachments = $stripper->attachments();
    }

    foreach my $attachment (@attachments) {
        $attachment->{subject} = $subject;
        $attachment->{size} = length($attachment->{payload});
        $attachment->{match} = undef;

        my $checkfilenamecode = $self->{checkfilenamecode};
        if (defined $checkfilenamecode and ref $checkfilenamecode eq 'CODE') {
            my $match = $checkfilenamecode->($attachment);

            # built after the callback ran, so changes it made to the attachment are reflected;
            # same format and logger lookup as the url download branch in _process_body
            my $label = 'attachment ' . $attachment->{filename} . ' (' . humanize_bytes($attachment->{size}, undef, 1) . ' ' . $attachment->{content_type} . ')';

            if ($match == $attachment_no_match) {
                attachmentdownloaderinfo($label . ' skipped',getlogger(__PACKAGE__));
                next;
            } elsif ($match == $attachment_found) {
                attachmentdownloaderinfo($label . ' found',getlogger(__PACKAGE__));
                $found = 1;
            } elsif ($match == $attachment_match) {
                attachmentdownloaderinfo($label . ' matched',getlogger(__PACKAGE__));
            } else {
                attachmentdownloaderwarn($label . ' - unknown match, skipped',getlogger(__PACKAGE__));
                next;
            }
        }

        _save_file($attachment,$filedir,$files_saved);
    }

    return $found;

}
|
|
|
|
# Write an attachment payload to disk and record the saved file.
#
# Arguments:
#   $attachment  - hash ref; {filename}, {payload} and {match} are read
#   $filedir     - prefix the file name is appended to (no separator is added)
#   $files_saved - array ref; on success a { saved => path, match => ... } entry is pushed
#
# The file name comes from an e-mail or an HTTP response and is therefore
# untrusted: any directory components are removed so the file cannot be
# written outside of $filedir. An existing file of the same name is removed
# first. Open and write failures are reported through fileerror() and nothing
# is recorded in $files_saved.
sub _save_file {

    my ($attachment,$filedir,$files_saved) = @_;

    my $filename = (defined $attachment->{filename} ? $attachment->{filename} : '');
    $filename =~ s{\A.*[/\\]}{}s; # keep the last path component only
    my $filepath = $filedir . $filename;

    unlink $filepath;

    my $fh;
    # three-argument open: the path can never be interpreted as an open mode
    if (not open($fh, '>', $filepath)) {
        fileerror('cannot open file ' . $filepath . ': ' . $!,getlogger(__PACKAGE__));
        return; # $files_saved;
    }
    binmode($fh);

    # buffered write errors may only surface at close, so both results are checked
    my $written = print {$fh} $attachment->{payload};
    my $closed = close($fh);
    unless ($written and $closed) {
        fileerror('cannot write file ' . $filepath . ': ' . $!,getlogger(__PACKAGE__));
        return;
    }

    changemod($filepath);

    push(@$files_saved,{ saved => $filepath, match => $attachment->{match} });

    attachmentdownloaderinfo('attachment saved: ' . $filepath,getlogger(__PACKAGE__));

}
|
|
|
|
# Scan the parts of a parsed message for download links.
#
# Arguments:
#   $parsed      - object providing walk_parts() (an Email::MIME message)
#   $subject, $filedir, $files_saved - passed through to _process_body()
#
# walk_parts() invokes the callback for every part of the message, containers
# and their subparts alike. Only parts without subparts are handed to
# _process_body(), so each leaf part is examined exactly once; iterating over
# a container's subparts here as well would process them a second time when
# the walk reaches them. Scanning stops as soon as a part reports a find.
#
# Returns the "found" indicator of the last processed part (0 if none was found).
sub _process_bodies {

    my ($self,$parsed,$subject,$filedir,$files_saved) = @_;

    my $found = 0;

    $parsed->walk_parts(sub {
        my ($part) = @_;
        return if $found;
        # containers are skipped, the walk visits their subparts on its own
        return if scalar($part->subparts) > 0;
        $found = $self->_process_body($part,$subject,$found,$filedir,$files_saved);
        return;
    });

    return $found;

}
|
|
|
|
# Look for http(s) links in one message part, download them and save the
# results, optionally filtered by the object's checkfilenamecode callback.
#
# Arguments:
#   $part        - message part; content_type and body are read
#   $subject     - stored on every downloaded attachment hash as {subject}
#   $found       - incoming "found" indicator, returned unchanged unless a
#                  download is reported as found
#   $filedir, $files_saved - handed to _save_file()
#
# Parts whose content type does not contain "text/" are ignored. Distinct urls
# are processed in sorted order; failed downloads are skipped. The callback
# result is handled like this:
#   no_match / unknown value -> logged and skipped
#   match                    -> saved
#   found                    -> saved, and the return value becomes 1
# Without a callback every download is saved.
sub _process_body {

    my ($self,$part,$subject,$found,$filedir,$files_saved) = @_;

    # nothing to scan in non-text parts
    return $found unless $part->content_type =~ m/text\//i;

    # hash keys de-duplicate links that occur more than once
    my %uris;
    my $finder = URI::Find->new(sub {
        my ($uri) = @_;
        my $url = $uri->as_string;
        $uris{$url} = undef if $url =~ /^http/i;
    });
    my $body = $part->body;
    $finder->find(\$body);

    my @urls = sort keys %uris;
    unless (scalar @urls) {
        attachmentdownloaderinfo("no urls for download found in part '" . $part->content_type . "'",getlogger(__PACKAGE__));
        return $found;
    }

    foreach my $url (@urls) {
        my $attachment = _download_file($url);
        next unless $attachment;

        $attachment->{subject} = $subject;
        $attachment->{size} = length($attachment->{payload});
        $attachment->{match} = undef;

        my $checkfilenamecode = $self->{checkfilenamecode};
        if (defined $checkfilenamecode and ref $checkfilenamecode eq 'CODE') {
            my $match = $checkfilenamecode->($attachment);

            # common prefix of all log lines below
            my $label = 'attachment ' . $attachment->{filename} . ' (' . humanize_bytes($attachment->{size}, undef, 1) . ' ' . $attachment->{content_type} . ')';

            if ($match == $attachment_no_match) {
                attachmentdownloaderinfo($label . ' skipped',getlogger(__PACKAGE__));
                next;
            } elsif ($match == $attachment_found) {
                attachmentdownloaderinfo($label . ' found',getlogger(__PACKAGE__));
                $found = 1;
            } elsif ($match == $attachment_match) {
                attachmentdownloaderinfo($label . ' matched',getlogger(__PACKAGE__));
            } else {
                attachmentdownloaderwarn($label . ' - unknown match, skipped',getlogger(__PACKAGE__));
                next;
            }
        }

        _save_file($attachment,$filedir,$files_saved);
    }

    return $found;

}
|
|
|
|
# Fetch a url (.. dropbox links and the like) and wrap the response as an
# attachment hash.
#
# Arguments:
#   $uri - the url to GET
#
# Returns a hash ref with {uri}, {payload} (response content without charset
# decoding) and {filename} (from the Content-Disposition header, undef if none
# could be determined) when the response code is 200. Any other response is
# logged as a warning and nothing (undef in scalar context) is returned.
sub _download_file {

    my ($uri) = @_;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    # NOTE(review): hostname verification is switched off, so https downloads
    # are not protected against man-in-the-middle attacks. Left as is because
    # enabling it would make downloads from hosts with mismatching certificates
    # fail - confirm whether that is still required.
    $ua->ssl_opts(
        verify_hostname => 0,
    );
    $ua->cookie_jar({});

    my $request = HTTP::Request->new('GET', $uri);
    attachmentdownloaderinfo('downloading ' . $uri,getlogger(__PACKAGE__));
    my $response = $ua->request($request);

    if ($response->code != 200) {
        attachmentdownloaderwarn('downloading ' . $uri . ' failed',getlogger(__PACKAGE__));
        return;
    }

    my $attachment = {};
    $attachment->{uri} = $uri;
    $attachment->{payload} = $response->decoded_content( charset => 'none' );
    #$attachment->{size} = $response->header('content-length'); # -s $attachment->{payload};
    # scalar(): header() is context sensitive and must yield a single string here
    $attachment->{filename} = _filename_from_content_disposition(scalar $response->header('Content-Disposition'));

    return $attachment;

}

# Extract the file name from a Content-Disposition header value.
# Prefers the filename parameter, quoted ("a b.pdf") or as a plain token
# (report.csv), and falls back to the first quoted string in the header.
# Returns undef if the header is missing or carries no file name.
sub _filename_from_content_disposition {

    my ($header) = @_;

    return undef unless defined $header;
    return $1 if $header =~ m/\bfilename\s*=\s*"([^"]+)"/i;
    return $1 if $header =~ m/\bfilename\s*=\s*([^\s;"]+)/i;
    return $1 if $header =~ m/"([^"]+)"/;
    return undef;

}
|
|
|
|
# Destructor: logs the downloader out when the object is destroyed.
#
# A destructor can run at any point, including while the surrounding code is
# inspecting an error or while the program is exiting. The global status
# variables are therefore localized, so that whatever logout() does cannot
# overwrite the caller's $@, $! or the exit status in $?.
sub DESTROY {

    my $self = shift;

    local ($., $@, $!, $^E, $?);

    $self->logout();

}
|
|
|
|
1;
|