Perl HTTP log file parser
This is my HTTP log file parser. It tells you who is crawling your site. I hope you like it.
#!/usr/local/perl
# Scans every *_access.log* file in the current directory. For each file it
# counts the requests made by known crawlers, copies all other lines to
# "<file>.txt", and prints the per-file crawler totals. After all files are
# read it writes one line per client IP (hit count, IP, user agent of the
# first request seen), busiest first, to grant.txt.
use strict;
use warnings;

# Leave out "<file>.txt" copies written by an earlier run: they match the
# same glob and would otherwise be parsed again, inflating the IP counts.
my @files = grep { !/\.txt\z/ } <*_access.log*>;

my %address = ();    # client IP => number of requests seen
my %agents  = ();    # client IP => user agent of its first request

# Report label => pattern that identifies that crawler anywhere in a line.
my %crawler_pattern = (
    google       => qr/Googlebot/,
    Yahoo        => qr/Yahoo\! Slurp/,
    Cuil         => qr/cuil\.com/,
    twiceler     => qr/twiceler/,
    Jeeves       => qr/Ask Jeeves/,
    Yandex       => qr/Yandex/,
    legs         => qr/80legs/,
    Baiduspider  => qr/Baiduspider/,
    dotnetdotcom => qr/dotnetdotcom/,
    msn          => qr/msn\.com/,
    seoprofiler  => qr/seoprofiler/,
);

foreach my $file (@files) {
    # Crawler totals are reported per file, so they start at zero each time.
    my %hits = map { $_ => 0 } keys %crawler_pattern;

    my $outfile = "$file.txt";
    my $in;
    unless (open($in, '<', $file)) {
        warn "Skipping $file: $!\n";
        next;
    }
    open(my $out, '>', $outfile) or die "Cannot write $outfile: $!";

    while (my $line = <$in>) {
        # The client IP is the first space-separated token of the text that
        # precedes the first double quote on the line.
        my ($prefix) = split(/"/, $line, 2);
        my ($ip)     = split(/ /, $prefix);
        $ip = '' unless defined $ip;

        # The user agent is the last double-quoted field on the line. A
        # regex is used instead of popping split() fields so that a final
        # line without a trailing newline is handled the same way.
        my ($agent) = $line =~ /"([^"]*)"[^"]*\z/;

        if (exists $address{$ip}) {
            $address{$ip}++;
        }
        else {
            $address{$ip} = 1;
            $agents{$ip}  = $agent;
        }

        # A line may mention several crawlers; each one mentioned is counted.
        my $is_crawler = 0;
        foreach my $name (keys %crawler_pattern) {
            next unless $line =~ $crawler_pattern{$name};
            $hits{$name}++;
            $is_crawler = 1;
        }

        # Only non-crawler traffic is kept in the filtered copy.
        print {$out} $line unless $is_crawler;
    }

    close($in);
    close($out) or die "Cannot close $outfile: $!";

    # Disabled: would replace the original log with the filtered copy.
    # unlink ($file);
    # rename ($outfile, $file);
    print "google: $hits{google}, Yahoo: $hits{Yahoo}, Cuil: $hits{Cuil}, twiceler: $hits{twiceler}, Jeeves: $hits{Jeeves}, Yandex: $hits{Yandex}, legs: $hits{legs}\n";
    print "Baiduspider: $hits{Baiduspider}, dotnetdotcom: $hits{dotnetdotcom}, msn: $hits{msn}, seoprofiler: $hits{seoprofiler} \n";
}

# The bareword handle OUT is kept here because the script closes it by name
# right after this report loop.
open(OUT, '>', 'grant.txt') or die "Cannot write grant.txt: $!";
foreach my $key (sort hashValueDescendingNum (keys(%address))) {
    # Lines without a quoted field leave the agent undefined.
    my $agent = defined $agents{$key} ? $agents{$key} : '';
    print OUT "$address{$key} \t $key\t $agent\n";
}
# Close the grant.txt report. Buffered write errors only surface at close,
# so a failure here is reported instead of being silently ignored.
close(OUT) or die "Cannot close grant.txt: $!";

# Sort comparator: orders keys of %address by ascending hit count.
# Intended for use as: sort hashValueAscendingNum keys %address
# (not referenced in the visible part of this script).
sub hashValueAscendingNum {
    $address{$a} <=> $address{$b};
}

# Sort comparator: orders keys of %address by descending hit count, so the
# key with the most hits comes first.
sub hashValueDescendingNum {
    $address{$b} <=> $address{$a};
}
