Use Perl's WWW::Mechanize to automate web surfing
This Perl script shows how to automate downloading data from a website: it requests a range of product pages by ID and saves each page that exists to a local HTML file.
#!/usr/bin/perl
use strict;
use warnings;
# load LWP library:
#use LWP::Simple;
#use LWP::RobotUA;
use HTML::Parse;
use WWW::Mechanize;
#my $agent='Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; .NET CLR 1.0.3705; .NET CLR 2.0.50727)';
# Walk the product catalog from ID $i down to 1, fetch each product page, and
# save every page that does not carry the "product could not be found" message
# as "<ID>.html" in the current working directory.
my $mech = WWW::Mechanize->new();

# Send a browser-like User-Agent using one of WWW::Mechanize's built-in aliases.
$mech->agent_alias('Windows IE 6');

# Base URL; the numeric product ID is appended for each request.
my $url = 'http://www.xxx.com/catalog/index.php?_a=viewProd&productId=';
my $i   = 1600;    # highest product ID to try

for my $count (reverse 1 .. $i) {
    my $new_url = $url . $count;

    # NOTE: WWW::Mechanize enables autocheck by default, so a failed request
    # makes get() die and stops the whole run.
    $mech->get($new_url);
    my $output_page = $mech->content();

    # Skip pages whose body contains the "not found" message.
    next if $output_page =~ m/That product could not be found/i;

    print "downloading $count\n";

    # content() returns decoded text, so write it through an explicit encoding
    # layer; otherwise the bytes on disk depend on which characters the page
    # happens to contain.
    my $file = "$count.html";
    open(my $out, '>:encoding(UTF-8)', $file)
        or die "Cannot open $file for writing: $!\n";
    print {$out} $output_page
        or die "Cannot write to $file: $!\n";
    # Buffered write errors only surface at close, so check it too.
    close($out)
        or die "Cannot close $file: $!\n";
}
