#!/usr/bin/perl
#
# Fetch the BBC News text-only front page and print the first few
# front-page headlines, each followed by its synopsis.

use strict;
use warnings;

use HTML::TokeParser;
use HTTP::Cookies;
use List::Util qw(min);
use LWP::UserAgent;    # load libwww-perl

# URL requested by the script; the query string asks for /text_only.stm with
# audience_name=International and bandwidth=low.
my $FRONT_PAGE_URL = 'http://news.bbc.co.uk/register_audience?url=/text_only.stm&audience_name=International&bandwidth=low';

# Upper bound on the number of stories printed (the original script printed
# exactly five).
my $MAX_STORIES = 5;

# --- Fetch the page ---------------------------------------------------------

my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)');

# Cookies are persisted to lwpcookies.txt in the current directory.
$ua->cookie_jar(
    HTTP::Cookies->new(file => 'lwpcookies.txt', autosave => 1)
);

my $res = $ua->get($FRONT_PAGE_URL, 'Accept' => 'text/html');

# Fail loudly instead of printing empty stanzas when the request fails.
if (!$res->is_success) {
    die "Could not fetch $FRONT_PAGE_URL: " . $res->status_line . "\n";
}

my $content = $res->content;

# --- Extract the page date --------------------------------------------------

# The date is taken from <font size="2"> elements whose text contains "GMT".
# If several match, the last one wins. It is only assigned on a successful
# match so a stale capture can never leak in.
# NOTE: $date is not printed at present; the original print of it was
# commented out.
my $date;
my $font_stream = HTML::TokeParser->new(\$content);
while (my $font_tag = $font_stream->get_tag('font')) {
    my $size = $font_tag->[1]{size};
    next unless defined $size && $size eq '2';

    my $label = $font_stream->get_trimmed_text('/font');
    if ($label =~ /^(.* GMT).*$/) {
        $date = $1;
    }
}

# --- Isolate the front-page section -----------------------------------------

# NOTE(review): the two leading newlines are kept from the original pattern;
# they may be an artifact of lost markup -- confirm against the live page.
my ($news) = $content =~ /\n\nNEWS FRONT PAGE<\/h3>(.*)AFRICA<\/b>/s;
if (!defined $news) {
    die "Could not locate the NEWS FRONT PAGE section in the response\n";
}

# Collapse runs of blank lines.
$news =~ s/\n{2,}/\n/g;

# --- Collect synopses and headlines -----------------------------------------

# Each <div> supplies one synopsis (its trimmed text up to </div>).
my @synopsis;
my $div_stream = HTML::TokeParser->new(\$news);
while ($div_stream->get_tag('div')) {
    push @synopsis, $div_stream->get_trimmed_text('/div');
}

# Each <a> supplies one headline and its link.
# NOTE: @url is collected but not used by the output below.
my (@url, @text);
my $link_stream = HTML::TokeParser->new(\$news);
while (my $anchor = $link_stream->get_tag('a')) {
    my $href     = $anchor->[1]{href} || '-';
    my $headline = $link_stream->get_trimmed_text('/a');

    # Point the link at the "hi" variant of the path instead of "low".
    $href =~ s{/low/}{/hi/};

    push @url,  $href;
    push @text, $headline;
}

# --- Print the stories ------------------------------------------------------

# Assumes the N-th link belongs to the N-th div, as the original did.
# Never index past the data that was actually found.
my $story_count = min($MAX_STORIES, scalar @text, scalar @synopsis);
warn "No front-page stories found\n" if $story_count == 0;

for my $i (0 .. $story_count - 1) {
    my $headline = $text[$i];
    my $summary  = $synopsis[$i];

    # Remove the headline from the synopsis text. \Q...\E makes the headline
    # match literally (headlines may contain regex metacharacters), and the
    # length guard avoids an empty pattern, which Perl would replace with the
    # last successfully matched regex.
    if (length $headline) {
        $summary =~ s/\Q$headline\E//;
    }

    print "$headline\n\n$summary\n\n\n";
}