#!/usr/bin/perl
########################################
# This script logs in to a Google Search Appliance and retrieves the raw log file for the previous day.
#
# Questions about this script can be directed to:
# Blake Crosby (bcrosby@nm.cbc.ca)
#
# The Canadian Broadcasting Corporation does not support nor endorse this script.
#
#
#
# This script was tested on FreeBSD and requires the following perl modules:
# libwww-perl (LWP::UserAgent, HTTP::Cookies)
# HTML::TokeParser
#
# This script was tested on a 5005 model Google Search Appliance.
#
# The user this script logs in as must be:
# - Version 3.4.10 or less: Collection Assistant
# - Version 3.4.12: Collection Manager
# (See user accounts in your admin interface for more information)
#
#
# Revision History
#
# 1.1 - Fixed small "date bug" where logs would not be downloaded when months had a leading "0"
# 1.2 - Changed some code to work properly with GoogleOS version 3.4.12 (removed leading 0 on days
# as well as fixed some referrer fields).
# 1.3 - Added code to reverse the log file as the GSA returns it in reverse chronological order
#
#########################################
#################################
# Config section
#
#
# Hostname of the Google Search Appliance you want to retrieve logs from.
# The script talks to this host on port 8000.
# For example: google.hostname.ca
$googlehost = 'google.hostname.ca';
#
# Username and password for the perl script to log in as. I suggest you create a new account specifically
# for this script.
# For example: user = googlescript password = google
#
$username = 'googlescript';
$password = 'google';
#
# The collection name of the logs you want to retrieve. CASE IS IMPORTANT.
# For example: webpages
$collection = 'webpages';
#
# File name to store the log file in. A temporary file with the same name
# plus ".tmp" is also written while the log is being re-ordered.
# For example: googlesearch.log
#
$logfilename = 'googlesearch.log';
#### END OF CONFIG
#
#
# Don't edit below this line unless you know what you are doing.
#
#load libwwwperl
use HTML::TokeParser;
use LWP::UserAgent;
# Work out yesterday's date. The result is used further down to build the
# log identifier "date_<month>_<day>_<year>", where month and day carry no
# leading zero.
#
# The date is computed in Perl instead of shelling out to `date -v-1d`:
# the -v flag exists only in BSD date(1), so on other systems the command
# fails and every date field ends up empty.
use POSIX qw(mktime strftime);
my @today = localtime;
# Ask for noon on "day of month minus one". mktime() normalises an
# out-of-range day (day 0 becomes the last day of the previous month), and
# noon keeps the result clear of daylight-saving jumps around midnight.
my $yesterday_noon = mktime(0, 0, 12, $today[3] - 1, $today[4], $today[5]);
$date1 = strftime('%Y:%m:%d', localtime($yesterday_noon));
@enddate = split /:/, $date1;    # (year, month, day)
# Drop a single leading zero from month and day; anchored so that only the
# first character can ever be removed.
$enddate[1] =~ s/^0//;
$enddate[2] =~ s/^0//;
# Build the HTTP client shared by every request in this script. It sends a
# desktop-browser User-Agent string and keeps cookies in lwpcookies.txt
# (autosave enabled), since the requests that follow need cookie support.
use HTTP::Cookies;
$ua = LWP::UserAgent->new(
    agent      => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    cookie_jar => HTTP::Cookies->new(autosave => 1, file => "lwpcookies.txt"),
);
# Log in to the appliance's admin interface: fetch the front page and the
# login page first so cookies get set, then submit the login form.
#
# NOTE(review): the requests send a header spelled 'Referrer', whereas the
# standard HTTP header is 'Referer'. The spelling is left as it was because
# the script was presumably tested this way -- confirm before changing it.
my $login_url = "http://$googlehost:8000/EnterpriseController";

# get some magic cookies
$res = $ua->get("http://$googlehost:8000/", 'Referrer' => '');

# then load the login page (and get more cookies)
$res = $ua->get($login_url, 'Referrer' => '');

# let's log into the box :)
# The fields are passed as a list so that LWP form-encodes each value. A
# user name or password containing characters such as '&', '=', '+' or '%'
# is then sent intact instead of corrupting the request body.
$res = $ua->post(
    $login_url,
    [
        actionType => 'authenticateUser',
        userName   => $username,
        password   => $password,
        login      => 'Login',
    ],
    'Referrer' => $login_url,
);
# Ask the appliance to generate yesterday's log for $collection, and stop
# the script if that request fails.
my $report_url = "http://$googlehost:8000/EnterpriseController";

# go to the view summaries form input page
$res = $ua->get("$report_url?actionType=webLogSelect&index=$collection", 'Referrer' => '');

# generate the report
# The form is passed as a field list so LWP encodes every value. The header
# value now uses the real host name: the original single-quoted string sent
# the literal text '$googlehost'.
$res = $ua->post(
    $report_url,
    [
        index      => $collection,
        reportType => 'recent',
        recentDate => "date_$enddate[1]_$enddate[2]_$enddate[0]",
        actionType => 'webLogView',
        webLogView => 'View Log',
    ],
    'Referrer' => $report_url,
);

# check the outcome; the HTTP status line is reported through die so it
# reaches STDERR together with a non-zero exit status
if (!$res->is_success) {
    die "Error: " . $res->status_line . "\n";
}
# Extract the URL of the "log is being generated" page from the previous
# response, then keep requesting it until the page no longer contains the
# text "Currently generating log".
#print $res->content;
$newurl = $res->content;
# NOTE(review): the pattern below is empty, which looks like the original
# regex (with a capture group for the URL) was lost from this copy of the
# script. With an empty pattern Perl re-uses the last successfully matched
# regex, so $1 is unlikely to hold the intended URL -- restore the pattern
# from a known-good copy before relying on this.
$newurl =~ //;
# $newurl is expected to be a path starting with "/", as it is appended
# directly after the port number below -- TODO confirm.
$newurl = $1;
#print $newurl;
# Seed $output so the loop body runs at least once.
$output = "Currently generating log";
while ($output =~ /Currently generating log/) {
$req = HTTP::Request->new(GET => "http://$googlehost:8000$newurl");
$req->header('Referrer' => '');
$res = $ua->request($req);
$output = $res->content;
# Wait 10 seconds after every fetch (including the final one) before the
# loop condition is checked again.
sleep 10;
#print $output;
}
# now we need to download the file!
# Export the generated log from the appliance and save it, as delivered, to
# $logfilename. The script stops if the download or the write fails, so an
# error page or a truncated file is never left behind as the log.
my $export_url = "http://$googlehost:8000/EnterpriseController";
my $log_date   = "date_$enddate[1]_$enddate[2]_$enddate[0]";
$res = $ua->post(
    $export_url,
    [
        fileExport    => ' Export to File ',
        fileArgs      => $log_date,
        actionType    => 'fileExport',
        index         => $collection,
        fileBrowse    => 'WEB_LOG',
        # Fixed value carried over unchanged from the original request body.
        displayString => 'May 4, 2003',
    ],
    'Referrer' => "$export_url?actionType=webLogView&index=$collection&refreshDate=$log_date",
);
die "Error downloading log: " . $res->status_line . "\n" unless $res->is_success;
$logfile = $res->content;

# Three-argument open with a lexical handle; every step is checked because
# write errors may only surface at close time.
open(my $log_fh, '>', $logfilename) or die "Cannot write $logfilename: $!\n";
print {$log_fh} $logfile            or die "Cannot write $logfilename: $!\n";
close($log_fh)                      or die "Cannot close $logfilename: $!\n";
# let's make the log file in the proper chronological order
# The appliance delivers the log newest-first, so read every line, write the
# lines out in reverse order to "$logfilename.tmp", then move that file over
# the original.
open(my $log_in, '<', $logfilename) or die "Cannot read $logfilename: $!\n";
my @log_lines = <$log_in>;
close($log_in);

# If the last line has no trailing newline it would run into the following
# line once it becomes the first line of the output.
$log_lines[-1] .= "\n" if @log_lines && $log_lines[-1] !~ /\n\z/;

open(my $log_out, '>', "$logfilename.tmp") or die "Cannot write $logfilename.tmp: $!\n";
print {$log_out} (reverse @log_lines)      or die "Cannot write $logfilename.tmp: $!\n";
close($log_out)                            or die "Cannot close $logfilename.tmp: $!\n";

# The temporary file sits next to the log, so the built-in rename is enough
# and no extra module is needed.
rename("$logfilename.tmp", $logfilename)
    or die "Cannot replace $logfilename with $logfilename.tmp: $!\n";
# End the admin session: request the controller's logout action with the
# same empty 'Referrer' header used by the other GET requests.
my $logout_url = "http://$googlehost:8000/EnterpriseController?actionType=logout";
$req = HTTP::Request->new('GET', $logout_url, [ 'Referrer' => '' ]);
$res = $ua->request($req);