#!/usr/bin/perl
########################################
# This script logs into a google search appliance and retrieves the raw log files for the previous day.
#
# Questions about this script can be directed to:
# Blake Crosby (bcrosby@nm.cbc.ca)
#
#
#
#
# This script was tested on FreeBSD and requires the following perl modules:
# libwwwperl (LWP::UserAgent)
# HTTP::Cookies
# HTML::TokeParser
# File::Copy
#
# This script was tested on a 1001 model Google Search Appliance (Version 4.6.2.S.12)
#
#########################################


#################################
# Config section
#
# Everything a site normally needs to change lives in this section.
#
# Hostname of the Google appliance you want to retrieve logs from.
# The script talks to it over plain http on port 8000.
# For example: google.hostname.ca

$googlehost = 'google.hostname.ca';

#
# Username and password for the perl script to log in as. I suggest you create a new account specifically
# for this script.
# For example: user = googlescript password = google
#

$username = 'googlescript';
$password = 'google';


#
# The collection name of the logs you want to retrieve. CASE IS IMPORTANT.
# It is placed in the report URLs exactly as written here.
# For example: webpages

$collection = 'webpages';

#
# The front end name of the logs you want to retrieve. CASE IS IMPORTANT.
# For example: webpages
#
# NOTE(review): nothing else in the code visible here reads $frontend.

$frontend = 'webpages';

#
# File name to store the log file.
# For example: googlesearch.log
#
# By default yesterday's date will be used (e.g. 20051128.log); however, you can
# override that by uncommenting the following line.

#$logfilename = 'googlesearch.log';

#
# Path to where you want to save log files.
# For example: /var/logs
#
# NOTE: You must **NOT** have a trailing slash! The script joins this path and
# the file name with a "/" itself.

$logfilepath = ".";

#### END OF CONFIG
#
#
# Don't edit below this line, unless you know what you are doing.
#


#load libwwwperl
use HTML::TokeParser;
use LWP::UserAgent;
use File::Copy;

# Work out yesterday's calendar date and the name of the file to save to.
#
# Subtracting a flat 86400 seconds from time() picks the wrong day around
# daylight-saving changes: shortly after midnight on the day after the clocks
# go forward it lands two days back, and late in the evening of the day the
# clocks go back it lands on today. Instead, ask mktime() for noon on
# "today's day of month minus one". mktime() normalises an out-of-range day
# (day 0 becomes the last day of the previous month), and isdst = -1 lets it
# decide for itself whether DST applies.
#
# Sets: $yesterdayepoch, $theyear, $themonth, $theday (unpadded, used in the
# report URL), $filemonth, $fileday (zero padded, used in the file name) and
# $logfilename (full path of the output file).
use POSIX ();

my ($today_mday, $today_mon, $today_year) = (localtime(time()))[3, 4, 5];

$yesterdayepoch = POSIX::mktime(0, 0, 12, $today_mday - 1, $today_mon, $today_year, 0, 0, -1);
die "Error: unable to work out yesterday's date\n" unless defined $yesterdayepoch;

($day_of_month, $month, $year) = (localtime($yesterdayepoch))[3, 4, 5];

$theyear   = $year + 1900;
$themonth  = $month + 1;
$theday    = $day_of_month;
$filemonth = sprintf("%02d", $themonth);
$fileday   = sprintf("%02d", $theday);

# If a log file name was configured use it, otherwise name the file after
# yesterday's date (YYYYMMDD.log). Either way it goes under $logfilepath.
if ($logfilename) {
        $logfilename = "$logfilepath/$logfilename";
}
else {
        $logfilename = "$logfilepath/$theyear$filemonth$fileday.log";
}


# Build the HTTP client used for every request below. It identifies itself
# with a browser-style agent string and carries a cookie jar so that cookies
# handed out by the appliance are sent back on later requests. The jar is
# backed by /tmp/lwpcookies.txt and saves itself automatically.
use HTTP::Cookies;

$ua = LWP::UserAgent->new(
        agent      => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        cookie_jar => HTTP::Cookies->new(
                file     => "/tmp/lwpcookies.txt",
                autosave => 1,
        ),
);


# Open a session on the appliance's admin console (port 8000) and log in.
#
# Two plain GETs come first purely to collect the cookies the console hands
# out; their responses are not inspected. The login itself is a form POST.
#
# NOTE(review): the header is sent as 'Referrer', exactly as the script has
# always done; the standard HTTP header is spelled 'Referer'. Left unchanged
# because the appliance's reaction to the standard header is unknown here.
my $login_url = "http://$googlehost:8000/EnterpriseController";

#get some magic cookies
$req = HTTP::Request->new(GET => "http://$googlehost:8000/");
$req->header('Referrer' => '');
$res = $ua->request($req);

#then load the login page (and get more cookies)
$req = HTTP::Request->new(GET => $login_url);
$req->header('Referrer' => '');
$res = $ua->request($req);

#let's log into the box :)
# The form is passed as a list so LWP URL-encodes each value. Splicing
# $username/$password straight into the body breaks as soon as either
# contains '&', '=', '+', '%' or a space. An array reference (not a hash)
# keeps the fields in this order.
$res = $ua->post(
        $login_url,
        [
                actionType => 'authenticateUser',
                userName   => $username,
                password   => $password,
                login      => 'Login',
        ],
        'Referrer' => $login_url,
);

# Only 4xx/5xx (including LWP's own connection failures) are fatal: a
# redirect after a successful login is not an error.
if ($res->is_error) {
        die "Error: login request to $googlehost failed: " . $res->status_line . "\n";
}


# Ask the appliance to generate a log report named "AutoGeneratedReport" for
# $collection covering yesterday's date. The date is sent as
# date_<month>_<day>_<year> using the unpadded values.
$req = HTTP::Request->new(GET => "http://$googlehost:8000/EnterpriseController"
        . "?collection=" . $collection
        . "&reportType=0&reportName=AutoGeneratedReport"
        . "&dateFormat=recent&recentDate=date_" . $themonth . "_" . $theday . "_" . $theyear
        . "&actionType=generateLog&generateLog=Generate+Log");

# Double quotes are required here: in single quotes the literal text
# '$googlehost' was being sent instead of the host name.
$req->header('Referrer' => "http://$googlehost:8000/EnterpriseController");
$res = $ua->request($req);

# Stop if the request failed, and say why. The message goes to STDERR through
# die rather than being printed and followed by an empty die.
if (!($res->is_success)) {
        die "Error: " . $res->status_line . "\n";
}

#print $res->content;

# Wait for the report to finish generating.
#
# The log listing page is fetched every 10 seconds until it no longer shows
# "AutoGeneratedReport ... Generating" on one line. As before, the 10 second
# pause also follows the final poll.
#
# The wait is capped so that a report stuck in "Generating" cannot hang the
# script forever. The default of 8640 polls is 24 hours at 10 seconds each;
# set $maxpolls before this point to change it.
$maxpolls = 8640 unless defined $maxpolls;
my $polls_done = 0;

$output = "AutoGeneratedReport Generating";
while ($output =~ /AutoGeneratedReport.*Generating/) {
        if ($polls_done++ >= $maxpolls) {
                die "Error: report still generating after $maxpolls checks, giving up\n";
        }

        $req = HTTP::Request->new(GET => "http://$googlehost:8000/EnterpriseController?actionType=listLogs");
        $req->header('Referrer' => '');
        $res = $ua->request($req);

        #check the outcome
        if (!($res->is_success)) {
                die "Error: " . $res->status_line . "\n";
        }

        $output = $res->content;
        sleep 10;
}

#print "Report done!\n";

#print $res->content;


# Download the generated report and save it to $logfilename.
$req = HTTP::Request->new(GET => "http://$googlehost:8000/EnterpriseController?actionType=fileExport&fileBrowse=WEB_LOG&fileArgs=AutoGeneratedReport&collection=$collection");

# Double quotes so that $googlehost is actually interpolated.
$req->header('Referrer' => "http://$googlehost:8000/EnterpriseController");

# The export is requested once; it used to be sent twice in a row, fetching
# the whole log a second time and throwing the first copy away.
$res = $ua->request($req);

# check the outcome
if (!($res->is_success)) {
        die "Error: " . $res->status_line . "\n";
}

$logfile = $res->content;

# Three-argument open with a lexical handle, and every step checked: an
# unwritable path or a full disk now stops the script instead of silently
# leaving a missing or truncated log behind.
open(my $templog, '>', $logfilename)
        or die "Error: cannot open $logfilename for writing: $!\n";
print {$templog} $logfile
        or die "Error: cannot write to $logfilename: $!\n";
close($templog)
        or die "Error: cannot finish writing $logfilename: $!\n";


# Tidy up on the appliance: first delete the generated report, then log out
# of the admin console. Both are simple GETs and neither response is checked.
foreach my $action (
        "actionType=deleteReport&reportType=0&reportName=AutoGeneratedReport&collection=".$collection,
        "actionType=logout",
) {
        $req = HTTP::Request->new(GET => "http://$googlehost:8000/EnterpriseController?$action");
        $req->header('Referrer' => '');
        $res = $ua->request($req);
}


# Put the log file into proper chronological order by reversing its lines.
#
# The reversed copy is written to "$logfilename.tmp" and then moved over the
# original. Every step is checked: previously a failed read still produced an
# empty .tmp file that was then moved over the downloaded log, destroying it.
open(my $log_in, '<', $logfilename)
        or die "Error: cannot open $logfilename for reading: $!\n";
my @log_lines = <$log_in>;
close($log_in);

# If the last line has no newline it would be glued onto the line that
# follows it once the order is reversed, so terminate it first.
if (@log_lines && $log_lines[-1] !~ /\n\z/) {
        $log_lines[-1] .= "\n";
}

open(my $log_out, '>', "$logfilename.tmp")
        or die "Error: cannot open $logfilename.tmp for writing: $!\n";
print {$log_out} reverse(@log_lines)
        or die "Error: cannot write to $logfilename.tmp: $!\n";
close($log_out)
        or die "Error: cannot finish writing $logfilename.tmp: $!\n";

move("$logfilename.tmp", "$logfilename")
        or die "Error: cannot replace $logfilename with the reordered copy: $!\n";

