#!/usr/bin/perl
########################################
# This script logs into a Google Search Appliance and retrieves the raw
# log file for the previous day.
#
# Questions about this script can be directed to:
#   Blake Crosby (bcrosby@nm.cbc.ca)
#
# This script was tested on FreeBSD and requires the following perl modules:
#   libwwwperl (LWP::UserAgent, HTTP::Cookies)
#   URI (installed as a dependency of libwwwperl)
#   HTML::TokeParser
#   File::Copy
#   Time::Local (core)
#
# This script was tested on a 1001 model Google Search Appliance
# (Version 4.6.2.S.12)
#########################################

use strict;
use warnings;

#################################
# Config section
#

# Hostname of the Google appliance you want to retrieve logs from.
# For example: google.hostname.ca
my $googlehost = 'google.hostname.ca';

# Username and password for the script to log in as. I suggest you create
# a new account specifically for this script.
# For example: user = googlescript  password = google
my $username = 'googlescript';
my $password = 'google';

# The collection name of the logs you want to retrieve. CASE IS IMPORTANT.
# For example: webpages
my $collection = 'webpages';

# The front end name of the logs you want to retrieve. CASE IS IMPORTANT.
# For example: webpages
my $frontend = 'webpages';

# File name to store the log file in.
# For example: googlesearch.log
#
# By default yesterday's date will be used (20051128.log); however, you can
# override that by uncommenting the assignment below.
my $logfilename;
#$logfilename = 'googlesearch.log';

# Path to where you want to save log files.
# For example: /var/logs
#
# NOTE: You must **NOT** have a trailing slash!
my $logfilepath = ".";

#### END OF CONFIG
#
# Don't edit below this line, unless you know what you are doing.
#

use HTML::TokeParser;
use LWP::UserAgent;
use HTTP::Cookies;
use URI;
use File::Copy;
use Time::Local;

# Name of the temporary report created (and later deleted) on the appliance.
my $report_name = 'AutoGeneratedReport';

# Base URLs of the appliance's admin interface.
my $admin_root = "http://$googlehost:8000/";
my $controller = "http://$googlehost:8000/EnterpriseController";

# Build a controller URL whose query string is properly form-encoded.
# Arguments: a flat key => value list; parameter order is preserved.
# Returns: a URI object.
sub controller_url {
    my (@params) = @_;
    my $uri = URI->new($controller);
    $uri->query_form(\@params) if @params;
    return $uri;
}

# Abort the script with the HTTP status line unless the response succeeded.
# Returns the response unchanged so calls can be chained.
sub die_unless_success {
    my ($res) = @_;
    die "Error: " . $res->status_line . "\n" unless $res->is_success;
    return $res;
}

# Calculate yesterday's date. Working from local noon instead of "now"
# keeps the result on the right calendar day even when a daylight-saving
# change makes a day 23 or 25 hours long.
my @now        = localtime;
my $noon_today = timelocal(0, 0, 12, $now[3], $now[4], $now[5] + 1900);
my ($theday, $themonth, $theyear) = (localtime($noon_today - 86_400))[3, 4, 5];
$themonth += 1;
$theyear  += 1900;

# If a log file name was specified, use that; otherwise use the date.
if (defined $logfilename && length $logfilename) {
    $logfilename = "$logfilepath/$logfilename";
}
else {
    $logfilename = sprintf "%s/%04d%02d%02d.log",
        $logfilepath, $theyear, $themonth, $theday;
}

my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)');

# The appliance needs cookie support.
# NOTE(review): the jar lives at a fixed, world-guessable path in /tmp;
# consider a private location if this host has untrusted local users.
$ua->cookie_jar(HTTP::Cookies->new(file => "/tmp/lwpcookies.txt", autosave => 1));

# Get some magic cookies ...
$ua->get($admin_root);

# ... then load the login page (and get more cookies).
$ua->get($controller);

# Log into the box. Passing the form as a list lets LWP URL-encode the
# credentials, so characters such as '&', '=' or '+' survive intact.
# The result is deliberately not checked: a redirect here is not an error.
$ua->post(
    $controller,
    [
        actionType => 'authenticateUser',
        userName   => $username,
        password   => $password,
        login      => 'Login',
    ],
    Referer => $controller,
);

# Ask the appliance to generate the report for yesterday.
die_unless_success(
    $ua->get(
        controller_url(
            collection  => $collection,
            reportType  => 0,
            reportName  => $report_name,
            dateFormat  => 'recent',
            recentDate  => "date_${themonth}_${theday}_${theyear}",
            actionType  => 'generateLog',
            generateLog => 'Generate Log',
        ),
        Referer => $controller,
    )
);

# Poll the report list until our report is no longer marked as generating.
while (1) {
    my $listing =
        die_unless_success($ua->get(controller_url(actionType => 'listLogs')))
        ->content;
    last unless $listing =~ /AutoGeneratedReport.*Generating/;
    sleep 10;
}

# Now download the finished report (once).
my $logfile = die_unless_success(
    $ua->get(
        controller_url(
            actionType => 'fileExport',
            fileBrowse => 'WEB_LOG',
            fileArgs   => $report_name,
            collection => $collection,
        ),
        Referer => $controller,
    )
)->content;

# Delete the generated report from the appliance (best effort).
$ua->get(
    controller_url(
        actionType => 'deleteReport',
        reportType => 0,
        reportName => $report_name,
        collection => $collection,
    )
);

# Log out of the box (best effort).
$ua->get(controller_url(actionType => 'logout'));

# The appliance returns the newest entries first; reverse the lines so the
# log file is in proper chronological order. split /^/ keeps each line's
# trailing newline, exactly as reading the file line by line would.
my @lines = reverse split /^/, $logfile;

# Write to a temporary file first and move it into place afterwards, so a
# failed write never leaves a truncated log under the final name.
my $tmpname = "$logfilename.tmp";
open my $out, '>', $tmpname or die "Cannot write $tmpname: $!\n";
print {$out} @lines     or die "Cannot write $tmpname: $!\n";
close $out              or die "Cannot close $tmpname: $!\n";

move($tmpname, $logfilename)
    or die "Cannot move $tmpname to $logfilename: $!\n";