#!/usr/bin/perl
########################################
# This script logs into a Google Search Appliance and retrieves the raw log
# file for the previous day.
#
# Questions about this script can be directed to:
#   Blake Crosby (bcrosby@nm.cbc.ca)
#
# The Canadian Broadcasting Corporation does not support nor endorse this
# script.
#
# This script was tested on FreeBSD and requires the following perl modules:
#   libwwwperl (LWP::UserAgent, HTTP::Cookies)
#   HTML::TokeParser
#   File::Copy and Time::Local (both ship with perl)
#
# This script was tested on a 5005 model Google Search Appliance.
#
# The user this script logs in as must be:
#   - Version 3.4.10 or less: Collection Assistant
#   - Version 3.4.12:         Collection Manager
#   (See user accounts in your admin interface for more information)
#
# Revision History
#
# 1.1 - Fixed small "date bug" where logs would not be downloaded when months
#       had a leading "0"
# 1.2 - Changed some code to work properly with GoogleOS version 3.4.12
#       (removed leading 0 on days as well as fixed some referrer fields).
# 1.3 - Added code to reverse the log file as the GSA returns it in reverse
#       chronological order
#
#########################################

use strict;
use warnings;

#################################
# Config section
#

# Hostname of the Google appliance you want to retrieve logs from.
# For example: google.hostname.ca
my $googlehost = 'google.hostname.ca';

# Username and password for the perl script to log in as. I suggest you
# create a new account specifically for this script.
# For example: user = googlescript  password = google
my $username = 'googlescript';
my $password = 'google';

# The collection name of the logs you want to retrieve. CASE IS IMPORTANT.
# For example: webpages
my $collection = 'webpages';

# File name to store the log file.
# For example: googlesearch.log
my $logfilename = 'googlesearch.log';

#### END OF CONFIG
#
# Don't edit below this line, unless you know what you are doing.
#

# load libwwwperl and helpers
use HTML::TokeParser;
use LWP::UserAgent;
use HTTP::Cookies;               # need cookie support
use File::Copy qw(move);
use Time::Local qw(timelocal);

# Seconds to wait between "is the report ready yet?" polls, and how many
# polls to make before giving up (360 * 10s = one hour).
my $poll_interval = 10;
my $max_polls     = 360;

my $base       = "http://$googlehost:8000";
my $controller = "$base/EnterpriseController";

# NOTE(review): every request below sends a header named 'Referrer' (double
# "r"), exactly as this script has always done. The standard HTTP header is
# spelled 'Referer'; the name is left untouched here because the script was
# tested against the appliance in this form -- confirm before changing it.

# Return the appliance's date token for yesterday: "date_<month>_<day>_<year>"
# with no leading zeros on month or day.
sub yesterday_arg {
    # Start from local noon today: noon minus 24 hours always falls on
    # yesterday's date, even across a daylight-saving change.
    my @today      = localtime;
    my $noon_today = timelocal(0, 0, 12, $today[3], $today[4], $today[5] + 1900);
    my ($day, $month, $year) = (localtime($noon_today - 24 * 60 * 60))[3, 4, 5];

    return sprintf 'date_%d_%d_%d', $month + 1, $day, $year + 1900;
}

# Die with the HTTP status line unless the response is a success; otherwise
# hand the response back so calls can be chained.
sub ensure_ok {
    my ($res, $what) = @_;

    die "Error: $what failed: " . $res->status_line . "\n"
        unless $res->is_success;
    return $res;
}

# Log in, generate the report for $datearg, wait for it to finish and return
# the exported log as one string. Dies on any fatal HTTP error.
sub fetch_log {
    my ($ua, $datearg) = @_;

    # get some magic cookies, then load the login page (and get more cookies)
    for my $url ("$base/", $controller) {
        my $res = $ua->get($url, 'Referrer' => '');
        warn "Warning: GET $url returned " . $res->status_line . "\n"
            unless $res->is_success;
    }

    # let's log into the box :)
    # Passing the fields as a list lets LWP form-encode them, so a password
    # containing '&', '=' or '+' no longer corrupts the request body.
    my $res = $ua->post(
        $controller,
        [
            actionType => 'authenticateUser',
            userName   => $username,
            password   => $password,
            login      => 'Login',
        ],
        'Referrer' => $controller,
    );
    # Only 4xx/5xx is treated as fatal: LWP does not follow redirects on
    # POST, so a redirect after a login must not abort the run.
    die "Error: login failed: " . $res->status_line . "\n" if $res->is_error;

    # go to the view summaries form input page
    $res = $ua->get("$controller?actionType=webLogSelect&index=$collection",
        'Referrer' => '');
    warn "Warning: log selection page returned " . $res->status_line . "\n"
        unless $res->is_success;

    # generate the report
    $res = ensure_ok(
        $ua->post(
            $controller,
            [
                index      => $collection,
                reportType => 'recent',
                recentDate => $datearg,
                actionType => 'webLogView',
                webLogView => 'View Log',
            ],
            'Referrer' => $controller,
        ),
        'report request',
    );

    # Find the page to poll while the report is being generated.
    # NOTE(review): the pattern originally used here was lost from this file
    # (it had become an empty //). This assumes the appliance answers with a
    # <meta http-equiv="refresh" content="N;url=..."> tag -- confirm against
    # a real response.
    my ($newurl) = $res->content =~ m{
        <meta \b [^>]* http-equiv \s*=\s* ["']? refresh ["']? [^>]*
        \b content \s*=\s* ["']? \s* \d+ \s* ; \s* url \s*=\s* ["']? ([^"'>\s]+)
    }xi;
    die "Error: could not find the report status URL in the response\n"
        unless defined $newurl;

    my $poll_url = $newurl =~ m{\Ahttps?://}i ? $newurl
                 : $newurl =~ m{\A/}          ? "$base$newurl"
                 :                              "$base/$newurl";

    # Poll until the appliance stops saying it is still generating the log.
    my $polls = 0;
    while (1) {
        $res = ensure_ok($ua->get($poll_url, 'Referrer' => ''), 'status poll');
        last unless $res->content =~ /Currently generating log/;

        die "Error: report still not ready after $max_polls polls\n"
            if ++$polls >= $max_polls;
        sleep $poll_interval;
    }

    # now we need to download the file!
    # NOTE(review): displayString is sent with the same fixed value the
    # script has always used ("May 4, 2003"); presumably the appliance only
    # uses it for display -- confirm.
    $res = ensure_ok(
        $ua->post(
            $controller,
            [
                fileExport    => ' Export to File ',
                fileArgs      => $datearg,
                actionType    => 'fileExport',
                index         => $collection,
                fileBrowse    => 'WEB_LOG',
                displayString => 'May 4, 2003',
            ],
            'Referrer' =>
                "$controller?actionType=webLogView&index=$collection&refreshDate=$datearg",
        ),
        'log download',
    );

    return $res->content;
}

# Write $data to $path with its lines in reverse order (the appliance returns
# the log newest-first). The data goes to "$path.tmp" first and is then moved
# into place, so a failed write never leaves a truncated log behind.
sub write_reversed {
    my ($data, $path) = @_;

    my @lines = split /^/, $data;
    # Make sure the last line is terminated, otherwise it would run into the
    # following line once the order is reversed.
    $lines[-1] .= "\n" if @lines && $lines[-1] !~ /\n\z/;

    my $tmp = "$path.tmp";
    open my $out, '>', $tmp or die "Error: cannot open $tmp: $!\n";
    print {$out} reverse @lines or die "Error: cannot write $tmp: $!\n";
    close $out or die "Error: cannot close $tmp: $!\n";

    move($tmp, $path) or die "Error: cannot move $tmp to $path: $!\n";
    return;
}

my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)');
$ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt", autosave => 1));

my $logdata = eval { fetch_log($ua, yesterday_arg()) };
my $error   = $@;

# log out of the box, whether or not the download worked
$ua->get("$controller?actionType=logout", 'Referrer' => '');

die $error unless defined $logdata;

# let's put the log file in the proper chronological order
write_reversed($logdata, $logfilename);