#!/usr/bin/perl
########################################
# This script logs into a Google Search Appliance and saves the collection
# configuration file to the local filesystem.
#
# Questions about this script can be directed to:
#   Blake Crosby (bcrosby@nm.cbc.ca)
#
# The Canadian Broadcasting Corporation does not support nor endorse this script.
#
# This script was tested on FreeBSD and requires the following perl modules:
#   libwwwperl (LWP::UserAgent)
#   HTML::TokeParser
#
# This script was tested on a 5005 model Google Search Appliance.
#
# The user this script logs in as must be:
#   - Version 3.4.10 or less: Collection Assistant
#   - Version 3.4.12:         Collection Manager
# (See user accounts in your admin interface for more information)
#
# Output: the exported configuration is written to the current directory as
#   <collection>-<YYYYMMDD-HHMMSS>.data
# A cookie file named lwpcookies.txt is also maintained in the current directory.
#
# Revision History
#
# 1.0 - Initial Revision
#########################################

use strict;
use warnings;

#################################
# Config section
#
# Hostname of the Google appliance you want to retrieve the collection
# configuration from.
# For example: google.hostname.ca
my $googlehost = 'google.hostname.ca';

#
# Username and password for the perl script to log in as. I suggest you create
# a new account specifically for this script.
# For example: user = updater  password = updateme
#
# NOTE(review): the requests below use plain http://, so these credentials are
# sent unencrypted; keep this script and its host on a trusted network.
my $username = 'updater';
my $password = 'updateme';

#
# The name of the collection whose configuration you want to retrieve.
# CASE IS IMPORTANT.
# For example: webpages
my $collection = 'webpages';

#### END OF CONFIG
#
# Don't edit below this line, unless you know what you are doing.
#

# Load libwwwperl. HTML::TokeParser is not referenced below; the import is
# retained because the header lists it as a requirement.
use HTML::TokeParser;
use LWP::UserAgent;
use HTTP::Cookies;    # need cookie support
use POSIX qw(strftime);

# Every admin action in this script goes through this one endpoint.
my $base_url   = "http://$googlehost:8000/";
my $controller = "http://$googlehost:8000/EnterpriseController";

# Timestamp used in the output file name (local time, same format the
# script previously obtained by shelling out to date(1)).
my $date = strftime('%Y%m%d-%H%M%S', localtime);

my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)');
$ua->cookie_jar(
    HTTP::Cookies->new(file => 'lwpcookies.txt', autosave => 1)
);

# Get some magic cookies, then load the login page (and get more cookies).
# These requests only prime the cookie jar, so a failure is reported but is
# not fatal by itself; a real problem will surface at login/export.
for my $url ($base_url, $controller) {
    my $res = $ua->get($url);
    warn "Warning: GET $url returned " . $res->status_line . "\n"
        unless $res->is_success;
}

# Let's log into the box :)
# Passing the form as a field list lets LWP URL-encode each value, so
# credentials containing characters such as '&', '=', '+' or '%' are sent
# intact instead of corrupting the request body.
my $login = $ua->post(
    $controller,
    [
        actionType => 'authenticateUser',
        userName   => $username,
        password   => $password,
        login      => 'Login',
    ],
    Referer => $controller,    # the HTTP header field is spelled "Referer"
);

# Only 4xx/5xx (including LWP's internal connection errors) are treated as
# fatal: LWP does not follow redirects for POST, so a 3xx answer after a
# login must not be mistaken for a failure.
die "Error: login request failed: " . $login->status_line . "\n"
    if $login->is_error;

# Let's download the configuration data. The value ' Export ' is encoded on
# the wire as "+Export+".
my $export = $ua->post(
    $controller,
    [
        actionType     => 'webCrawlSelect',
        crawls         => $collection,
        webCrawlExport => ' Export ',
    ],
    Referer => $controller,
);

# Log out of the box before looking at the outcome, so the session is closed
# even when the export failed. The logout response is deliberately ignored.
$ua->get("$controller?actionType=logout");

# Check the outcome.
die "Error: " . $export->status_line . "\n"
    unless $export->is_success;

# Save the response body verbatim.
my $outfile = "$collection-$date.data";
open my $out, '>', $outfile
    or die "Error: cannot open $outfile for writing: $!\n";
binmode $out;    # the body is raw bytes; do not apply newline translation
print {$out} $export->content
    or die "Error: cannot write to $outfile: $!\n";
close $out
    or die "Error: cannot close $outfile: $!\n";