#!/usr/bin/perl
########################################
# This script logs into a google search appliance and saves the collection configuration file
# to the local filesystem.
#
# Questions about this script can be directed to:
# Blake Crosby (bcrosby@nm.cbc.ca)
#
# The Canadian Broadcasting Corporation does not support nor endorse this script.
#
#
#
# This script was tested on FreeBSD and requires the following perl modules:
# libwwwperl (LWP::UserAgent)
# HTML::TokeParser
#
# This script was tested on a 5005 model Google Search Appliance.
#
# The user this script logs in as must be:
# - Version 3.4.10 or less: Collection Assistant
# - Version 3.4.12: Collection Manager
# (See user accounts in your admin interface for more information)
#
#
# Revision History
#
# 1.0 - Initial Revision
#########################################


#################################
# Config section
#
#
# hostname of the Google appliance you want to retrieve the collection configuration from
# For example: google.hostname.ca

# Declared with "our" so the settings stay package globals that the rest of
# the script can read.
our $googlehost = 'google.hostname.ca';

#
# Credentials the script uses to log in. Creating a dedicated account just
# for this script is recommended.
# For example: user = updater password = updateme
#

our $username = 'updater';
our $password = 'updateme';


#
# Name of the collection you want to retrieve. The name is case-sensitive.
# For example: webpages

our $collection = 'webpages';


#### END OF CONFIG
#
#
#Don't edit below this line unless you know what you are doing.
#


#load libwwwperl
use HTML::TokeParser;
use LWP::UserAgent;


#########################################
# Main flow
#
# 1. Request the front page and the login page so the cookie jar is primed.
# 2. Log in with the configured username and password.
# 3. Ask the admin console to export the configured collection.
# 4. Log out, then save the exported data to
#    "<collection>-<YYYYmmdd-HHMMSS>.data" in the current directory.
#
# Dies if the export request does not return an HTTP success status, or if
# the output file cannot be opened, written or closed. Failures of the
# preparatory requests only produce a warning.
#########################################
use strict;
use warnings;
use POSIX qw(strftime);

#need cookie support;
use HTTP::Cookies;

# Copy the settings from the config section into lexicals. The fully
# qualified names keep this code valid under "use strict" while the config
# section goes on using plain package variables.
my $host       = $main::googlehost;
my $login_name = $main::username;
my $login_pass = $main::password;
my $crawl_name = $main::collection;

my $base_url       = "http://$host:8000";
my $controller_url = "$base_url/EnterpriseController";

# Timestamp for the output file name, in local time. strftime produces the
# same format the external date(1) command was asked for, without spawning
# a shell.
my $date = strftime('%Y%m%d-%H%M%S', localtime);

my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)');
$ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt",
                                   autosave => 1));

# Report a failed preparatory request without aborting: only the export
# response decides whether the run succeeded.
my $note_failure = sub {
    my ($step, $response) = @_;
    warn "Warning: $step failed: " . $response->status_line . "\n"
        if $response->is_error;
    return;
};

# NOTE(review): every request sends a header literally named "Referrer";
# the standard HTTP request header is spelled "Referer". The name is kept
# exactly as it was -- confirm against the appliance before changing it.

#get some magic cookies
$note_failure->('initial page request',
                $ua->get("$base_url/", 'Referrer' => ''));

#then load the login page (and get more cookies)
$note_failure->('login page request',
                $ua->get($controller_url, 'Referrer' => ''));

#let's log into the box :)
# The form is passed as a field list so that LWP URL-encodes each value;
# a username or password containing characters such as "&", "=", "+" or "%"
# is therefore sent intact. Config values must be given raw, not pre-encoded.
$note_failure->('login',
                $ua->post($controller_url,
                          [
                              actionType => 'authenticateUser',
                              userName   => $login_name,
                              password   => $login_pass,
                              login      => 'Login',
                          ],
                          'Referrer' => $controller_url));

#let's download the configuration data
my $res = $ua->post($controller_url,
                    [
                        actionType     => 'webCrawlSelect',
                        crawls         => $crawl_name,
                        webCrawlExport => ' Export ',   # sent as "+Export+"
                    ],
                    'Referrer' => $controller_url);

# check the outcome
# NOTE(review): only the HTTP status is checked. Whether the appliance
# answers a rejected login with a success status (e.g. a login page) cannot
# be told from here -- verify the saved file after the first run.
if (!$res->is_success) {
    die "Error: " . $res->status_line . "\n";
}

my $config_data = $res->content;

#log out of the box
# Done before the file is written so that a write failure below cannot
# leave the session logged in.
$ua->get("$controller_url?actionType=logout", 'Referrer' => '');

# Save the export. Three-argument open with a lexical handle keeps unusual
# characters in the collection name from being read as an open mode, and
# every step is checked so a full disk or bad path is reported.
my $outfile = "$crawl_name-$date.data";
open(my $out, '>', $outfile)
    or die "Error: cannot open $outfile for writing: $!\n";
binmode($out);    # write the bytes exactly as they were received
print {$out} $config_data
    or die "Error: cannot write to $outfile: $!\n";
close($out)
    or die "Error: cannot close $outfile: $!\n";

