#!/usr/bin/perl 
#===========================================================#
# Websense URL Lookup Automation Script                     #
# Copyright 2004 Chris Sawall  				    #
#                      sawall -[at]- gmail -[dot]- com      #
# http://tech.stlsawall.com                                 #
#                                                           #
my $written = "08/04/04";                                   #
my $lastupdated = "04/07/05";                               #
my $version = "1.1";                                        #
my $myemail = "sawall\@gmail.com";                          #
#===========================================================#
#===========================================================#
# COPYRIGHT NOTICE                                          #
# Copyright 2004 Chris Sawall  All Rights Reserved.         #
#                                                           #
# Websense URL Lookup Automation Script (checksite.pl) may  #
# be used and modified free of charge by anyone so long as  #
# this copyright notice and the comments above remain       #
# intact.  By using this code you agree to indemnify        #
# Chris Sawall from any liability that might arise from     #
# its use.                                                  #
#                                                           #
# Selling the code for this program without prior written   #
# consent is expressly forbidden.  In other words,          #
# please ask first before you try and make money off of my  #
# program.                                                  #
#                                                           #
# Obtain permission before redistributing this software     #
# over the Internet or in any other medium. In all cases    #
# copyright and header must remain intact.                  #
#===========================================================#
#
# The main purpose of this script is to automate
# the Site Lookup tool on the Websense web site:
# http://ww2.websense.com/global/en/SupportAndKB/SiteLookup/Index.php
#
# This allows others in your organization to use
# the utility without each of them having to
# individually get an account.
#
#==========================================
# the WWW::Mechanize Perl module will need to be installed:
#
# [root@yoursystem root]# cpan
#
# cpan shell -- CPAN exploration and modules installation (v1.7601)
# ReadLine support available (try 'install Bundle::CPAN')
#
# cpan> install WWW::Mechanize
#
#==========================================
#
# Changelog
#
# 04/07/05 - Websense modified their pages and broke the original
#	     	script.  So I have modified this script to work with
#	     	their current page.  You will be able to tell the 
#        	main modifications because I have not yet totally
#	     	cleaned up the script, I just commented out the old
#        	stuff and put in the new to get it up and working.  I
#        	will try to get it cleaned up soon, for looks I guess.
#        
#	     	Also, the link has been changed from:
#        	http://www.websense.com/sitelookup/
#        	to:
#	     	http://ww2.websense.com/global/en/SupportAndKB/SiteLookup/Index.php
#
#==========================================

use strict;
use warnings;

# Autoflush the currently selected output handle (STDOUT) so output is
# written as soon as it is printed.
$| = 1;

use WWW::Mechanize 0.48;

# Set to 1 to print "-------" progress markers into the generated page.
my $debug = 0;
# Set to 1 to look up a fixed test URL instead of the one from the request.
my $testdata = 0;


# Script-wide working variables, shared by the sections below.
my (@values, $z, $varname, $mydata, $weburl, $data);

# --- Content and variables that need to be set ---
# $baseurl and $websensepage build the [Home] and "Return to the site
# lookup tool" links at the bottom of the result page.
my $baseurl = "https://yourserver/";
my $websensepage = "sitelookup.html";
# Account used to log in to the Websense site.  The password is stored
# here in plain text, so restrict read access to this file.
my $username = "user\@domain.com";
my $password = "password";

# --- Content and variables that should not change ---
# NOTE: this is a plain http:// URL, so the login above is not encrypted.
#my $websensesite = "http://www.websense.com/sitelookup/Index.php";
my $websensesite = "http://ww2.websense.com/global/en/SupportAndKB/SiteLookup/Index.php";
# $starttag and $endtag were the original start and stop points.
# However, extra info was displayed in the output, so the search
# strings were changed to get closer to the content.
#
# If Websense ever changes the information around the table containing
# the results, move back to using the original $starttag and $endtag
# options (set $useorigtag to 1).
my $useorigtag = 0;
#my $starttag = "<!--- Content starts here --->";
#my $endtag = "<!--- Content ends here --->";
#my $starttag2="<p><b>Products > Resources > Site Lookup Tool</b>";
#my $endtag2="<p>If you believe this site is not categorized correctly, ";
# Text markers that bracket the lookup result in the returned page.
my $starttag = "SITE LOOKUP TOOL";
my $endtag = "If you believe this site";
my $starttag2 = "SITE LOOKUP TOOL";
my $endtag2 = "If you believe this site";

# --- Emit the CGI header and the opening HTML of the result page ---
# The blank line after Content-type ends the HTTP header section.  The
# table/cell opened here is closed by the footer at the end of the script.
print
    "Content-type: text/html\n\n",
    "<html><head><title>Websense URL Sitelookup</title></head>\n",
    "<body bgcolor=black text=white> <font face=Courier size=2>\n",
    "<center><table border=0 cellpadding=4 cellspacing=2 width=\"100%\">\n",
    "<tr><td nowrap align=left>\n";
print "------- Debugging ON -------<br>\n" if $debug;

# --- Grab variable from HTTP GET ---
# Parse the CGI query string by hand and pick out the "weburl" parameter,
# which holds the URL the user wants categorized.  Other parameters are
# ignored.  If "weburl" is absent or empty, $weburl is left undefined.
if ($ENV{'QUERY_STRING'})
{
if ($debug) {print "------- Setting Query String variables<br>\n";}
foreach my $pair (split(/&/, $ENV{'QUERY_STRING'}))
        {
        # Limit the split to two fields so a value containing "=" is
        # not truncated at the second "=".
        my ($name, $value) = split(/=/, $pair, 2);
        next unless defined $name && $name eq "weburl";
        $value = "" unless defined $value;

        # Undo form URL-encoding: "+" means space, %XX is a raw byte.
        # Without this a submitted "http%3A%2F%2Fhost" would be looked
        # up literally (and encoded a second time on submit).
        $value =~ tr/+/ /;
        $value =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;

        # Drop whitespace, control characters and < > " : none of them
        # are valid in a URL, and the value is later echoed into HTML.
        $value =~ tr/\x00-\x20\x7f<>"//d;

        # Keep the historical behaviour of an empty value: undefined.
        $weburl = length($value) ? $value : undef;
        }
}

# --- Optionally override the requested URL with fixed test data ---
# When $testdata is on, whatever came from the query string is replaced.
if ($testdata)
{
    $weburl = "www.yahoo.com";
    print "------- Using test data ($weburl)<br>\n" if $debug;
}

# --- Drive the Websense site: fetch page, log in, submit the lookup ---
# Every request is followed by a success check; on failure the script
# dies with the HTTP status line (the message goes to STDERR).
my $mech = WWW::Mechanize->new(  );

# --- Goto main Websense site
$mech->get($websensesite);
$mech->success or die $mech->response->status_line;

# --- Log into site ---
if ($debug) {print "------- Logging in as $username<br>\n";}
# The form is selected by position (third form on the page).  Fail with a
# clear message if the page layout changed and that form no longer exists.
$mech->form_number( 3 ) or die "login form (form 3) not found on $websensesite\n";
$mech->field( Email => $username );
$mech->field( Password => $password );
$mech->submit(  );

$mech->success or die "post failed: ",
   $mech->response->status_line;

# --- Enter URL to lookup ---
if ($debug) {print "------- Looking up URL ($weburl)<br>\n";}
# Reload the lookup page, now with the logged-in session, and submit the
# URL through the third form.
$mech->get($websensesite);
$mech->success or die $mech->response->status_line;
$mech->form_number( 3 ) or die "lookup form (form 3) not found on $websensesite\n";
$mech->field( LookupURL => $weburl );
$mech->submit(  );

# The lookup request itself was previously the only one left unchecked.
$mech->success or die "lookup failed: ",
   $mech->response->status_line;

if ($debug) {print "------- Dumping Data<br>\n";}

# --- Determine if we're using the original tags or the more detailed tags
# Pick the pair of marker strings that bracket the result in the page.
my ($open_tag, $close_tag) = ($starttag2, $endtag2);
if ($useorigtag)
 {
 if ($debug) {print "------- Using Original TAG information<br>\n";}
 ($open_tag, $close_tag) = ($starttag, $endtag);
 }

# Capture everything between the first start marker and the next end
# marker.  \Q...\E makes the markers match as literal text, so a marker
# containing regex metacharacters cannot change the pattern; /s lets "."
# span newlines.
($data) = ($mech->content() =~ /\Q$open_tag\E(.*?)\Q$close_tag\E/s);

# If the markers were not found, fall back to an empty result instead of
# leaving $data undefined.
$data = "" unless defined $data;

# --- Close up and finish off HTML doc
# Print the extracted Websense content, a link to the looked-up site and
# the navigation links, then close the markup opened by the page header.

# $weburl comes from the request, so it must never reach the page raw.
my $shown = defined $weburl ? $weburl : "";
# Only prepend "http://" when the user did not already supply a scheme
# (avoids links such as "http://http://host").  Only http, https and ftp
# are recognized, so other schemes are never emitted as-is.
my $link = $shown =~ m{\A(?:https?|ftp)://}i ? $shown : "http://$shown";
# HTML-escape both forms; "&" must be replaced first.
foreach my $text ($shown, $link)
 {
 $text =~ s/&/&amp;/g;
 $text =~ s/</&lt;/g;
 $text =~ s/>/&gt;/g;
 $text =~ s/"/&quot;/g;
 }

# $data is HTML taken from the Websense page and is deliberately printed
# as markup; only guard against it being undefined.
my $content = defined $data ? $data : "";

print "<!----- Start content from Websense.com -----><br>\n";
print "$content\n";
print "<!----- End content from Websense.com -----><br>\n";
print "<br>Visit&nbsp;&nbsp;&nbsp;<a href=\"$link\">$shown</a><br><br>\n";
print "<a href=\"$baseurl\">[Home]</a>&nbsp;|&nbsp;";
print "<a href=\"$baseurl$websensepage\">[Return to the site lookup tool]</a><br>";
print "</td></tr></table></center></font>\n";
print "</body></html>\n";
exit;