checksite1.1.txt
#!/usr/bin/perl #===========================================================# # Websense URL Lookup Automation Script # # Copyright 2004 Chris Sawall # # sawall -[at]- gmail -[dot]- com # # http://tech.stlsawall.com # # # my $written = "08/04/04"; # my $lastupdated = "04/07/05"; # my $version = "1.1"; # my $myemail = "sawall\@gmail.com"; # #===========================================================# #===========================================================# # COPYRIGHT NOTICE # # Copyright 2004 Chris Sawall All Rights Reserved. # # # # Websense URL Lookup Automation Script (checksite.pl) may # # be used and modified free of charge by anyone so long as # # this copyright notice and the comments above remain # # intact. By using this code you agree to indemnify # # Chris Sawall from any liability that might arise from # # its use. # # # # Selling the code for this program without prior written # # consent is expressly forbidden. In other words, # # please ask first before you try and make money off of my # # program. # # # # Obtain permission before redistributing this software # # over the Internet or in any other medium. In all cases # # copyright and header must remain intact. # #===========================================================# # # The main purpose of this script is to automate # the Site Lookup tool on the Websense web site: # http://ww2.websense.com/global/en/SupportAndKB/SiteLookup/Index.php # # This allows others in your organization to use # the utility without each of them having to get # an individual account. 
#
#==========================================
# The WWW::Mechanize Perl module will need to be installed:
#
#   [root@yoursystem root]# cpan
#
#   cpan shell -- CPAN exploration and modules installation (v1.7601)
#   ReadLine support available (try 'install Bundle::CPAN')
#
#   cpan> install WWW::Mechanize
#
#==========================================
#
# Changelog
#
# 04/07/05 - Websense modified their pages and broke the original
#            script.  So I have modified this script to work with
#            their current page.  You will be able to tell the
#            main modifications because I have not yet totally
#            cleaned up the script, I just commented out the old
#            stuff and put in the new to get it up and working.  I
#            will try to get it cleaned up soon, for looks I guess.
#
#            Also, the link has been changed from:
#               http://www.websense.com/sitelookup/
#            to:
#               http://ww2.websense.com/global/en/SupportAndKB/SiteLookup/Index.php
#
#==========================================

use strict;
use warnings;
$|++;    # unbuffered output, so debug lines show up as each step runs

use WWW::Mechanize 0.48;

my $debug    = 0;    # 1 = print progress markers into the generated page
my $testdata = 0;    # 1 = ignore the query string and look up a fixed URL

# --- Content and variables that need to be set ---
my $baseurl      = "https://yourserver/";
my $websensepage = "sitelookup.html";
my $username     = "user\@domain.com";
my $password     = "password";

# --- Content and variables that should not change ---
#my $websensesite = "http://www.websense.com/sitelookup/Index.php";
my $websensesite = "http://ww2.websense.com/global/en/SupportAndKB/SiteLookup/Index.php";

# $starttag and $endtag were the original start and stop points.
# However, extra info was displayed when output, so the search
# strings were changed to get closer to the content.
#
# If Websense ever changes the information around the table containing
# the results, move back to using the original $starttag and $endtag
# options by setting $useorigtag to 1.
my $useorigtag = 0;
#my $starttag = "<!--- Content starts here --->";
#my $endtag = "<!--- Content ends here --->";
#my $starttag2="<p><b>Products > Resources > Site Lookup Tool</b>";
#my $endtag2="<p>If you believe this site is not categorized correctly, ";
my $starttag  = "SITE LOOKUP TOOL";
my $endtag    = "If you believe this site";
my $starttag2 = "SITE LOOKUP TOOL";
my $endtag2   = "If you believe this site";

# Escape the characters that are special in HTML text and in quoted
# attribute values.  Returns an empty string for an undefined argument.
sub html_escape {
    my ($text) = @_;
    return '' unless defined $text;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );
    $text =~ s/([&<>"'])/$entity_for{$1}/g;
    return $text;
}

# Decode one application/x-www-form-urlencoded component:
# '+' becomes a space and %XX becomes the corresponding byte.
sub url_decode {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ tr/+/ /;
    $text =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;
    return $text;
}

# Print the navigation links and close every element opened by the
# page header, so the document is well formed on every exit path.
sub print_footer {
    print "<a href=\"$baseurl\">[Home]</a> | ";
    print "<a href=\"$baseurl$websensepage\">[Return to the site lookup tool]</a><br>";
    print "</td></tr></table></center></font>\n";
    print "</body></html>\n";
    return;
}

# Report a failure inside the page (HTML-escaped), log it to STDERR for
# the web server's error log, finish the document and stop.  A bare die
# here would leave the browser with a truncated page because the
# Content-type header and opening markup have already been sent.
sub fail_page {
    my ($message) = @_;
    $message = 'unknown error' unless defined $message && length $message;
    $message =~ s/\s+\z//;
    warn "checksite: $message\n";
    print "<br><b>Error:</b> ", html_escape($message), "<br><br>\n";
    print_footer();
    exit 1;
}

# --- Print out HTML content ---
print "Content-type: text/html\n\n";
print "<html><head><title>Websense URL Sitelookup</title></head>\n";
print "<body bgcolor=black text=white> <font face=Courier size=2>\n";
print "<center><table border=0 cellpadding=4 cellspacing=2 width=\"100%\">\n";
print "<tr><td nowrap align=left>\n";

print "------- Debugging ON -------<br>\n" if $debug;

# --- Grab variable from HTTP GET ---
# Only the "weburl" parameter is used; if it appears more than once the
# last occurrence wins.  The value is URL-decoded because browsers
# percent-encode characters such as ':' and '/' when submitting a form.
my $weburl;
if ( defined $ENV{'QUERY_STRING'} && length $ENV{'QUERY_STRING'} ) {
    print "------- Setting Query String variables<br>\n" if $debug;
    for my $pair ( split /&/, $ENV{'QUERY_STRING'} ) {
        # Limit of 2 keeps any '=' that is part of the value itself.
        my ( $name, $value ) = split /=/, $pair, 2;
        next unless defined $name && url_decode($name) eq 'weburl';
        $weburl = url_decode($value);
    }
}

# --- Set and use test data if required ---
if ($testdata) {
    $weburl = "www.yahoo.com";
    print "------- Using test data ($weburl)<br>\n" if $debug;
}

# Nothing to look up: say so instead of submitting an empty form.
if ( defined $weburl ) {
    $weburl =~ s/\A\s+//;
    $weburl =~ s/\s+\z//;
}
if ( !defined $weburl || $weburl eq '' ) {
    fail_page("No URL was supplied; pass one in the 'weburl' parameter.");
}

# $weburl is untrusted input; only this escaped copy may be printed
# into the page.
my $weburl_html = html_escape($weburl);

# --- Talk to Websense ---
# Everything that can fail on the network runs inside eval, so both our
# own die calls and exceptions thrown by WWW::Mechanize itself end up in
# fail_page() rather than aborting the script mid-page.
my $content;
my $lookup_ok = eval {
    my $mech = WWW::Mechanize->new();

    # --- Goto main Websense site
    $mech->get($websensesite);
    $mech->success or die $mech->response->status_line, "\n";

    # --- Log into site ---
    print "------- Logging in as ", html_escape($username), "<br>\n" if $debug;
    $mech->form_number(3);
    $mech->field( Email    => $username );
    $mech->field( Password => $password );
    $mech->submit();
    $mech->success or die "post failed: ", $mech->response->status_line, "\n";

    # --- Enter URL to lookup ---
    print "------- Looking up URL ($weburl_html)<br>\n" if $debug;
    $mech->get($websensesite);
    $mech->success or die $mech->response->status_line, "\n";
    $mech->form_number(3);
    $mech->field( LookupURL => $weburl );
    $mech->submit();
    $mech->success or die "lookup failed: ", $mech->response->status_line, "\n";

    $content = $mech->content;
    1;
};
if ( !$lookup_ok ) {
    fail_page( "Websense lookup failed: " . ( $@ || 'unknown error' ) );
}

print "------- Dumping Data<br>\n" if $debug;

# --- Determine if we're using the original tags or the more detailed tags
# \Q...\E makes the tags match literally even if they contain regex
# metacharacters (the commented-out original tags do).
my $data;
if ($useorigtag) {
    print "------- Using Original TAG information<br>\n" if $debug;
    ($data) = $content =~ /\s*\Q$starttag\E(.*?)\Q$endtag\E/s;
}
else {
    ($data) = $content =~ /\Q$starttag2\E(.*?)\Q$endtag2\E/s;
}

if ( !defined $data ) {
    fail_page("Could not find the lookup result in the Websense response; "
            . "the page layout may have changed.");
}

# --- Close up and finish off the HTML doc
# NOTE(review): $data is HTML taken verbatim from the remote site and is
# deliberately printed unescaped so the result table renders.
print "<!----- Start content from Websense.com -----><br>\n";
print "$data\n";
print "<!----- End content from Websense.com -----><br>\n";
print "<br>Visit <a href=\"http://$weburl_html\">$weburl_html</a><br><br>\n";
print_footer();
exit;