############################################
##                                        ##
##                WebTester               ##
##           by Darryl Burgdorf           ##
##       (e-mail burgdorf@awsd.com)       ##
##                                        ##
##              version 1.05              ##
##         last modified:  5/2/98         ##
##           copyright (c) 1998           ##
##                                        ##
##    latest version is available from    ##
##        http://awsd.com/scripts/        ##
##                                        ##
############################################

# COPYRIGHT NOTICE:
#
# Copyright 1998 Darryl C. Burgdorf.  All Rights Reserved.
#
# This program may be used and modified free of charge by anyone, so
# long as this copyright notice and the header above remain intact.  By
# using this program you agree to indemnify Darryl C. Burgdorf from any
# liability.
#
# Selling the code for this program without prior written consent is
# expressly forbidden.  Obtain permission before redistributing this
# program over the Internet or in any other medium.  In all cases
# copyright and header must remain intact.

# VERSION HISTORY:
#
# 1.05  05/02/98  Removed "$Avoid" items from report lists & map
#                 Added options to print mod dates & "new" tags
#                 Added "$ListBinaryLinks" variable
#                 Added "$Verbose" flag
#                 Expanded list to include all HTTP/1.1 status codes
#                 Eliminated need to "pre-create" report files
#                 Allowed for links without quote marks
#                 Fixed bug with multiple frames or includes
#                 Hash variables are now properly initialized
#                 Files are now closed properly
#                 Various other minor improvements
# 1.04  05/24/97  Added trapping of "automount" paths
#                 Added handling of subpages contained in frames
#                 Fixed bug concerning raw directory references
# 1.03  05/06/97  Made "site map" report optional
#                 Added ISDN to download time chart
#                 Trapped for TITLE tags with no title
#                 Fixed bug in handling of "exec cgi" calls
#                 Fixed "errors only / files not referenced" bug
# 1.02  04/19/97  Changed name from "WebMapper" to "WebTester"
#                 Added computation of total "download" file size
#                 (Hopefully) Improved speed of file parsing
#                 "Optionalized" parsing of CGI scripts
#                 Fixed bug affecting some SSI-based CGI calls
#                 The usual tweaks and minor bug fixes
# 1.01  02/27/97  Added ability to influence site map organization
#                 Added parsing of CGIs, SSIs and image maps
#                 Various minor bug fixes
# 1.00  02/03/97  Initial "public" release

# MainProg -- top-level entry point.  Call it once the configuration
# variables ($InFile, $OutFile, $MapFile, $Verbose, ...) have been set.
#
# Loads the directory-walking support that PrintMissingLinks needs,
# builds the lookup tables, writes the site-check report, writes the
# site map when $MapFile is set, and then exits.  It never returns.
sub MainProg {
	# "find.pl" is a Perl 4 library that is no longer shipped with the
	# Perl core (it was removed in 5.16).  When it cannot be loaded, supply
	# the part of its interface this script relies on, built on File::Find:
	# a &find that walks the given directories calling &wanted, with $name
	# aliased to the full path of the entry being visited.
	unless (eval { require "find.pl"; 1 }) {
		require File::Find;
		*name = \$File::Find::name;
		*find = sub { &File::Find::find(\&wanted, @_); };
	}
	&Initialize;
	&SiteCheck;
	if ($MapFile) { &SiteMap; }
	if ($Verbose) { print "Report Complete\n"; }
	exit;
}

# Initialize -- set up the global state used by the rest of the script.
#
# Reads the configuration globals $LocalPath, $LocalURL, $CGIPath,
# $CGIURL, $ImageMapPointer, $ImageMapPath and $ParseCGI, and sets:
#   $version, $date, @days, @months  - report header information
#   @default_files                   - names tried for directory links
#   $temp_file                       - per-process scratch file name
#   %OkStatusMsgs, %FailStatusMsgs   - HTTP status code tables
#   $RootPath, $RootURL              - $LocalPath/$LocalURL with the
#                                      URL's path component removed
#   $nbdy_uid, $nbdy_gid             - ids of user "nobody" (only when
#                                      $ParseCGI is set)
# and empties every result hash.  Takes no arguments.
sub Initialize {
	$version = "1.05";
	@days=('Sun','Mon','Tue','Wed','Thu','Fri','Sat');
	@months=('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec');
	$time=time;
	($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime($time);
	# localtime returns the year as an offset from 1900, so it must be
	# added, not appended to "19" (which yields "19100" from 2000 on).
	$date="$days[$wday], $mday $months[$mon] " . ($year + 1900);
	@default_files = (
	  'index.html','index.htm','index.shtml','index.phtml','index.cgi',
	  'welcome.html','welcome.htm','welcome.shtml','welcome.phtml'
	  );
	$temp_file = "/tmp/webtester.$$";
	%OkStatusMsgs = (
	  '200','OK',
	  '201','Created',
	  '202','Accepted',
	  '203','Non-Authoritative Information',
	  '204','No Content',
	  '205','Reset Content',
	  '206','Partial Content',
	  '304','Not Modified'
	  );
	# Negative codes are produced locally by Check_URL; the rest are HTTP
	# status codes that are treated as failures.
	%FailStatusMsgs = (
	  '-1','Could Not Look Up Server',
	  '-2','Could Not Open Socket',
	  '-3','Could Not Bind Socket',
	  '-4','Could Not Connect',
	  '100','Continue',
	  '101','Switching Protocols',
	  '300','Multiple Choices',
	  '301','Moved Permanently',
	  '302','Moved Temporarily',
	  '303','See Other',
	  '305','Use Proxy',
	  '400','Bad Request',
	  '401','Unauthorized',
	  '402','Payment Required',
	  '403','Forbidden',
	  '404','Not Found',
	  '405','Method Not Allowed',
	  '406','Not Acceptable',
	  '407','Proxy Authentication Required',
	  '408','Request Time-Out',
	  '409','Conflict',
	  '410','Gone',
	  '411','Length Required',
	  '412','Precondition Failed',
	  '413','Request Entity Too Large',
	  '414','Request-URI Too Large',
	  '415','Unsupported Media Type',
	  '500','Internal Server Error',
	  '501','Not Implemented',
	  '502','Bad Gateway',
	  '503','Service Unavailable',
	  '504','Gateway Time-Out',
	  '505','HTTP Version Not Supported',
	  '600','Bad Request',
	  '601','Not Implemented',
	  '602','Connection Failed',
	  '603','Timed Out'
	  );
	# Strip one trailing slash so paths and URLs can be joined uniformly.
	$LocalPath =~ s/\/$//;
	$LocalURL =~ s/\/$//;
	$CGIPath =~ s/\/$//;
	$CGIURL =~ s/\/$//;
	$ImageMapPointer =~ s/\/$//;
	$ImageMapPath =~ s/\/$//;
	$RootPath = $LocalPath;
	$RootURL = $LocalURL;
	if ($LocalURL =~ m#^http://([\w.-]+):?(\d*)($|/(.*))#) {
		$delete = $3;
		# $delete is the path part of the URL ("/docs" in
		# "http://host/docs").  It is removed only as a literal trailing
		# component.  An empty $delete must be skipped: an empty pattern makes
		# Perl reuse the last successful regex, which would erase $RootURL.
		if ($delete ne "") {
			$RootPath =~ s/\Q$delete\E$//;
			$RootURL =~ s/\Q$delete\E$//;
		}
	}
	if ($ParseCGI) { ($nbdy_uid,$nbdy_gid) = (getpwnam('nobody'))[2,3]; }
	# A pattern no CGI path is expected to match, so "parse CGI" tests fail.
	else { $ParseCGI = "xxxxxxxxxx"; }
	%FileList = ();
	%DirList = ();
	%ImageFileList = ();
	%ExtFileList = ();
	%MailList = ();
	%FTPList = ();
	%TelnetList = ();
	%GopherList = ();
	%NewsList = ();
	%HTTPList = ();
	%LostFileList = ();
	%UnreadableList = ();
	%DirNotFoundList = ();
	%LostAnchorList = ();
	%HTTP_Fail_List = ();
}

# SiteCheck -- write the complete HTML report to $OutFile.
#
# Prints the page header and the index, crawls the site starting from
# $InFile (through Get_Refs), then prints one section per result list.
# The informational sections are left out when $ShowOnlyErrors is 1, the
# "found but not referenced" section appears only when $MissingLinks is
# 1, and external URLs are tested unless $IgnoreExternals is 1.
# Dies when the key file does not exist or the report cannot be opened.
sub SiteCheck {
	local($errors_only) = ($ShowOnlyErrors == 1);
	local($skip_externals) = ($IgnoreExternals == 1);
	local($want_orphans) = ($MissingLinks == 1);

	unless (-e $InFile) { die "Cannot find file $InFile\n"; }
	open (LOG,">$OutFile") || die "Error opening file: $OutFile\n";
	print "Checking Site: $SiteName\n" if ($Verbose);

	# Page header.
	print LOG "<HTML><HEAD>";
	print LOG "<TITLE>Site Check: $SiteName</TITLE></HEAD>\n";
	print LOG "<BODY BGCOLOR=\"#99ccff\" TEXT=\"#000000\">\n";
	print LOG "<H1 ALIGN=CENTER>Site Check:<BR>$SiteName</H1>\n";
	$fileref = $InFile;
	$fileref =~ s/$LocalPath/$LocalURL/g;
	print LOG "<P ALIGN=CENTER><STRONG>Key File: ";
	print LOG "<A HREF=\"$fileref\">$fileref</A></STRONG>\n<BR>$date\n";
	print LOG "<P ALIGN=CENTER><STRONG><EM>";
	print LOG ($errors_only ? "&quot;Errors Only&quot; Report" : "Full Report");
	print LOG "</STRONG>";
	print LOG "<BR>(External URLs Not Checked)" if ($skip_externals);
	print LOG "</EM></P>\n";

	# Index of the sections that follow.
	print LOG "<A NAME=\"index\"><HR></A><H2 ALIGN=CENTER>Index</H2>\n";
	if (!$errors_only) {
		print LOG "<P><UL><LI><STRONG>General Site Map ";
		print LOG "Information</STRONG></P><P><UL>\n";
		print LOG "<LI><A HREF=\"#document\">Local Documents</A>\n";
		print LOG "<LI><A HREF=\"#sizes\">&quot;Download&quot; ";
		print LOG "File Sizes &amp; Times</A>\n";
		print LOG "<LI><A HREF=\"#directory\">Directories</A>\n";
		print LOG "<LI><A HREF=\"#image\">Images &amp; Binaries</A>\n";
		print LOG "<LI><A HREF=\"#file\">&quot;File:&quot; Links</A>\n";
		print LOG "<LI><A HREF=\"#mailto\">&quot;Mailto:&quot; Links</A>\n";
		print LOG "<LI><A HREF=\"#ftp\">&quot;FTP:&quot; Links</A>\n";
		print LOG "<LI><A HREF=\"#telnet\">&quot;Telnet:&quot; Links</A>\n";
		print LOG "<LI><A HREF=\"#gopher\">&quot;Gopher:&quot; Links</A>\n";
		print LOG "<LI><A HREF=\"#news\">&quot;News:&quot; Links</A>\n";
		print LOG "<LI><A HREF=\"#url\">External URLs</A>\n";
	}
	print LOG "</UL></P><P><LI><STRONG>";
	print LOG "Bad Links &amp; Other Problems</STRONG></P><P><UL>\n";
	print LOG "<P><LI><A HREF=\"#nofile\">Files Not Found</A>\n";
	print LOG "<LI><A HREF=\"#badfile\">Files Not World Readable</A>\n";
	if ($want_orphans) {
		print LOG "<LI><A HREF=\"#missinglink\">Files Found But Not Referenced</A>\n";
	}
	print LOG "<LI><A HREF=\"#nodirectory\">Directories Not Found</A>\n";
	print LOG "<LI><A HREF=\"#noanchor\">Name Anchors Not Found</A>\n";
	if (!$skip_externals) {
		print LOG "<LI><A HREF=\"#badurl\">Failed External URLs</A>\n";
	}
	print LOG "</UL></UL></P>\n\n";

	# Crawl the site; the key file has no referring document.
	&Get_Refs($InFile,"<none>");

	# Informational sections.
	if (!$errors_only) {
		&Print_List(%FileList,"Local Documents::document");
		&Print_Sizes;
		&Print_List(%DirList,"Directories::directory");
		&Print_List(%ImageFileList,"Images &amp; Binaries::image");
		&Print_List(%ExtFileList,"&quot;File:&quot; Links::file");
		&Print_List(%MailList,"&quot;Mailto:&quot; Links::mailto");
		&Print_List(%FTPList,"&quot;FTP:&quot; Links::ftp");
		&Print_List(%TelnetList,"&quot;Telnet:&quot; Links::telnet");
		&Print_List(%GopherList,"&quot;Gopher:&quot; Links::gopher");
		&Print_List(%NewsList,"&quot;News:&quot; Links::news");
		&Print_List(%HTTPList,"External URLs::url");
	}

	# Problem sections.
	&Print_List(%LostFileList,"Files Not Found::nofile");
	&Print_List(%UnreadableList,"Files Not World Readable::badfile");
	&PrintMissingLinks if ($want_orphans);
	&Print_List(%DirNotFoundList,"Directories Not Found::nodirectory");
	&Print_List(%LostAnchorList,"Name Anchors Not Found::noanchor");
	&Check_External_URLs(%HTTPList) if (!$skip_externals);

	# Page footer.
	print LOG "<HR>\n";
	print LOG "<P ALIGN=CENTER><SMALL>This report was generated by ";
	print LOG "<STRONG><A HREF=\"http://awsd.com/scripts/webtester/\">";
	print LOG "WebTester $version</A></STRONG></SMALL></P>\n";
	print LOG "</BODY></HTML>\n";
	close (LOG);
}

# SiteMap -- write the hierarchical site map to $MapFile.
#
# Starting from $InFile at level 1, makes ten passes over the growing
# @SiteMap list; on pass N every page at level N contributes its not yet
# placed, eligible references (taken from %RefsTo) at level N+1.  The
# resulting %SiteLevel table is then rendered by BuildMap.
# Dies when the map file cannot be opened.
sub SiteMap {
	open (MAP,">$MapFile") || die "Error opening file: $MapFile\n";
	print "Building Map: $SiteName\n" if ($Verbose);
	push (@SiteMap, $InFile);
	$SiteLevel{$InFile} = 1;
	foreach $level (1..10) {
		# @SiteMap grows while it is being walked; pages appended during this
		# pass belong to a deeper level and are skipped by the first test.
		foreach $page (@SiteMap) {
			next unless ($SiteLevel{$page} eq $level);
			@references = split(/ /,$RefsTo{$page});
			foreach $target (@references) {
				# Already placed somewhere in the map.
				next if ($SiteLevel{$target});
				# Only files under the document or CGI trees are mapped.
				next unless (($target =~ /$LocalPath/) || ($target =~ /$CGIPath/));
				next if ($Avoid && ($target =~ /$Avoid/));
				$looks_like_cgi = ($target =~ /cgi-bin|cgi$|pl$/i);
				# CGI scripts are mapped only when they match $ParseCGI.
				next if ($looks_like_cgi && !($target =~ /$ParseCGI/));
				# Binary files and directories are never mapped.
				next if (!$looks_like_cgi && (-B $target));
				next if (-d $target);
				# %MinLevel can hold a page back until a deeper pass.
				next if ($MinLevel{$target} > ($level+1));
				push (@SiteMap, $target);
				$SiteLevel{$target} = $SiteLevel{$page}+1;
			}
		}
	}
	print MAP "<P><STRONG>\n";
	&BuildMap($InFile);
	print MAP "</P>\n";
	close (MAP);
}

# BuildMap -- print the site-map entry for one page, then recurse.
#
# $_[0] is the local path of the page.  Writes to the MAP filehandle a
# link to the page (labelled with its title when one is known), the
# optional modification date and "NEW!" tag, and a nested <UL> holding
# the entries of every page it references that sits exactly one level
# deeper in %SiteLevel.  The key file ($InFile) is printed as the map's
# heading instead of as a list item.
sub BuildMap {
	local($page) = $_[0];
	local($is_root) = ($page eq $InFile);
	local(@sortrefs);
	local($lastref);

	# Translate the local path into the URL used for the link.
	$fileref = $page;
	$fileref =~ s/$CGIPath/$CGIURL/g;
	$fileref =~ s/$LocalPath/$LocalURL/g;

	print MAP "<LI>" unless ($is_root);
	print MAP "<A HREF=\"$fileref\">";
	print MAP ($title{$page} ? "$title{$page}" : "$fileref");
	print MAP "</A>";
	print MAP " - <SMALL>$update{$page}</SMALL>" if ($update{$page} && $PrintDates);
	print MAP " - <STRONG><EM>NEW!</EM></STRONG>"
	  if ($age{$page} && ($age{$page} < $DaysNew));
	print MAP "\n";
	print MAP "</STRONG></P><P>\n" if ($is_root);

	print MAP "<UL>\n";
	@references = split(/ /,$RefsTo{$page});
	# The sorted copy is local, so the recursive calls cannot disturb it.
	@sortrefs = sort @references;
	foreach $reference (@sortrefs) {
		next unless ($SiteLevel{$reference});
		next unless ($SiteLevel{$reference} eq ($SiteLevel{$page}+1));
		# The list is sorted, so duplicates are adjacent.
		next if ($reference eq $lastref);
		&BuildMap($reference);
		$lastref = $reference;
	}
	print MAP "</UL>\n";
}

# _Note_Ref -- private helper for Get_Refs.
# Adds $value to the space-separated list kept under $key in the hash
# referenced by $table, creating the entry when it does not exist yet.
sub _Note_Ref {
	my($table, $key, $value) = @_;
	if (!defined($table->{$key})) { $table->{$key} = $value; }
	else { $table->{$key} = "$table->{$key} $value"; }
}

# _Absorb_File -- private helper for Get_Refs.
# Appends the contents of $path (the target of an SSI include or of a
# frame) to the global $SSI_text buffer, adds the number of bytes read
# from that file to $bytesize{$owner}, marks $path as seen, and re-applies
# the one-tag-per-line normalization to the buffer.  A file that cannot
# be opened is silently skipped.
sub _Absorb_File {
	my($path, $owner) = @_;
	my($got);
	my($total) = 0;
	# Three-argument open: $path is taken from page content and must never
	# be interpreted as a pipe or as a write mode.
	open(SSI, "<", $path) || return;
	$SSI_text = "" unless (defined($SSI_text));
	do {
		# Always append at the current end of the buffer, and count only the
		# bytes that came from this file.
		$got = read(SSI, $SSI_text, 32768, length($SSI_text));
		$total = $total + $got;
	} until $got != 32768;
	close(SSI);
	$bytesize{$owner} = $bytesize{$owner} + $total;
	$fileseen{$path} = 1;
	$SSI_text =~ s/\n/ /g;
	$SSI_text =~ s/[^<]*//;
	$SSI_text =~ s/(<[^>]*>)[^<]*/$1\n/g;
}

# Get_Refs -- record one reference and, for a local text document that
# has not been visited before, parse it and follow its references.
#
#   $_[0]  the reference to examine: a local path or a URL
#   $_[1]  the document containing the reference ("<none>" for the key
#          file)
#
# The reference is filed in one of the global result hashes (%HTTPList,
# %FTPList, %TelnetList, %GopherList, %MailList, %NewsList, %ExtFileList,
# %DirList, %DirNotFoundList, %LostAnchorList, %LostFileList,
# %ImageFileList, %FileList, %UnreadableList), each mapping a target to
# the space-separated list of documents that refer to it.  %RefsTo and
# %BadRefsTo hold the reverse mapping for good and broken links.
#
# For a local document seen for the first time, %update, %age, %title
# and %bytesize are filled in, and the document is scanned for SSI
# includes, frames, "exec cgi" calls, image maps, HREF, IMG SRC,
# BACKGROUND and ACTION attributes; every reference found is passed to
# Get_Refs recursively.
#
# Side effects: changes the working directory while it runs (it is only
# restored on the path that parses a document), rewrites $temp_file, and
# switches the real and effective user/group ids to those of "nobody"
# while a CGI script is executed.
sub Get_Refs {
	local(%newlist);
	local(%bytesizer);
	local($file);
	local($dir);
	local($Old_Dir);
	local($filename);
	my($scheme, $prefix, $table, $link, $first_visit);
	$dir=&Dir_Name($_[0]);
	if ($dir eq "") { $dir = &Get_PWD; }
	$file=&Base_Name($_[0]);

	# References carrying a URL scheme are only recorded, never followed.
	# The order matters: the first scheme found anywhere in the string wins.
	foreach $scheme (
	  ['http',   \%HTTPList],
	  ['ftp',    \%FTPList],
	  ['telnet', \%TelnetList],
	  ['gopher', \%GopherList],
	  ['mailto', \%MailList],
	  ['news',   \%NewsList],
	  ['file',   \%ExtFileList]
	  ) {
		($prefix, $table) = @$scheme;
		if ($_[0] =~ m/.*($prefix:.*)/i) {
			$link = $1;
			&_Note_Ref($table, $link, $_[1]);
			&_Note_Ref(\%RefsTo, $_[1], $link);
			return;
		}
	}

	# A reference ending in "/" names a directory.
	if ($file eq "") {
		if (-d $_[0]) {
			&_Note_Ref(\%DirList, $_[0], $_[1]);
			&_Note_Ref(\%RefsTo, $_[1], $_[0]);
		}
		else {
			&_Note_Ref(\%DirNotFoundList, $_[0], $_[1]);
			&_Note_Ref(\%BadRefsTo, $_[1], $_[0]);
			return;
		}
	}

	# Resolve the reference to a canonical absolute file name.
	$Old_Dir = &Get_PWD;
	chdir($dir);
	$dir = &Get_PWD;
	if ($_[0] =~ m#(^/cgi-bin/.*)#i) {
		$filename = $_[0];
		$filename =~ s#^/cgi-bin#$CGIPath#oi;
	}
	else { $filename = $dir . $file; }
	# A document referring to itself needs no further work.
	return if ($filename eq $_[1]);

	# "file#anchor": only check that the anchor exists in the target.
	if ($filename =~ m/(.+)#(.+)/) {
		my($target, $fragment) = ($1, $2);
		$filename = "$target#$fragment";
		if (!(&CheckAnchor($target,$fragment))) {
			&_Note_Ref(\%LostAnchorList, $filename, $_[1]);
			&_Note_Ref(\%BadRefsTo, $_[1], $filename);
		}
		return;
	}

	# A directory stands for the first default file found inside it.
	if (-d $filename) {
		$found = 0;
		foreach $default_file (@default_files) {
			if ((-f ($filename.'/'.$default_file)) || (-f ($filename.$default_file))) {
				$dirname=$filename;
				$file= $default_file;
				$found = 1;
				last;
			}
		}
		if (! $found) {
			&_Note_Ref(\%LostFileList, $filename, $_[1]);
			&_Note_Ref(\%BadRefsTo, $_[1], $filename);
			return;
		}
		chdir($dirname);
		$dir = &Get_PWD;
		$filename = $dir . $file;
	}
	$fileseen{$filename} = 1;
	# The file tests name $filename explicitly instead of relying on the
	# implicit "_" stat cache left behind by the tests above.
	if (! -f $filename) {
		&_Note_Ref(\%LostFileList, $filename, $_[1]);
		&_Note_Ref(\%BadRefsTo, $_[1], $filename);
		return;
	}
	if (-B $filename) {
		&_Note_Ref(\%ImageFileList, $filename, $_[1]);
		&_Note_Ref(\%RefsTo, $_[1], $filename);
		return;
	}

	# Text document: record the link, but parse the document only once.
	$first_visit = !defined($FileList{$filename});
	&_Note_Ref(\%FileList, $filename, $_[1]);
	&_Note_Ref(\%RefsTo, $_[1], $filename);
	return unless ($first_visit);

	$mtime = (stat($filename))[9];
	if ($mtime > 0) {
		($mday,$mon,$yr) = (localtime($mtime))[3,4,5];
		$yr += 1900;
		$update{$filename} = "$mday $months[$mon] $yr";
		$age{$filename} = (-M $filename);
	}
	# Bit 4 of the mode is the "readable by others" permission.
	$mode = (stat($filename))[2];
	$readmode = ($mode & 4);
	if ($readmode == 0) {
		&_Note_Ref(\%UnreadableList, $filename, $_[1]);
	}

	# Decide whether the document itself is to be parsed.
	return if ($Avoid && ($filename =~ /$Avoid/));
	return unless (($filename =~ /$LocalPath/) || ($filename =~ /$CGIPath/));
	return if (($filename =~ /cgi-bin|cgi$|pl$/i) && !($filename =~ /$ParseCGI/));
	# This test used to look at $reference, a global left over from other
	# subroutines, rather than at the file being examined.
	return if ((-B $filename) && !($filename =~ /cgi-bin|cgi$|pl$/i));

	# Read the document; a CGI script is run as "nobody" and its output is
	# read instead.
	$err = 0;
	if ($filename =~ /cgi-bin|cgi$|pl$/i) {
		$( = $nbdy_gid;
		$) = $nbdy_gid;
		$< = $nbdy_uid;
		$> = $nbdy_uid;
		$cgifile = $filename;
		$cgifile =~ s/$CGIPath/$CGIURL/g;
		$cgifile =~ s/$LocalPath/$LocalURL/g;
		$ENV{'HTTP_USER_AGENT'} = 'Mozilla/3.0';
		$ENV{'SCRIPT_NAME'} = $cgifile;
		open(HTML, "$filename |") || ($err = 1);
	}
	else {
		open(HTML, "<", $filename) || ($err = 1);
	}
	if ($err) {
		if ($filename =~ /cgi-bin|cgi$|pl$/i) {
			$< = 0;
			$> = 0;
			$( = 0;
			$) = 0;
		}
		$fileseen{$filename} = 0;
		return;
	}
	open(TEMP,">$temp_file") || die "Could not create file: $temp_file\n";
	if ($Verbose) { print "  Reading File: $filename\n"; }
	$offset=0;
	do {
		$size=read(HTML,$html_text,32768,$offset);
		$offset=$offset+$size;
	} until $size != 32768;
	$bytesize{$filename} = $offset;
	close(HTML);
	if ($filename =~ /cgi-bin|cgi$|pl$/i) {
		$< = 0;
		$> = 0;
		$( = 0;
		$) = 0;
	}

	# Normalize the text to one tag per line, dropping everything between
	# tags, and store it in the scratch file.
	$html_text =~ s/\n/ /g;
	if ($html_text =~ /<TITLE>([^<>]+)<\/TITLE>/i) { $title{$filename} = $1; }
	$html_text =~ s/[^<]*//;
	$html_text =~ s/(<[^>]*>)[^<]*/$1\n/g;
	print TEMP "$html_text";
	$html_text="";
	close(TEMP);

	# First pass: pull in SSI includes and frame sources (their tags are
	# collected in $SSI_text) and note "exec cgi" targets.
	open(HTML, $temp_file) || die "Could not open file: $temp_file\n";
	while (<HTML>) {
		chop;
		if (/<!--#include\s+(virtual|file)\s*=\s*"*([^"\s]*)"*-->/i) {
			$SSIkey = $1;
			$SSIfile = $2;
			# "virtual" paths are relative to the document root, "file" paths
			# to the directory of the including document.
			if ($SSIkey eq 'virtual') { $SSIdir = $RootPath; }
			else { $SSIdir = $dir; }
			&_Absorb_File($SSIdir . $SSIfile, $filename);
		}
		if (/<FRAME.+src\s*=\s*"*([^"\s>]*)/i) {
			&_Absorb_File($dir . $1, $filename);
		}
		if (s/.+cgi\s*=\s*"*([^"\s>]*).+/$1/i) {
			$address = $_;
			# Entries prefixed with "MAP " are already absolute local paths.
			unless ($address =~ s#^/cgi-bin#MAP $CGIPath#oi) { 
				if ($address =~ m#^/#o) {
					$address = "MAP $RootPath$address";
				}
				else {
					$address = "MAP $RootPath/$address";
				}
			}
			$newlist{$address} = 1;
		}
	}
	close(HTML);
	open(TEMP,">>$temp_file") || die "Could not open file: $temp_file\n";
	print TEMP "$SSI_text";
	print TEMP "$Map_text";
	$SSI_text="";
	$Map_text="";
	close(TEMP);

	# Second pass: collect every reference in the document and in the
	# material that was pulled in.
	open(HTML, $temp_file) || die "Could not open file: $temp_file\n";
	while (<HTML>) {
		chop;
		if (/\/imagemap\/|\/htimage\/|\.map\s*\"/i) {
			$err=0;
			s/.+href\s*=\s*"*([^"\s>]*).+/$1/i;
			$IMapFile = $_;
			if ($ImageMapPointer) {
				$IMapFile =~ s/$ImageMapPointer/$ImageMapPath/;
			}
			elsif ($IMapFile =~ m#^/.*#) {
				$IMapFile = "$LocalPath$IMapFile";
			}
			else {
				$IMapFile = $dir . $IMapFile;
			}
			$IMapFile =~ s/^.*$CGIURL/$CGIPath/;
			$IMapFile =~ s/^.*$LocalURL/$LocalPath/;
			$IMapDir = $IMapFile;
			$IMapDir =~ s/^(.*)\/.*/$1/o;
			# The map file name comes from page content: read-only open.
			open(MAP, "<", $IMapFile) || ($err=1);
			unless ($err) {
				@MapRefs = <MAP>;
				close (MAP);
				$fileseen{$IMapFile} = 1;
				foreach $ref (@MapRefs) {
					$address = "";
					# NOTE(review): the entries built here end in a newline and join
					# $IMapDir to the target without a separator -- confirm that this
					# is what the map file formats in use require.
					if ($ref =~ /^.*(rect|circle|poly|default).*\(\d.*\s+([^<>'"\s]+)/i) {
						$address = "MAP $IMapDir$2\n";
					}
					elsif ($ref =~ /^.*(rect|circle|poly|default)\s+([^<>'"\s]+)/i) {
						$address = "MAP $IMapDir$2\n";
					}
					if ($address) { $newlist{$address} = 1; }
				}
			}
		}
		elsif (s/.+href\s*=\s*"*([^"\s>]*).+/$1/i) {
			if (m/^#.*/) {
				# A bare "#anchor" refers to the current document.
				$file_w_anchor = $filename;
				$file_w_anchor =~ s#.*/##;
				$newlist{"$file_w_anchor$_"} = 1;
			}
			else {
				$newlist{$_} = 1;
			}
		}
		# %bytesizer marks references whose size counts toward the page's
		# "download" size.
		if (s/<IMG.+src\s*=\s*"*([^"\s>]*).+src\s*=\s*"*([^"\s>]*).+/$1/i) {
			$newlist{$1} = 1;
			$newlist{$2} = 1;
			$bytesizer{$1} = 1;
			$bytesizer{$2} = 1;
		}
		if (s/<IMG.+src\s*=\s*"*([^"\s>]*).+/$1/i) {
			$newlist{$_} = 1;
			$bytesizer{$_} = 1;
		}
		if (s/.+background\s*=\s*"*([^"\s>]*).+/$1/i) {
			$newlist{$_} = 1;
			$bytesizer{$_} = 1;
		}
		if (s/.+action\s*=\s*"*([^"\s>]*).+/$1/i) {
			$newlist{$_} = 1;
		}
	}
	close(HTML);
	chdir($Old_Dir);

	# Turn every collected reference into a local path where possible and
	# follow it.
	foreach $file (keys(%newlist)) {
		if ($file =~ m#^MAP #) {
			$file =~ s/MAP //;
			$Notlocal_file = $file;
		}
		else {
			if ($file =~ m#^//.*#) {
				$file = "http:" . $file;
			}
			$Notlocal_file = $dir . $file;
			if ($file =~ m#^/cgi-bin/.*#) {
				$Notlocal_file = $file;
			}
			elsif ($file =~ m#^/.*#) {
				$Notlocal_file = "$RootPath$file";
			}
		}
		# Absolute URLs that point back into this site become local paths.
		$Notlocal_file =~ s/^.*$CGIURL/$CGIPath/;
		$Notlocal_file =~ s/^.*$LocalURL/$LocalPath/;
		if ($bytesizer{$file}) {
			$bytesize{$filename} = $bytesize{$filename}+(stat($Notlocal_file))[7];
			$bytesizer{$file} = 0;
		}
		# Drop any query string before looking for the file.
		$Notlocal_file =~ s/\?.*$//o;
		$Notlocal_file_ref = $filename;
		&Get_Refs($Notlocal_file, $Notlocal_file_ref);
	}
	unlink($temp_file);
}

# Base_Name -- return what follows the last "/" of the path in $_[0]
# (the whole string when it contains no "/", and the empty string when
# it ends in "/").
sub Base_Name {
	local($leaf) = @_;
	$leaf =~ s#.*/##;
	return $leaf;
}

# CheckAnchor -- report whether a document defines a given name anchor.
#
#   $_[0]  local path of the document
#   $_[1]  anchor name (the part after "#" in a link)
#
# Returns 1 when a line of the document contains <A NAME=...> with
# exactly that name (compared without regard to case; the value may be
# unquoted, double-quoted or single-quoted), and 0 otherwise, including
# when the document cannot be opened.  A path that looks like a CGI
# script is executed as user "nobody" and its output is searched
# instead.  Sets the globals $err and $result.
sub CheckAnchor {
	$err=0;
	local($fn, $anchor) = @_;
	$result = 0;
	# Escape every non-word character so the name is matched literally.
	$anchor =~ s/(\W)/\\$1/g;
	if ($fn =~ /cgi-bin|cgi$|pl$/i) {
		$( = $nbdy_gid;
		$) = $nbdy_gid;
		$< = $nbdy_uid;
		$> = $nbdy_uid;
		$cgifile = $fn;
		$cgifile =~ s/$CGIPath/$CGIURL/g;
		$cgifile =~ s/$LocalPath/$LocalURL/g;
		$ENV{'HTTP_USER_AGENT'} = 'Mozilla/3.0';
		$ENV{'SCRIPT_NAME'} = $cgifile;
		open(CH_HTML, "$fn |") || ($err=1);
	}
	else {
		# Plain file: open read-only, whatever characters the name contains.
		open(CH_HTML, "<", $fn) || ($err=1);
	}
	unless ($err) {
		while (<CH_HTML>) {
			chomp;
			# The name must be followed by a quote, white space, ">" or the end
			# of the line; otherwise "top" would be satisfied by NAME="topics".
			if (/<a +name *= *["']*$anchor(["'\s>]|$)/i) {
				$result = 1; 
				last;
			}
		}
		close(CH_HTML);
	}
	if ($fn =~ /cgi-bin|cgi$|pl$/i) {
		$< = 0;
		$> = 0;
		$( = 0;
		$) = 0;
	}
	return $result;
}

# Dir_Name -- return the directory part of the path in $_[0]: the path
# with whatever follows its last "/" removed.  The trailing "/" is kept;
# a path without any "/" yields the empty string.
sub Dir_Name {
	local($path) = @_;
	local($leaf) = $path;
	# Isolate the final component ...
	$leaf =~ s#.*/##;
	# ... escape it so it is matched literally ...
	$leaf =~ s/(\W)/\\$1/g;
	# ... and cut it off the end of the path.
	$path =~ s/$leaf$//;
	return $path;
}

# Get_PWD -- return the current working directory as reported by the
# "pwd" command, with a leading automounter prefix "/tmp_mnt" removed and
# a trailing "/" guaranteed.  Returns the empty string when "pwd"
# produces no output.  Works on a private variable so that the caller's
# $_ is left untouched.
sub Get_PWD {
	local($cwd);
	$cwd = `pwd`;
	$cwd =~ s#^/tmp_mnt##;
	$cwd =~ s#\n##;
	# Append "/" unless the path already ends in one (as "/" itself does).
	$cwd =~ s#([^/])$#$1/#;
	return $cwd;
}

# Print_List -- print one section of the report to the LOG filehandle.
#
# Called as  &Print_List(%list, "Section Title::anchor").
#   %list  maps each entry (a local path or a URL) to the space-separated
#          list of documents that refer to it; for the "badurl" section
#          it maps each URL to its status code instead
#   title  text for the section heading
#   anchor name of the section's <A NAME> target
#
# Every entry is printed once, as a URL, followed by the documents that
# link to it, the links it contains (%RefsTo) and its broken links
# (%BadRefsTo).  Entries matching $Avoid are skipped.  An empty list
# prints "(Nothing to Report.)".
sub Print_List {
	# The heading is the LAST argument.  It has to be taken off before the
	# rest is assigned to the hash: a hash on the left-hand side of a list
	# assignment absorbs every remaining value.
	local(@args) = @_;
	local($head) = pop(@args);
	local(%list) = @args;
	local($header, $anchor) = split(/::/, $head);
	local($file);
	print LOG "<A NAME=\"$anchor\"><HR></A>";
	print LOG "<H2 ALIGN=CENTER>$header</H2>\n\n";
	if (! %list) {
		print LOG "<P ALIGN=CENTER>(Nothing to Report.)</P>\n\n";
	}
	else {
		@TheList=keys(%list);
		@SortedList = sort @TheList;
		$lastfileref = "";
		foreach $file (@SortedList) {
			$fileref = $file;
			$fileref =~ s/$CGIPath/$CGIURL/g;
			$fileref =~ s/$LocalPath/$LocalURL/g;
			# Different paths can map to the same URL; print each URL once.
			next if ($fileref eq $lastfileref);
			next if ($Avoid && ($fileref =~ /$Avoid/));
			print LOG "<P><STRONG><A HREF=\"$fileref\">$fileref</A></STRONG>\n";
			if ($title{$file}) { print LOG " ($title{$file})\n"; }
			print LOG "<UL>";
			if ($anchor eq "badurl") {
				# Failed URLs: the value is the status code; the referring
				# documents are looked up in %HTTPList.
				$rcode = $list{$file};
				unless (int($rcode) == 0) {
					print LOG "<LI><EM>Status: $rcode ";
					print LOG "($FailStatusMsgs{$rcode})</EM>\n";
				}
				@lost = split(/ /,$HTTPList{$file});
			}
			else { @lost = split(/ /,$list{$file}); }
			if (@lost) {
				@sortlost = sort @lost;
				print LOG "<LI><EM>Local Documents With Links To This File:</EM><UL>\n";
				$lastlostfile = "";
				foreach $lostfile (@sortlost) {
					# "<none>" is the placeholder referrer of the key file.
					next if ($lostfile eq "<none>");
					$dup = $lostfile;
					$lostfile =~ s/$CGIPath/$CGIURL/g;
					$lostfile =~ s/$LocalPath/$LocalURL/g;
					next if ($lostfile eq $lastlostfile);
					print LOG "<LI><A HREF=\"$lostfile\">$lostfile</A>\n";
					if ($title{$dup}) { print LOG " ($title{$dup})\n"; }
					$lastlostfile = $lostfile;
				}
				print LOG "</UL>\n";
			}
			@lost = split(/ /,$RefsTo{$file});
			if (@lost) {
				@sortlost = sort @lost;
				print LOG "<LI><EM>Links From This File:</EM><UL>\n";
				$lastlostfile = "";
				foreach $lostfile (@sortlost) {
					$dup = $lostfile;
					$lostfile =~ s/$CGIPath/$CGIURL/g;
					$lostfile =~ s/$LocalPath/$LocalURL/g;
					$lostfile =~ s/$RootPath/$RootURL/g;
					next if ($lostfile eq $lastlostfile);
					# Links to images and other binaries are listed only on request.
					next if ((-B $dup) && !($ListBinaryLinks));
					print LOG "<LI><A HREF=\"$lostfile\">$lostfile</A>\n";
					if ($title{$dup}) { print LOG " ($title{$dup})\n"; }
					$lastlostfile = $lostfile;
				}
				print LOG "</UL>\n";
			}
			@lost = split(/ /,$BadRefsTo{$file});
			if (@lost) {
				@sortlost = sort @lost;
				print LOG "<LI><EM>&quot;Broken&quot; Local Links From This File:</EM><UL>\n";
				$lastlostfile = "";
				foreach $lostfile (@sortlost) {
					$dup = $lostfile;
					$lostfile =~ s/$CGIPath/$CGIURL/g;
					$lostfile =~ s/$LocalPath/$LocalURL/g;
					$lostfile =~ s/$RootPath/$RootURL/g;
					next if ($lostfile eq $lastlostfile);
					print LOG "<LI><A HREF=\"$lostfile\">$lostfile</A>\n";
					if ($title{$dup}) { print LOG " ($title{$dup})\n"; }
					$lastlostfile = $lostfile;
				}
				print LOG "</UL>\n";
			}
			$lastfileref = $fileref;
			print LOG "</UL></P>\n\n";
		}
	}
	print LOG "<P ALIGN=CENTER><SMALL>[ <A HREF=\"#index\">";
	print LOG "Return to Index</A> ]</SMALL></P>\n\n";
}

# Print_Sizes -- print the "download size" table to the LOG filehandle.
#
# For every document in %bytesize (total bytes of the page plus its
# included files and images) prints the size and the estimated transfer
# time, as hh:mm:ss, at four nominal rates in bytes per second: 1350
# (14.4 Kbps), 2700 (28.8 Kbps), 5400 (ISDN) and 150000 (T-1).
sub Print_Sizes {
	local($rate, $secs, $hrs, $mins);
	print LOG "<A NAME=\"sizes\"><HR></A><H2 ALIGN=CENTER>";
	print LOG "&quot;Download&quot; File Sizes &amp; Times</H2>\n\n";
	if (%bytesize) {
		print LOG "<P>This report shows the &quot;download&quot; file size of ";
		print LOG "each file (the total of the size of the HTML document itself, the sizes of any ";
		print LOG "text files included via SSI and the sizes of any embedded graphic images) and ";
		print LOG "provides estimated download times (hh:mm:ss) for each file over (basic) ";
		print LOG "14.4 Kbps, 28.8 Kbps, ISDN and T-1 connections.\n";
		print LOG "<P><CENTER><TABLE BORDER CELLPADDING=3><TR>\n";
		print LOG "<TH ALIGN=LEFT><P>File </TH><TH ALIGN=RIGHT>";
		print LOG "Size (Bytes) </TH><TH ALIGN=RIGHT>14.4 Kbps </TH>";
		print LOG "<TH ALIGN=RIGHT>28.8 Kbps </TH><TH ALIGN=RIGHT>";
		print LOG "ISDN </TH><TH ALIGN=RIGHT>T-1 </TH>\n</TR><TR>\n";
		$lastfileref = "";
		foreach $file (sort keys(%bytesize)) {
			$fileref = $file;
			$fileref =~ s/$CGIPath/$CGIURL/g;
			$fileref =~ s/$LocalPath/$LocalURL/g;
			# Different paths can map to the same URL; print each URL once.
			next if ($fileref eq $lastfileref);
			print LOG "<TD ALIGN=LEFT><P><STRONG>";
			print LOG "<A HREF=\"$fileref\">$fileref</A></STRONG> ";
			print LOG "<BR>($title{$file}) " if ($title{$file});
			print LOG "</TD>\n<TD ALIGN=RIGHT>";
			print LOG &commas($bytesize{$file})," </TD>\n";
			# One cell per connection speed.
			foreach $rate (1350, 2700, 5400, 150000) {
				# Seconds, rounded to the nearest whole second ...
				$secs = int(($bytesize{$file}/$rate)+.5);
				# ... split into hours, minutes and seconds.
				$hrs = int($secs/3600);
				$secs = $secs-($hrs*3600);
				$mins = int($secs/60);
				$secs = $secs-($mins*60);
				print LOG "<TD ALIGN=RIGHT>";
				print LOG sprintf("%02d:%02d:%02d", $hrs, $mins, $secs);
				print LOG " </TD>\n";
			}
			print LOG "</TR><TR>\n";
			$lastfileref = $fileref;
		}
		print LOG "</TR></TABLE></CENTER></P>\n";
	}
	else {
		print LOG "<P ALIGN=CENTER>(Nothing to Report.)</P>\n\n";
	}
	print LOG "<P ALIGN=CENTER><SMALL>[ <A HREF=\"#index\">";
	print LOG "Return to Index</A> ]</SMALL></P>\n\n";
}

# commas -- return the number in $_[0] with a comma inserted between
# each group of three digits, e.g. 1234567 becomes "1,234,567".
sub commas {
	local($number) = @_;
	# Each pass splits three digits off the rightmost run of four or more.
	while ($number =~ s/(.*\d)(\d\d\d)/$1,$2/) { }
	return $number;
}

# Check_External_URLs -- test every external URL and print the failures.
#
# The arguments are the %HTTPList hash (URL => referring documents).
# Each URL without an entry in %HTTPStatusList is requested through
# Check_URL; a result that is not a key of %OkStatusMsgs is stored in
# %HTTP_Fail_List, which is finally printed as the "badurl" section.
sub Check_External_URLs {
	local(%list) = @_;
	local($URL);
	foreach $URL (sort keys(%list)) {
		next if (defined($HTTPStatusList{$URL}));
		$rcode = &Check_URL($URL);
		next if (defined($OkStatusMsgs{$rcode}));
		$HTTP_Fail_List{$URL} = $rcode;
	}
	&Print_List(%HTTP_Fail_List, "Failed External URLs::badurl");
}

# Check_URL -- request one URL with an HTTP HEAD and return the status.
#
#   $_[0]  the URL to test
#
# Returns
#   nothing  when the URL does not start with "http://"
#   -1       when no host name can be extracted or resolved
#   -2       when the socket cannot be created
#   -4       when the connection is refused or fails
#   otherwise the status code of the response's first line (for example
#   "200"), or an empty value when the server sent nothing.
# The socket is no longer bound to the local host name before
# connecting, so -3 is never returned.  There is no time-out: the call
# waits for as long as the server keeps the connection open.
sub Check_URL {
	local($URL) = @_;
	local($host, $port, $path, $ipaddr, $proto);
	local($response, $protocol, $status, $hostheader, $previous);
	return if ($URL !~ m#^http://#i);
	# Host, optional port, and path (without any "#fragment").  A URL from
	# which no host can be extracted is reported as a look-up failure
	# rather than retried with values left over from an earlier call.
	if ($URL =~ m{^http://([\w.-]+)(:(\d*))?(/[^#]*)?}i) {
		($host, $port, $path) = ($1, $3, $4);
	}
	else {
		return -1;
	}
	if (!defined($path) || ($path eq "")) { $path = '/'; }
	if (!defined($port) || ($port eq "")) { $port = 80; }
	if ($Verbose) { print "  Checking URL: $URL\n"; }

	# The Socket module supplies the constants and the address packing that
	# are correct for the platform the script runs on.
	require Socket;
	$proto = (getprotobyname('tcp'))[2];
	$ipaddr = Socket::inet_aton($host);
	return -1 unless (defined($ipaddr));
	socket(S, Socket::PF_INET(), Socket::SOCK_STREAM(), $proto) || return -2;
	unless (connect(S, Socket::pack_sockaddr_in($port, $ipaddr))) {
		close(S);
		return -4;
	}

	# Unbuffer the socket, then restore whichever handle was selected.
	$previous = select(S); $| = 1; select($previous);

	# HTTP lines end in CRLF, and name-based virtual hosts need "Host:".
	$hostheader = ($port == 80) ? $host : "$host:$port";
	print S "HEAD $path HTTP/1.0\r\nHost: $hostheader\r\n\r\n";
	$response = <S>;
	close(S);
	($protocol, $status) = split(/ /, $response);
	# A status line without a reason phrase leaves the line ending attached.
	$status =~ s/\s+$//;
	return $status;
}

# PrintMissingLinks -- print the "Files Found But Not Referenced"
# section to the LOG filehandle.
#
# Walks the tree below $LocalPath with &find (which fills @AllFiles
# through &wanted) and lists every entry that was not seen during the
# crawl (%fileseen), is not a directory, and does not match $Avoid.
sub PrintMissingLinks {
	local($file);
	$missinglinkcount = 0;
	print LOG "<A NAME=\"missinglink\"><HR></A>";
	print LOG "<H2 ALIGN=CENTER>Files Found But Not Referenced</H2>\n\n";
	&find ($LocalPath);
	@SortedList = sort @AllFiles;
	foreach $file (@SortedList) {
		next if ($fileseen{$file});
		next if (-d $file);
		next if ($Avoid && ($file =~ /$Avoid/));
		$missinglinkcount ++;
		$fileref = $file;
		$fileref =~ s/$LocalPath/$LocalURL/g;
		print LOG "<P><STRONG><A HREF=\"$fileref\">$fileref</A></STRONG>\n";
		print LOG " ($title{$file})\n" if ($title{$file});
	}
	print LOG "<P ALIGN=CENTER>(Nothing to Report.)" if ($missinglinkcount < 1);
	print LOG "</P>\n\n";
	print LOG "<P ALIGN=CENTER><SMALL>[ <A HREF=\"#index\">";
	print LOG "Return to Index</A> ]</SMALL></P>\n\n";
}

# wanted -- callback invoked by &find for every entry it visits.
# Appends the entry's full path ($name) to @AllFiles, whatever its type,
# and returns the result of the plain-file test on $_.
sub wanted {
	push (@AllFiles, $name);
	return -f $_;
}

1;


