#! \usr\bin\perl.exe

###############################################################################
###############################################################################
#
# To remove external links to images from downloaded web pages
#
#	Version 3
#
#	by Andrew Hardwick, http://duramecho.com
#
#	Released under GNU Public Licence.
#
###############################################################################
###############################################################################

use File::Find;
use Cwd;

###############################################################################
#
# Use:
#  To get rid of external image links (and anything else specified by a
#  'src' attribute) from downloaded web pages which would otherwise annoyingly
#  cause a download attempt to be made (possibly with problems like automatic
#  modem dialing) when they are viewed in a browser. These images are typically
#  just adverts & counters anyway.
# How to use:
#  Just run it. It will automatically process all *.htm & *.html files in
#  the current directory and below.
#
###############################################################################
# Version 1, 2002/2/8
#  Original.
# Version 2, 2002/4/8
# Version 3, 2004/4/14
#  Made it skip writing unchanged files (faster & does not unnecessarily
#   mess up file dates).
#  Added paranoid check for the case of directories accidentally
#   named *.html.
###############################################################################

use strict;
use warnings;

# Find files: collect every regular file at or below the current directory
# whose name ends in ".htm" or ".html" (case-insensitive).
my @FilesToProcess;
find(
	sub {
		# -f skips directories that happen to be named *.html. The dot is
		# escaped and the match anchored with \z so that only a genuine
		# ".htm"/".html" extension qualifies (not e.g. "page.shtml").
		push(@FilesToProcess, $File::Find::name)
			if -f($_) && /\.html?\z/i;
	},
	cwd());
print "Number of files=".scalar(@FilesToProcess)."\n";

# One rule per quoting style: a pattern matching a 'src' attribute whose
# quoted value is an absolute http, https or ftp URL, paired with the emptied
# attribute that replaces it. The /s flag lets the URL span line breaks.
my @LinkRules = map {
	my $Quote = $_;
	[ qr{src\s*=\s*$Quote(?:https?|ftp)://.*?$Quote}si, "src=$Quote$Quote" ]
} ('"', "'");

# Alter file contents
print "Processing:\n";
my $TotalCount = 0;
foreach my $FileName (@FilesToProcess) {
	print " $FileName\n";

	# Read in whole file contents. Three-argument open keeps odd characters
	# in the file name from being taken as part of the mode, and :raw makes
	# sure the bytes that are not edited are written back untouched.
	open(my $In, '<:raw', $FileName)
		or die "cannot open $FileName to read: $!";
	my $FileContents = do { local $/; <$In> };
	defined($FileContents)
		or die "cannot read $FileName: $!";
	close($In);

	# Remove links, counting how many attributes were emptied.
	my $Count = 0;
	foreach my $Rule (@LinkRules) {
		my ($Pattern, $Replacement) = @{$Rule};
		$Count += ($FileContents =~ s/$Pattern/$Replacement/g);
	}

	# Put results back in the file only if there have been changes, so that
	# untouched files keep their modification dates.
	next unless $Count;
	print "  Removed $Count links.\n";
	open(my $Out, '>:raw', $FileName)
		or die "cannot open $FileName to write: $!";
	print {$Out} $FileContents
		or die "cannot write to $FileName: $!";
	# Buffered write errors (e.g. disk full) only surface at close.
	close($Out)
		or die "cannot finish writing $FileName: $!";
	$TotalCount += $Count;
}
print "Total removed = $TotalCount\n";

###############################################################################

