#! \usr\bin\perl.exe
###############################################################################
###############################################################################
#
# To remove external links to images from downloaded web pages
#
# Version 3
#
# by Andrew Hardwick, http://duramecho.com
#
# Released under GNU Public Licence.
#
###############################################################################
###############################################################################

use File::Find;
use Cwd;

###############################################################################
#
# Use:
#  To get rid of external image links (and anything else specified by a
#  'src' attribute) from downloaded web pages which would otherwise annoyingly
#  cause a download attempt to be made (possibly with problems like automatic
#  modem dialing) when they are viewed in a browser. These images are typically
#  just adverts & counters anyway.
# How to use:
#  Just run it. It will automatically process all *.htm & *.html files in
#  the current directory and below.
#
###############################################################################
# Version 1, 2002/2/8
#  Original.
# Version 2, 2002/4/8
# Version 3, 2004/4/14
#  Made it skip writing unchanged files (faster & does not unnecessarily
#  mess up file dates).
#  Added paranoid check for the case of directories accidentally
#  named *.html.
###############################################################################

# Pragmas and modules are loaded here so that this section does not depend on
# anything declared elsewhere in the file; repeating a 'use' is harmless.
use strict;
use warnings;
use File::Find;
use Cwd;

# Find files
#
# Returns the full paths of every regular file at or below $Root whose name
# ends in '.htm' or '.html' (case-insensitive), in the order File::Find
# visits them.
sub FindHtmlFiles {
    my ($Root) = @_;
    my @Found;
    find(
        sub {
            # File::Find has chdir'ed into the directory, so $_ is the bare
            # file name. -f is the paranoid check against directories that
            # happen to be named *.html. The dot is escaped so that a name
            # like 'noteshtml' is not mistaken for a web page.
            push(@Found, $File::Find::name) if -f $_ && /\.html?\z/i;
        },
        $Root);
    return @Found;
}

# Remove links
#
# Blanks every quoted 'src' attribute value in $Html that is an absolute
# http, https or ftp URL. Relative (local) sources are left alone.
# Returns a two-element list: the rewritten text and the number of
# attributes that were blanked (0 if none).
sub RemoveExternalSrcLinks {
    my ($Html) = @_;

    # $1 captures the opening quote so that the value must be closed by the
    # same kind of quote, which is then reused for the empty replacement.
    # /s lets a value that wraps over several lines still match, and /i
    # covers 'SRC=' and 'HTTP://'. s/// yields the substitution count, or an
    # empty string when nothing matched, hence the '|| 0'.
    my $Count = ($Html =~ s{src\s*=\s*(["'])(?:https?|ftp)://.*?\1}{src=$1$1}gsi);
    return ($Html, $Count || 0);
}

# Alter file contents
#
# Reads the file at $Path, blanks its external 'src' links and writes the
# result back over the same file, but only if something changed (faster, and
# leaves the dates of unchanged files alone).
# Returns the number of links removed. Dies if the file cannot be opened,
# read, or written.
sub ProcessFile {
    my ($Path) = @_;

    # Read in whole file contents. Three-argument open keeps odd characters
    # in the file name from being taken as an open mode; ':raw' keeps every
    # byte (including line endings) exactly as stored.
    open(my $In, '<:raw', $Path) or die "cannot open $Path to read: $!";
    my $FileContents = do { local $/; <$In> };
    die "cannot read $Path: $!" unless defined $FileContents;
    close($In);

    my ($NewContents, $Count) = RemoveExternalSrcLinks($FileContents);
    return 0 unless $Count;

    # Put results back in the file if there have been changes. Buffered write
    # errors (e.g. disk full) only surface at close, so that is checked too.
    print " Removed $Count links.\n";
    open(my $Out, '>:raw', $Path) or die "cannot open $Path to write: $!";
    print {$Out} $NewContents or die "cannot write to $Path: $!";
    close($Out) or die "cannot finish writing $Path: $!";
    return $Count;
}

# Processes every *.htm / *.html file in the current directory and below,
# reporting progress on standard output. Returns the total number of links
# removed.
sub Main {
    my @FilesToProcess = FindHtmlFiles(cwd());
    print "Number of files=".scalar(@FilesToProcess)."\n";

    print "Processing:\n";
    my $TotalCount = 0;
    foreach my $Path (@FilesToProcess) {
        print " $Path\n";
        $TotalCount += ProcessFile($Path);
    }
    print "Total removed = $TotalCount\n";
    return $TotalCount;
}

# Run only when executed as a script, not when loaded from another file.
Main() unless caller;

###############################################################################