#! D:\perl\bin\perl.exe
###############################################################################
###############################################################################
#
# To insert image (IMG) alternative (ALT) text into an HTML page
# from a plain text file.
#
# Version 3
#
# by Andrew Hardwick, http://duramecho.com, 2002/4/2-4
#
# Released under GNU Public Licence.
#
###############################################################################
###############################################################################
#
# How To Use
#
# Run from a command line with the paths (relative to the current directory)
# of the HTML file & the file of ALT texts as the arguments.
# Output is written back over the source HTML file (earlier documentation
# described a separate output file with '.AltText.txt' appended to the source
# file name, but the code does not write one).
# ALT text input format is a file of plain ASCII text. For each image,
# there should be an entry consisting of the image file path (as it appears
# in the IMG SRC tags in the HTML file) followed by a line break and the
# ALT text for that image then two line breaks (i.e. a blank line terminates
# each record).
#
###############################################################################
#
# Known Deficiencies
#
# The input format cannot cope with double line breaks in ALT text because
# that is used as a record separator.
# If the same image file is accessed by two different paths (e.g. relative &
# absolute) then it will need two separate entries in the ALT text file.
# It simply converts double quotes to single quotes to avoid conflict with
# those surrounding the ALT text in the HTML instead of more intelligently
# escaping them.
#
###############################################################################
###############################################################################

# Include libraries
use Cwd;            # To find current directory
use File::Spec;     # To resolve the command line paths portably
use strict;         # Disable automatic variables
use warnings;       # Report suspicious constructs

# File-global variables
my %AltTexts;       # Look-up table of image ALT texts versus SRC paths

###############################################################################
# Main routine
###############################################################################
# Command line arguments:
#   0: path of the HTML file (it is rewritten in place).
#   1: path of the file of ALT texts.
###############################################################################
{
    # Both paths are needed; stop with a usage hint instead of trying to open
    # a directory or an undefined path.
    @ARGV >= 2
        or die("Usage: $0 <HTML file> <ALT text file>\n");

    # Resolve the paths against the current directory. Unlike simply gluing
    # cwd() in front, this leaves paths that are already absolute intact.
    my $HtmlFile    = File::Spec->rel2abs($ARGV[0]);
    my $AltTextFile = File::Spec->rel2abs($ARGV[1]);

    # Get ALT text data from file & parse it into the look-up table
    %AltTexts = ParseAltTextData(ReadWholeFile($AltTextFile));

    # Get HTML data from file
    my $Html = ReadWholeFile($HtmlFile);

    # Insert ALT text in all IMG elements. Each complete IMG tag (from '<IMG'
    # up to the next '>') is handed to AddAltText & replaced by its result.
    # s///g yields an empty string when nothing matched so force that to 0.
    my $ImagesProcessed = ($Html =~ s/(<IMG\b[^>]*>)/AddAltText($1)/ieg) || 0;
    print "$ImagesProcessed images processed.\n";

    # Put HTML data back in the file
    open(my $Out, '>', $HtmlFile)
        or die("Cannot open $HtmlFile to write: $!");
    print {$Out} $Html
        or die("Cannot write to $HtmlFile: $!");
    # Buffered write errors only show up when the handle is closed
    close($Out)
        or die("Cannot finish writing $HtmlFile: $!");
}

###############################################################################
# Read a whole file into a string
###############################################################################
# Parameter:
#   Path of the file to read.
# Returns:
#   The contents of the file (an empty string for an empty file).
# Dies if the file cannot be opened.
###############################################################################
sub ReadWholeFile
{
    my ($Path) = @_;
    open(my $In, '<', $Path)
        or die("Cannot open $Path to read: $!");
    # An undefined input record separator makes <> return the whole file
    local $/;
    my $Contents = <$In>;
    close($In);
    return defined($Contents) ? $Contents : '';
}

###############################################################################
# Parse the contents of an ALT text file
###############################################################################
# Parameter:
#   The text of the ALT text file: records separated by blank lines, each
#   being an image path on the first line followed by the ALT text.
# Returns:
#   A list of path => ALT text pairs (suitable for assigning to a hash) with
#   double quotes in the ALT texts already replaced by single quotes.
# Dies if a record has a path but no ALT text line.
###############################################################################
sub ParseAltTextData
{
    my ($AltTextData) = @_;
    my %Parsed;

    # Accept DOS & old Mac line endings so that the blank line separator is
    # recognised regardless of where the file was written
    $AltTextData =~ s/\r\n?/\n/g;
    # Blank lines before the first record would otherwise give an empty path
    $AltTextData =~ s/\A\n+//;

    # Any run of blank lines separates records
    foreach my $Record (split(/\n{2,}/, $AltTextData))
    {
        # Ignore fragments with nothing in them
        next unless $Record =~ /\S/;
        # First line is the path, the rest (less one final line break) is the
        # ALT text
        $Record =~ /\A([^\n]+)\n(.*?)\n?\z/s
            or die("Invalid record: $Record.");
        my ($Path, $AltText) = ($1, $2);
        # Replace double quotes which would conflict with those demarking text
        $AltText =~ tr/"/'/;
        $Parsed{$Path} = $AltText;
    }
    return %Parsed;
}

###############################################################################
# Add ALT text to an image element
###############################################################################
# Parameter:
#   The IMG element (including the <...> triangular brackets).
# Returns:
#   The element with ALT text inserted (or unaltered if none available).
###############################################################################
sub AddAltText
{
    # Arguments:
    #   0: the IMG element (including the <...> triangular brackets).
    #   1: (optional) reference to a hash of ALT texts keyed by SRC path.
    #      When omitted the file-global %AltTexts look-up table is used.
    # Copy the arguments before running any pattern match: the caller passes
    # $1 which would change under our feet.
    my ($ImgElement, $Lookup) = @_;
    $Lookup = \%AltTexts unless defined($Lookup);

    # Find SRC attribute value, which may be double quoted, single quoted or
    # unquoted. The look-behind stops attributes that merely end in 'SRC'
    # (e.g. DATA-SRC) being mistaken for it.
    my $Src;
    if ($ImgElement =~
        /(?<![\w-])SRC\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))/i)
    {
        $Src = defined($1) ? $1
             : defined($2) ? $2
             :               $3;
    }
    else
    {
        print "Warning: IMG element without SRC: $ImgElement\n";
        return $ImgElement;
    }

    # Abort if no text available
    unless (exists($Lookup->{$Src}))
    {
        print "Warning: No ALT text for $Src\n";
        return $ImgElement;
    }

    # Double quotes would end the attribute value early so swap them for
    # single quotes (harmless if the look-up table was already cleaned)
    my $AltText = $Lookup->{$Src};
    $AltText =~ tr/"/'/;

    # Remove old ALT text (quoted or unquoted) but leave attributes that only
    # end in 'ALT' (e.g. DATA-ALT) alone
    $ImgElement =~
        s/\s*(?<![\w-])ALT\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)//ig;

    # Add new ALT text just inside the closing bracket, keeping the slash of
    # a self-closing <... /> element after the new attribute
    $ImgElement =~
        s{\s*(/?)\s*>\z}{' ALT="' . $AltText . '"' . ($1 ? ' /' : '') . '>'}e;

    return $ImgElement;
}
###############################################################################