#! D:\perl\bin\perl.exe

###############################################################################
###############################################################################
#
# To insert image (IMG) alternative (ALT) text into an HTML page
#  from a plain text file.
#
#			Version 3
#
#	by Andrew Hardwick, http://duramecho.com, 2002/4/2-4
#
#	Released under GNU Public Licence.
#
###############################################################################
###############################################################################
#
# How To Use
#
# Run from a command line with the paths (relative to the current directory)
#  of the HTML file & the file of ALT texts as the arguments.
# Output overwrites the HTML file in place (no separate output file is
#  written), so keep a backup of the original if it is needed.
# ALT text input format is a file of plain ASCII text. For each image,
#  there should be an entry consisting of the image file path (as it appears
#  in the IMG SRC tags in the HTML file) followed by a line break and the
#  ALT text for that image then two line breaks (i.e. a blank line terminates)
#  each record.
#
###############################################################################
#
# Known Deficiencies
#
# The input format cannot cope with double line breaks in ALT text because
#  that is used as a record separator.
# If the same image file is accessed by two different paths (e.g. relative &
#  absolute) then it will need two separate entries in the ALT text file.
# It simply converts double quotes to single quotes to avoid conflict with 
#  those surrounding the ALT text in the HTML instead of more intelligently
#  escaping them.
#
###############################################################################
###############################################################################

# Include libraries
use Cwd;		# To find current directory
use strict;		# Disable automatic (undeclared) variables

# File-global variables
# Look-up table of image ALT texts versus SRC paths.
#  Key: the image path as written in the ALT text file.
#  Value: the ALT text for that image.
#  Filled by the main routine & read by AddAltText.
my %AltTexts;	# Look-up table of image ALT texts versus SRC paths

###############################################################################
# Main routine
###############################################################################

{	# Check that both file paths were given (any extra arguments are ignored)
	@ARGV>=2 or
			die("Usage: $0 HtmlFile AltTextFile\n");
	# Get ALT text data from file
	# (open resolves relative paths against the current directory by itself,
	#  so the arguments are used as given & absolute paths work too)
	my $AltTextFile=$ARGV[1];
	my $AltTextData=ReadWholeFile($AltTextFile);
	# Tolerate DOS or old Mac line ends & blank lines before the first record
	$AltTextData=~s/\r\n?/\n/g;
	$AltTextData=~s/\A\s+//;
	# Parse ALT text data
	# (records are separated by one or more blank lines; a line holding only
	#  stray white space counts as blank)
	foreach my $AltTextDataRecord (split(/\n\s*\n/,$AltTextData))
	{	# Skip anything that is only white space (e.g. at the end of the file)
		next unless($AltTextDataRecord=~/\S/);
		# Extract image path (first line) & ALT text (the rest)
		$AltTextDataRecord=~/^(.*?)\n(.*?)$/s or
				die("Invalid record: $AltTextDataRecord.");
		# Copy the captures at once because any later match overwrites them
		my($Src,$AltText)=($1,$2);
		# Replace double quotes which would conflict with those demarking text
		$AltText=~s/\"/\'/g;
		$AltTexts{$Src}=$AltText;}
	# Get HTML data from file
	my $HtmlFile=$ARGV[0];
	my $Html=ReadWholeFile($HtmlFile);
	# Insert ALT text in all IMG elements
	# (the substitution returns an empty string, not 0, when nothing matched)
	my $ImagesProcessed=($Html=~s/(<IMG\b.*?>)/AddAltText($1)/iseg)||0;
	print "$ImagesProcessed images processed.\n";
	# Put HTML data back in the file (this overwrites the original HTML file)
	open(my $HtmlOut,'>',$HtmlFile)or
			die("Cannot open $HtmlFile to write: $!");
	print $HtmlOut $Html or
			die("Cannot write to $HtmlFile: $!");
	# Buffered write errors only show up when the file is closed
	close($HtmlOut)or
			die("Cannot finish writing $HtmlFile: $!");}

###############################################################################
# Read the whole of a file into a string
###############################################################################
# Parameter:
#  The path of the file.
# Returns:
#  The contents of the file (an empty string for an empty file).
#  Dies if the file cannot be opened.
###############################################################################

sub ReadWholeFile
{	my $Path=$_[0];
	open(my $Handle,'<',$Path)or
			die("Cannot open $Path to read: $!");
	# With no record separator a single read returns the entire file
	local $/;
	my $Contents=<$Handle>;
	close($Handle);
	return defined($Contents)?$Contents:'';}
	
###############################################################################
# Add ALT text to an image element
###############################################################################
# Parameter:
#  The IMG element (including the <...> triangular brackets).
# Returns:
#  The element with ALT text inserted (or unaltered if none available).
###############################################################################

sub AddAltText
{	my $ImgElement=$_[0];
	# Find SRC attribute. The value may be in double quotes, in single quotes
	#  or unquoted. The look-behind stops attributes whose names merely end in
	#  'src' (e.g. 'data-src') being mistaken for SRC.
	my $Src;
	if($ImgElement=~
			/(?<![\w\-])SRC\s*=\s*(?:\"(.*?)\"|\'(.*?)\'|([^\s\"\'>]+))/is)
	{	# Only one of the three alternatives can have captured a value
		$Src=defined($1)?$1:defined($2)?$2:$3;}
	# Abort if the element has no usable SRC
	unless(defined($Src))
	{	print "Warning: No SRC found in $ImgElement\n";
		return $ImgElement;}
	# Abort if no text available
	unless(exists($AltTexts{$Src}))
	{	print "Warning: No ALT text for $Src\n";
		return $ImgElement;}
	# Remove old ALT text (quoted or unquoted) so that the element does not
	#  end up with two ALT attributes
	$ImgElement=~
			s/\s*(?<![\w\-])ALT\s*=\s*(?:\"[^\"]*\"|\'[^\']*\'|[^\s\"\'>]+)//ig;
	# Add new ALT text just inside the closing bracket, keeping any XHTML
	#  style '/' after the new attribute rather than before it
	my $AltText=$AltTexts{$Src};
	$ImgElement=~s/(\s*\/?)>\z/ ALT=\"$AltText\"$1>/;
	return $ImgElement;}

###############################################################################
