perl script to legalize HTML files

Dan Connolly (connolly@pixel.convex.com)
Wed, 15 Jul 92 22:49:21 CDT


#!/usr/local/bin/perl
#
# USE
# fix-html.pl <W3-file.html >W3-file.sgml
#
# SEE ALSO
# the html.dtd.
#

# Every legal SGML document instance starts with a document type
# declaration, so emit one before any of the converted text.
print "<!DOCTYPE HTML SYSTEM>\n";

# Slurp all of the input (files named on the command line, or standard
# input) into $_ so that a tag broken across lines can still be matched.
@html = <>; # read whole file
$_ = join('', @html);

# Walk the text one '<' at a time.  Text in front of the '<' is copied
# through untouched; the text behind it is inspected to see whether it
# opens one of the two tags this script knows how to repair.
while (/</) {
    &out($`);       # everything before the '<'
    $_ = $';        # carry on with what follows the '<'
    if (s/^A\s+//i) {
        # Anchor start tag that carries attributes: fix_anchor consumes
        # the rest of the tag from $_ and prints a normalized version.
        &fix_anchor;
    }
    elsif (s/^NEXTID\s+(\d+)\s*>//i) {
        # <NEXTID 12> has a bare number where SGML needs an attribute
        # name.  Tag names are case-insensitive, so match with /i just
        # like the anchor test above.
        &out("<NEXTID N=$1>");
    }
    else {
        # Any other tag: put the '<' back and leave the rest alone.
        &out('<');
    }
}

# Whatever is left contains no further '<'; copy it through as is.
&out($_);

# Single choke point for all converted output: prints its first
# argument to the currently selected output filehandle.
sub out {
    print(shift(@_));
}

# Rewrite the attribute list of an anchor start tag as legal SGML.
#
# Works on the global $_, which on entry must begin right after the
# "<A" and the whitespace that follows it.  Leading name=value pairs
# are consumed; the values of NAME, TYPE and HREF (matched without
# regard to case) are remembered and every other attribute is dropped.
# The remainder of the tag, through the first '>', is then removed from
# $_ and a normalized "<A NAME=... TYPE=... HREF=...>" tag is sent to
# &out.  Attributes that are absent or empty are left out.
sub fix_anchor {
    local($name, $href, $type);

    # An SGML attribute value is either a literal delimited by a
    # matching pair of " or ' characters, or an undelimited token that
    # runs up to whitespace or the end of the tag.  Only the body of a
    # literal is captured (never its delimiters), because the value is
    # quoted again when it is printed below.
    #   $3 = body of "..."   $4 = body of '...'   $5 = bare token
    while (s/^(\w+)\s*=\s*(\"([^\"]*)\"|'([^']*)'|([^\s>]+))\s*//) {
        # Copy the captures before running another pattern match.
        local($attr) = $1;
        local($v) = defined($3) ? $3 : (defined($4) ? $4 : $5);

        # A value taken from a '...' literal or a bare token may hold a
        # double quote; turn it into a character reference so it cannot
        # terminate the "..." literal written out below.
        $v =~ s/\"/&#34;/g;

        $href = $v if $attr =~ /^href$/i;
        $name = $v if $attr =~ /^name$/i;
        $type = $v if $attr =~ /^type$/i;
    }

    # Throw away anything that was not understood, up to and including
    # the '>' that closes the tag.
    s/[^>]*>//;

    &out("<A");
    &out(" NAME=\"$name\"") if $name ne '';
    &out(" TYPE=\"$type\"") if $type ne '';
    &out(" HREF=\"$href\"") if $href ne '';
    &out(">");
}