#!/usr/bin/perl -w
use strict;
# Convert an HTML table of response codes into Perl hash-element lines.
#
# Input (files named on the command line, or STDIN) is scanned line by line
# and is expected to carry one tag per line: <tr>, then <td> cells, then
# </tr>.  Within a row the cells are taken in order as the numeric code, the
# short text and the explanation; a cell may span several lines as long as a
# later line carries the closing </td>.  For every </tr> the script prints
#
#     "CODE" => [ "SHORT", "EXPLANATION" ],
#
# to STDOUT, except when the short text starts with "not used".  Rows whose
# first cell is a numeric range ("N - M") are skipped entirely.  The script
# dies when a cell turns up in a position it does not know how to handle, or
# when the input ends in the middle of a row it is still reading.
#
# NOTE(review): only double quotes are escaped in the emitted strings; a
# backslash, "$" or "@" in the table text would still be live inside the
# generated double-quoted literals.

my ($code, $short, $explanation) = ('', '', '');
my $mode  = 'code';    # cell expected next: 'code', 'short' or 'explanation'
my $DEBUG = 0;         # set true to trace parsing on STDERR

while (<>) {
    if ( /^\s*<tr>\s*$/i ) {
        # Start of a row: forget whatever the previous row collected.
        ($code, $short, $explanation) = ('', '', '');
    } elsif ( /^\s*<td[^>]*>(\d+)(?:\s|&nbsp;)*<\/td>\s*$/i ) {
        # A cell holding nothing but digits is the response code.
        $code = $1;
        warn "found code $code" if $DEBUG;
        $mode = 'short';
    } elsif ( /^\s*<td[^>]*>(\d+(?:\s|&nbsp;)*\-\s*\d+)<\/td>\s*$/i ) {
        warn "skipping range $1" if $DEBUG;
        # Discard the rest of this row, including its </tr>, so nothing is
        # printed for it.  Stop with an error instead of spinning at EOF.
        while (1) {
            defined( $_ = <> )
                or die "unexpected end of input while skipping a range row";
            last if /^\s*<\/tr>\s*$/i;
        }
        next;
    } elsif ( /^\s*<td[^>]*>(.*)<\/td>\s*$/i ) {
        # A complete cell on a single line.
        warn "found one line data" if $DEBUG;
        if ( $mode eq 'short' ) {
            $short = $1;
            $short =~ s/<\/?FONT[^>]*>//gi;
            $short =~ s/&nbsp;/ /g;
            $short =~ s/<\/?a[^>]*>//gi;
            $mode = 'explanation';
        } elsif ( $mode eq 'explanation' ) {
            $explanation = $1;
            $explanation =~ s/<\/?FONT[^>]*>//gi;
            $explanation =~ s/&nbsp;/ /g;
            $mode = 'code';
        } else {
            die "found (one-line) data, but in unknown mode $mode";
        }
    } elsif ( /^\s*<td[^>]*>(.*)$/i ) {
        # A cell that is opened here and closed on a later line.
        warn "found multi-line data (mode $mode)" if $DEBUG;
        my $data = $1;
        # Strip only a trailing CR left by CRLF line endings; an
        # unconditional chop would eat a real character on LF-only input.
        $data =~ s/\r\z//;
        #$data =~ s/<\/?FONT[^>]*>//g;

        # Captures do not survive leaving the loop body, so the text of the
        # closing line is saved in a variable.
        my $last_piece;
        while (1) {
            defined( $_ = <> )
                or die "unexpected end of input inside a multi-line cell";
            if ( /^\s*(.*)<\/td>/i ) {
                $last_piece = $1;
                last;
            }
            /^\s*(.*)\s*$/ or die;
            my $piece = $1;
            $piece =~ s/\r\z//;
            $data .= $piece;
            warn "found intermediate data $piece" if $DEBUG;
        }
        $data .= $last_piece;

        $data =~ s/<\/?FONT[^>]*>//gi;
        $data =~ s/&nbsp;/ /g;
        $data =~ s/<\/?[BI]>//gi;
        $data =~ s/<\/?BR>/ /gi;
        $data =~ s/<\/?a[^>]*>//gi;
        warn "last line $last_piece ($_)" if $DEBUG;
        warn "coalesced multi-line data: $data" if $DEBUG;

        if ( $mode eq 'short' ) {
            $short = $data;
            $mode = 'explanation';
        } elsif ( $mode eq 'explanation' ) {
            $explanation = $data;
            $mode = 'code';
        } elsif ( $mode eq 'code' && $data =~ /^(\d+)$/ ) {
            $code = $1;
            warn "found code $code" if $DEBUG;
            $mode = 'short';
        } else {
            die "found (multi-line) data, but in unknown mode $mode or don't know what to do with it: $data";
        }
    } elsif ( /^\s*<\/tr>\s*$/i ) {
        # End of a row: escape double quotes for the generated literals and
        # emit the hash element.
        #$short =~ s/<\/?FONT[^>]*>//g;
        #$explanation =~ s/<\/?FONT[^>]*>//g;
        #$short =~ s/[\n\r]//;
        #$explanation =~ s/[\n\r]//;
        $short =~ s/"/\\"/gi;
        $explanation =~ s/"/\\"/gi;
        warn "end of row, printing hash element (code $code)" if $DEBUG;
        print qq! "$code" => \[ "$short", "$explanation" \],\n!
            unless $short =~ /^\s*not\s*used\s*/i;
        $mode = 'code';
    }
}