#!C:\perl\bin\perl.exe -w use strict ; use File::Find ; #my $dirname = 'C:\temp\dotrose\html\etext\sermons' ; my $dirname = '.' ; my $head = ' ' ; my $foot = '
' ; find(\&process_directory, $dirname) ; sub process_directory { return 1 unless ( $_ =~ /html?$/ ) ; print "\tProcessing File: $_\n" ; my ( $infile, $outfile ) ; $infile = $outfile = $_ ; $outfile =~ s/html?$/php/i ; open (IN, "<$infile") or die "Problem opening $infile: $!\n" ; open (OUT, ">$outfile") or die "Problem opening $outfile: $!\n" ; my $out = '' ; # Make the file one long string while ( my $line =]*>/
/ig ;
# NOTE(review): several statements in this listing look truncated, as if
# literal HTML tags had been stripped from the script text -- e.g. the
# incomplete "while ( my $line =" above, and the unbalanced ")" plus a "$1"
# with no capture group in the next statement. Restore the missing tags from
# the original script before running it; the notes below describe only what
# is still visible here.
#
# process_directory is the callback passed to File::Find's find() above.
# It returns early unless $_ ends in "htm"/"html" (the test is case-sensitive,
# does not require a dot, and does not check for a plain file), derives the
# output name by replacing that suffix with "php", opens IN for reading and
# OUT for writing (dying on failure), and builds the converted markup in $out
# through the substitutions that follow.
$out =~ s/ )/<\/p>\n\n$1/ig ;
$out =~ s/ \s*/ /ig ;
# Drop whitespace in front of </p>, then collapse a doubled </p></p>.
$out =~ s/\s*<\/p>/<\/p>/ig ;
$out =~ s/<\/p><\/p>/<\/p>/ig ;
# Put a newline after every </div>.
$out =~ s/(<\/div>)/$1\n/ig ;
$out =~ s/]*>//ig ;
$out =~ s/
]*>//ig ;
# Move a space from just before </strong> to just after it when a word
# character follows the tag.
$out =~ s/\s<\/strong>(\w)/<\/strong> $1/gi ;
# Convert <i> and </i> to <em> and </em>.
$out =~ s/<(\/?)i>/<$1em>/ig ;
# Same space relocation as for </strong>, applied to </em>.
$out =~ s/\s<\/em>(\w)/<\/em> $1/gi ;
# As written, the next three statements delete every closing </em>,
# </strong> and </u> tag.
# NOTE(review): presumably they originally removed empty pairs such as
# <em></em> and the opening tags were lost from this listing -- confirm.
$out =~ s/<\/em>//ig ;
$out =~ s/<\/strong>//ig ;
$out =~ s/<\/u>//ig ;
# Convert <b> and </b> to <strong> and </strong>.
$out =~ s/<(\/?)b>/<$1strong>/ig ;
# Strip "o:"-prefixed tags and the html/head/body wrapper tags (opening and
# closing forms, with any attributes).
$out =~ s/<\/?o:[^>]*>//ig ;
$out =~ s/<\/?html[^>]*>//ig ;
$out =~ s/<\/?head[^>]*>//ig ;
$out =~ s/<\/?body[^>]*>//ig ;
# Replace the <title> and </title> tags with newlines; the title text itself
# is left in place.
$out =~ s/<\/?title[^>]*>/\n/ig ;
# Remove double-quoted style="..." attributes.
$out =~ s/style="[^"]*"//ig ;
# Remove the first <!DOCTYPE ...> declaration only (no /g, and no /i).
$out =~ s/<\!DOCTYPE[^>]*>// ;
# Rewrite every ".htm"/".html" occurrence in the document to ".php",
# presumably so links follow the renamed files. This is not limited to href
# values, so matching body text and external URLs are rewritten as well.
$out =~ s/\.html?/.php/ig ;
$out =~ s/
\w<\/p>/<\/p>/ig ;
# $out =~ s/href="http:\/\/.*\/([a-zA-Z0-9%]*\.php)"/href="$1"/ig ;
# Strip the absolute site prefix so the remaining path is relative. The dots
# in the host name are unescaped, so each one matches any character.
$out =~ s/http:\/\/www.jonathanedwards.com\///gi ;
# Remove multiple spaces throughout document
# (\s also matches newlines, so every whitespace run, line breaks included,
# becomes a single space and $out ends up as one line.)
$out =~ s/\s\s*/ /g ;
# Finally, add back some "end of line" characters
$out =~ s/(
(.)/
\n$1/g ;
# If any lines start with whitespace, remove it
# NOTE(review): without /m and /g this removes only a single whitespace
# character at the very start of $out, not at the start of every line.
$out =~ s/^\s// ;
# Write the header, the converted body and the footer to the .php file.
print OUT $head or die "Printing Head: $!\n" ;
print OUT $out or die "Printing: $!\n" ;
print OUT $foot or die "Printing Foot: $!\n" ;
# Close both handles, then delete the original HTML input file.
close(OUT) or die "Closing: $!\n" ;
close(IN) or die "Closing: $!\n" ;
unlink $infile or die "Could Not Delete: $infile: $!\n" ;
return 1 ;
}