#!C:\perl\bin\perl.exe -w
#
# Walks the directory tree rooted at $dirname and, for every *.htm / *.html
# file found, writes a cleaned-up copy with a .php extension next to it and
# then DELETES the original file.
#
# NOTE(review): the copy of this script that was reviewed had been run through
# an HTML stripper: every "<tag" that started with a letter was lost, including
# the readline operator and the tag names inside several regexes.  Patterns
# that could be reconstructed from context are marked "reconstructed"; the
# ones that could not are left commented out with a FIXME.  Confirm all of
# them against a clean copy before running this on real data, because the
# source files are unlinked after conversion.

use strict ;
use warnings ;
use File::Find ;

#my $dirname = 'C:\temp\dotrose\html\etext\sermons' ;
my $dirname = '.' ;

# Text written immediately before and after the cleaned document.
my $head = ' ' ;
my $foot = ' ' ;

find(\&process_directory, $dirname) ;

# File::Find callback.  Called once per directory entry with the entry's name
# in $_.  Entries that are not plain .htm/.html files are ignored.  For each
# matching file the content is flattened to a single string, run through the
# clean-up substitutions below, written to <name>.php wrapped in $head/$foot,
# and the original file is removed.  Dies on any I/O failure.  Always
# returns 1.
sub process_directory {
    # Require a real dot before the extension and a plain file, so that a
    # directory (or file) merely named "html" is not opened and unlinked.
    return 1 unless ( $_ =~ /\.html?\z/ && -f $_ ) ;

    print "\tProcessing File: $_\n" ;

    my $infile  = $_ ;
    my $outfile = $infile ;
    $outfile =~ s/\.html?\z/.php/i ;

    open ( my $in_fh,  '<', $infile )  or die "Problem opening $infile: $!\n" ;
    open ( my $out_fh, '>', $outfile ) or die "Problem opening $outfile: $!\n" ;

    my $out = '' ;

    # Make the file one long string
    while ( my $line = <$in_fh> ) {
        # Remove the end of line character
        chomp $line ;
        # Replace it with a blank space
        $line .= ' ' ;
        # And append it to the rest of the document
        $out .= $line ;
    }

    # REGEXes go here to change what wants changing

    # Reconstructed: strip attributes from paragraph tags.  The source showed
    # only "]*>" replaced by a blank line, which is how "<p>" was rendered.
    $out =~ s/<p[^>]*>/<p>/ig ;

    # FIXME(review): tag name (and possibly the replacement) lost in the
    # source.  Left disabled: as written it would delete every ">" character.
    # $out =~ s/]*>//ig ;

    $out =~ s/<\/h([0-9]>)/<\/h$1\n/ig ;
    $out =~ s/<\/?font[^>]*>//ig ;
    $out =~ s/<\/?meta[^>]*>//ig ;
    $out =~ s/<\/?span[^>]*>//ig ;
    $out =~ s/<\/?small[^>]*>//ig ;
    $out =~ s/<\/?large[^>]*>//ig ;

    # FIXME(review): two more substitutions whose tag names were lost in the
    # source.  Left disabled for the same reason as above.
    # $out =~ s/]*>//ig ;
    # $out =~ s/]*>//ig ;

    # Move a space that sits just inside a closing tag to just outside it.
    $out =~ s/\s<\/strong>(\w)/<\/strong> $1/gi ;
    $out =~ s/<(\/?)i>/<$1em>/ig ;
    $out =~ s/\s<\/em>(\w)/<\/em> $1/gi ;

    # Reconstructed: drop empty inline elements.  The source showed only the
    # closing tag in each pattern; the opening tag is presumed stripped.
    $out =~ s/<em><\/em>//ig ;
    $out =~ s/<strong><\/strong>//ig ;
    $out =~ s/<u><\/u>//ig ;

    $out =~ s/<(\/?)b>/<$1strong>/ig ;
    $out =~ s/<\/?o:[^>]*>//ig ;
    $out =~ s/<\/?html[^>]*>//ig ;
    $out =~ s/<\/?head[^>]*>//ig ;
    $out =~ s/<\/?body[^>]*>//ig ;
    $out =~ s/<\/?title[^>]*>/\n/ig ;
    $out =~ s/style="[^"]*"//ig ;
    $out =~ s/<\!DOCTYPE[^>]*>// ;

    # Point links at the converted files.  This rewrites every ".htm"/".html"
    # occurrence in the document, not only those inside href attributes.
    $out =~ s/\.html?/.php/ig ;

    # Reconstructed: "<br>" presumed from the single line break in the source.
    # NOTE(review): \w also discards one word character; \s* may have been
    # intended -- confirm.
    $out =~ s/<br>\w<\/p>/<\/p>/ig ;

    # $out =~ s/href="http:\/\/.*\/([a-zA-Z0-9%]*\.php)"/href="$1"/ig ;

    # Make links to the site relative (dots escaped so they match literally).
    $out =~ s/http:\/\/www\.jonathanedwards\.com\///gi ;

    # Remove multiple spaces throughout document
    $out =~ s/\s\s*/ /g ;

    # Finally, add back some "end of line" characters
    # (the "<p>" and "<br>" tokens below are reconstructed -- see header note)
    $out =~ s/(<p>)/<\/p>\n\n$1/ig ;
    $out =~ s/<p>\s*/<p>/ig ;
    $out =~ s/\s*<\/p>/<\/p>/ig ;
    $out =~ s/<\/p><\/p>/<\/p>/ig ;
    $out =~ s/(<\/div>)/$1\n/ig ;
    $out =~ s/<br>(.)/<br>\n$1/g ;

    # If any lines start with whitespace, remove it.  /m makes ^ match at
    # every line start; only horizontal whitespace is removed so the blank
    # lines inserted between paragraphs are preserved.
    $out =~ s/^[^\S\n]+//mg ;

    print {$out_fh} $head or die "Printing Head: $!\n" ;
    print {$out_fh} $out  or die "Printing: $!\n" ;
    print {$out_fh} $foot or die "Printing Foot: $!\n" ;

    close($out_fh) or die "Closing: $!\n" ;
    close($in_fh)  or die "Closing: $!\n" ;

    # The input handle must be closed before this point: an open file cannot
    # be unlinked on Windows.
    unlink $infile or die "Could Not Delete: $infile: $!\n" ;

    return 1 ;
}