<\/center>
/ig;
return $section;
}
# Convert a file in the old HDL format to the new HDL format (using the
# Section tag), writing the converted text to a separate output file.
#
# Arguments (after $self):
#   $file   - path of the input file in the old HDL format
#   $cnfile - path of the file the converted output is written to
#
# Returns $cnfile. Dies if the output file cannot be opened or closed.
#
# NOTE(review): several statements inside the while loop below look
# truncated -- the TOC-matching substitution and the print statements appear
# to have lost their markup, and $title / $sectiontext have no visible
# declaration. Confirm against the upstream copy of this file before editing
# the loop.
sub convert_to_newHDLformat
{
my $self = shift (@_);
my ($file,$cnfile) = @_;
my $input_filename = $file;
my $tmp_filename = $cnfile;
# write HTML tmp file with new HDL format
# (PROD is a bareword, package-global filehandle opened for writing; the
# output file is truncated if it already exists)
open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!");
# read in the file and do basic html cleaning (removing header etc)
# (the text is handed back through the scalar reference \$html)
my $html = "";
$self->HB_read_html_file ($input_filename, \$html);
# process the file one section at a time; each pass is expected to consume
# the leading part of $html, and the loop ends when $html is empty or when
# the leftover-text branch below executes 'last'
my $curtoclevel = 1;
my $firstsection = 1;
my $toclevel = 0;
while (length ($html) > 0) {
if ($html =~ s/^.*?(?:]*>)?((|||\s)*)<<TOC(\d+)>>\s*(.*?)
]*>)?((|||\s)*)<<TOC\d+>>)/$2/i) {
$sectiontext = $1;
} else {
$sectiontext = $html;
$html = "";
}
# remove tags and extra spaces from the title
$title =~ s/<\/?[^>]+>//g;
$title =~ s/^\s+|\s+$//g;
# close any sections below the current level and
# create a new section (special case for the firstsection)
print PROD "\n";
print PROD "\n\n$title\n\n\n";
print PROD "\n";
# clean up the section html
$sectiontext = $self->HB_clean_section($sectiontext);
print PROD "$sectiontext\n";
} else {
print STDERR "WARNING - leftover text\n" , $self->shorten($html),
"\nin $input_filename\n";
last;
}
$firstsection = 0;
}
print PROD "\n";
close (PROD) || die("Error Closing File: $tmp_filename $!");
return $tmp_filename;
}
# Build a double-quoted, abbreviated rendering of a string for use in
# diagnostic messages.
#
# Arguments (after $self):
#   $text - the string to render
#
# Returns the whole string wrapped in double quotes when it is shorter than
# 100 characters; otherwise the first 50 and the last 50 characters, each
# wrapped in double quotes and joined by " ... ".
sub shorten {
    my ($self, $text) = @_;

    my $size = length($text);
    if ($size >= 100) {
        my $head = substr($text, 0, 50);
        my $tail = substr($text, $size - 50);
        return join('', '"', $head, '" ... "', $tail, '"');
    }

    return '"' . $text . '"';
}
# Prepare a working copy of an input file under the collection's "tidytmp"
# directory and return the path that should be processed from then on.
#
# Arguments (after $self):
#   $file - path of the input file (or directory)
#
# Behaviour:
#   * A directory is returned unchanged.
#   * For .htm / .html / .shtml files (suffix compared case-insensitively):
#       - when $self->{'old_style_HDL'} is set, the file is first converted
#         with convert_to_newHDLformat into the tmp directory;
#       - every non-directory entry of the input file's directory that does
#         not yet exist in the tmp directory is copied there;
#       - when $self->{'use_realistic_book'} is set, the (possibly converted)
#         file is passed through tmp_tidy_file.
#     If neither option is set the original input path is returned.
#   * Any other file is copied into the tmp directory unless a file of that
#     name is already there, and the tmp path is returned.
#
# Dies if the input file's directory cannot be opened or a copy fails.
#
# NOTE(review): when both 'old_style_HDL' and 'use_realistic_book' are set,
# tmp_tidy_file is handed the same path as input and output -- confirm that
# tmp_tidy_file copes with that.
sub convert_tidy_or_oldHDL_file
{
    my $self = shift (@_);
    my ($file) = @_;
    my $input_filename = $file;

    # directories are passed straight through
    if (-d $input_filename)
    {
        return $input_filename;
    }

    # split the input filename into name, directory and (lower-cased) suffix
    my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
    my $base_dirname = $dirname;
    $suffix = lc($suffix);

    # derive tmp filename from input filename:
    # strip white space, dots and hyphens from the file name (the path is
    # left alone) to make later conversion by utils simpler
    $tailname =~ s/\s+//g;
    $tailname =~ s/\.+//g;
    $tailname =~ s/\-+//g;

    # convert to utf-8 otherwise we have problems with the doc.xml file
    # later on
    &unicode::ensure_utf8(\$tailname);

    # the working area lives in <collection dir>/tidytmp
    my $tmp_dirname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tidytmp");
    &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);

    my $test_dirname = "";
    my $f_separator = &util::get_os_dirsep();

    # mirror the sub-directory structure found below "import" inside tidytmp,
    # creating one directory level per loop iteration
    if ($dirname =~ m/import$f_separator/)
    {
        # $' is the text following the match, i.e. the path below "import"
        $test_dirname = $'; #'

        while ($test_dirname =~ m/[$f_separator]/)
        {
            # $` is the folder name before the separator just matched
            my $folderdirname = $`;
            $tmp_dirname = &util::filename_cat($tmp_dirname,$folderdirname);
            &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
            $test_dirname = $'; #'
        }
    }

    my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");

    # tidy or convert the input file if it is a HTML-like file
    if (($suffix eq ".htm") || ($suffix eq ".html") || ($suffix eq ".shtml"))
    {
        # convert the input file to a new style HDL
        my $hdl_output_filename = $input_filename;
        if ($self->{'old_style_HDL'})
        {
            $hdl_output_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
            $hdl_output_filename = $self->convert_to_newHDLformat($input_filename,$hdl_output_filename);
        }

        # copy every other file from the base dir to the tmp dir if it does
        # not exist there yet
        opendir(my $base_dir_handle, $base_dirname) or die "Can't open base directory : $base_dirname!";
        my @files = grep {!/^\.+$/} readdir($base_dir_handle);
        # a handle from opendir is a directory handle and is only released
        # by closedir (close() acts on file handles)
        closedir($base_dir_handle);

        foreach my $file (@files)
        {
            my $src_file = &util::filename_cat($base_dirname,$file);
            my $dest_file = &util::filename_cat($tmp_dirname,$file);
            if ((!-e $dest_file) && (!-d $src_file))
            {
                # just copy the original file back to the tmp directory
                copy($src_file,$dest_file) or die "Can't copy file $src_file to $dest_file $!";
            }
        }

        # tidy the input file
        my $tidy_output_filename = $hdl_output_filename;
        if ($self->{'use_realistic_book'})
        {
            $tidy_output_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
            $tidy_output_filename = $self->tmp_tidy_file($hdl_output_filename,$tidy_output_filename);
        }
        $tmp_filename = $tidy_output_filename;
    }
    else
    {
        if (!-e $tmp_filename)
        {
            # just copy the original file back to the tmp directory
            copy($input_filename,$tmp_filename) or die "Can't copy file $input_filename to $tmp_filename $!";
        }
    }

    return $tmp_filename;
}
# Will make the html input file as a proper XML file with removed font tag and
# image size added to the img tag.
# The tidying process takes place in a collection specific 'tmp' directory so
# that we don't accidentally damage the input.
#
# Arguments (after $self):
#   $file   - path of the HTML file to read
#   $cnfile - path of the file to write; it is written twice, first with the
#             re-emitted token stream and then with the output of "tidy"
#
# Returns $cnfile. Dies if the output file cannot be opened or closed.
#
# NOTE(review): if $file and $cnfile name the same file, the open-for-write
# below would truncate the file the parser was created on -- confirm that
# callers never pass identical paths.
sub tmp_tidy_file
{
my $self = shift (@_);
my ($file,$cnfile) = @_;
my $input_filename = $file;
my $tmp_filename = $cnfile;
# get the input filename
# ($dirname is used below to locate image files; $tailname and $suffix are
# not used again in this sub)
my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
# loaded at run time, only when this sub is actually called
require HTML::TokeParser::Simple;
# create HTML parser to decode the input file
my $parser = HTML::TokeParser::Simple->new($input_filename);
# write HTML tmp file without the font tag and image size are added to the img tag
open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!");
while (my $token = $parser->get_token())
{
# is it an img tag
if ($token->is_start_tag('img'))
{
# get the attributes
my $attr = $token->return_attr;
# get the full path to the image
# (the src attribute is joined onto the input file's directory)
my $img_file = &util::filename_cat($dirname,$attr->{src});
# set the width and height attribute
# NOTE(review): imgsize is not defined in this sub -- presumably imported
# from Image::Size elsewhere in the file; verify
($attr->{width}, $attr->{height}) = imgsize($img_file);
# recreate the tag
# NOTE(review): the next statement looks truncated (part of the tag markup
# and the start of the attribute-printing expression appear to be missing)
# -- verify against the upstream copy of this file
print PROD "
{$_}"} } keys %$attr;
print PROD ">";
}
# is it a font tag
else
{
if (($token->is_start_tag('font')) || ($token->is_end_tag('font')))
{
# remove font tag
# (an empty string is printed, so opening and closing font tags are dropped)
print PROD "";
}
else
{
# print without changes
print PROD $token->as_is;
}
}
}
close (PROD) || die("Error Closing File: $tmp_filename $!");
# run html-tidy on the tmp file to make it a proper XML file
my $outhandle = $self->{'outhandle'};
print $outhandle "Converting HTML to be XML compliant:\n";
# build the command line: -q is added and stderr is discarded (to nul on
# Windows, /dev/null elsewhere) when verbosity is 2 or lower
# NOTE(review): $tmp_filename is interpolated into a shell command inside
# double quotes; a path containing a double quote or shell metacharacters
# would alter the command -- confirm paths are trusted
my $tidy_cmd = "tidy";
$tidy_cmd .= " -q" if ($self->{'verbosity'} <= 2);
$tidy_cmd .= " -raw -wrap 0 -asxml \"$tmp_filename\"";
if ($self->{'verbosity'} <= 2) {
if ($ENV{'GSDLOS'} =~ m/^windows/i) {
$tidy_cmd .= " 2>nul";
}
else {
$tidy_cmd .= " 2>/dev/null";
}
print $outhandle " => $tidy_cmd\n";
}
# backticks capture tidy's standard output; its exit status is not checked
my $tidyfile = `$tidy_cmd`;
# write result back to the tmp file
open (PROD, ">$tmp_filename") || die("Error Writing to File: $tmp_filename $!");
print PROD $tidyfile;
close (PROD) || die("Error Closing File: $tmp_filename $!");
# return the output filename
return $tmp_filename;
}
# Associate a cover image with the document, delegating to the parent class.
#
# Arguments (after $self):
#   $doc_obj  - document object, passed through to the parent implementation
#   $filename - path of the file being processed
#
# When either 'use_realistic_book' or 'old_style_HDL' is set, the file being
# processed lives under "tidytmp", but the cover image is wanted from
# "import": the first "tidytmp" path component is rewritten to "import"
# before delegating.
#
# Returns whatever the parent's associate_cover_image returns.
sub associate_cover_image
{
    my ($self, $doc_obj, $filename) = @_;

    my $processed_in_tidytmp = $self->{'use_realistic_book'} || $self->{'old_style_HDL'};

    # we will have cover image in tidytmp, but want it from import
    $filename =~ s/([\\\/])tidytmp([\\\/])/$1import$2/ if ($processed_in_tidytmp);

    $self->SUPER::associate_cover_image($doc_obj, $filename);
}
1;