From: Nicolas Boisselier Date: Sun, 16 Apr 2023 21:17:07 +0000 (+0200) Subject: html2txt(): use html_unescape on return text X-Git-Url: https://git.nbdom.net/?a=commitdiff_plain;h=8e2fa385a1a9a78ca5eea076ff79bd4259cee13b;p=nb.git html2txt(): use html_unescape on return text --- diff --git a/bin/html2csv b/bin/html2csv index 5709044e..a3367662 100755 --- a/bin/html2csv +++ b/bin/html2csv @@ -74,7 +74,8 @@ my $exp1 = $Opt{exp1}; $exp1 and $exp1 = "[^>]*$exp1"; my $exp2 = $Opt{exp2}; $exp2 and $exp2 = "[^>]*$exp2"; my $exp3 = $Opt{exp3}; $exp3 and $exp3 = "[^>]*$exp3"; -for my $table ($html =~ m,<${T1}${exp1}[^>]*>(.*?)<\s*/\s*${T1}\s*>,gi) { +for my $table ($html =~ m,<${T1}${exp1}[^>]*>(.*?)<\s*/\s*${T1}\s*>,gi) +{ $table_num++; #warn $table_num; next if defined $Opt{'num'} and $Opt{num} and $Opt{num} != $table_num; @@ -86,14 +87,17 @@ for my $table ($html =~ m,<${T1}${exp1}[^>]*>(.*?)<\s*/\s*${T1}\s*>,gi) { $tr = "<>$tr" unless $T3; my $count = 0; - for my $td ($tr =~ m,<${T3}${exp3}[^>]*>(.*?)<\s*/\s*${T3}\s*>,gi) { + for my $td ($tr =~ m,<${T3}${exp3}[^>]*>(.*?)<\s*/\s*${T3}\s*>,gi) + { $td = html2txt($td) unless $Opt{html}; $_ = chr(194).chr(160); $td =~ s/$_/ /g; $td =~ s/\s+/ /g; $td = str_trim($td); $count++; + push(@col,$td); - if (1 and $COUNT and $count > $COUNT) { + if ($COUNT and $count > $COUNT) + { print join($Opt{sep},@col)."\n"; @col = (); } @@ -201,7 +205,7 @@ __DATA__ =head1 NAME -$NAME - Script to extract html table into csv +$NAME - Script to print html table into csv =head1 SYNOPSIS diff --git a/lib/perl/NB/Functions.pm b/lib/perl/NB/Functions.pm index cd3a0a69..9e94d0da 100644 --- a/lib/perl/NB/Functions.pm +++ b/lib/perl/NB/Functions.pm @@ -529,7 +529,7 @@ return $db; sub html2txt { -my $v = shift @_; + my $v = shift @_; # New line $v =~ s, @@ -561,6 +561,7 @@ my $v = shift @_; $v =~ s/[\f ]+/ /g; &str_trim($v); + $v = &NB::Functions::html_unescape($v); return $v;