From: Nicolas Boisselier <nicolas.boisselier@gmail.com>
Date: Sun, 16 Apr 2023 21:17:07 +0000 (+0200)
Subject: html2txt(): use html_unescape on return text
X-Git-Url: https://git.nbdom.net/?a=commitdiff_plain;h=8e2fa385a1a9a78ca5eea076ff79bd4259cee13b;p=nb.git

html2txt(): use html_unescape on return text
---

diff --git a/bin/html2csv b/bin/html2csv
index 5709044e..a3367662 100755
--- a/bin/html2csv
+++ b/bin/html2csv
@@ -74,7 +74,8 @@ my $exp1 = $Opt{exp1}; $exp1 and $exp1 = "[^>]*$exp1";
 my $exp2 = $Opt{exp2}; $exp2 and $exp2 = "[^>]*$exp2";
 my $exp3 = $Opt{exp3}; $exp3 and $exp3 = "[^>]*$exp3";
 
-for my $table ($html =~ m,<${T1}${exp1}[^>]*>(.*?)<\s*/\s*${T1}\s*>,gi) {
+for my $table ($html =~ m,<${T1}${exp1}[^>]*>(.*?)<\s*/\s*${T1}\s*>,gi)
+{
   $table_num++;
   #warn $table_num;
   next if defined $Opt{'num'} and $Opt{num} and $Opt{num} != $table_num;
@@ -86,14 +87,17 @@ for my $table ($html =~ m,<${T1}${exp1}[^>]*>(.*?)<\s*/\s*${T1}\s*>,gi) {
 
   	$tr = "<>$tr</>" unless $T3;
   	my $count = 0;
-    for my $td ($tr =~ m,<${T3}${exp3}[^>]*>(.*?)<\s*/\s*${T3}\s*>,gi) {
+    for my $td ($tr =~ m,<${T3}${exp3}[^>]*>(.*?)<\s*/\s*${T3}\s*>,gi)
+    {
       $td = html2txt($td) unless $Opt{html};
       $_ = chr(194).chr(160); $td =~ s/$_/ /g;
       $td =~ s/\s+/ /g;
       $td = str_trim($td);
       $count++;
+
       push(@col,$td);
-      if (1 and $COUNT and $count > $COUNT) {
+      if ($COUNT and $count > $COUNT)
+      {
     		print join($Opt{sep},@col)."\n";
     		@col = ();
       }
@@ -201,7 +205,7 @@ __DATA__
 
 =head1 NAME
 
-$NAME - Script to extract html table into csv
+$NAME - Script to print html table into csv
 
 =head1 SYNOPSIS
 
diff --git a/lib/perl/NB/Functions.pm b/lib/perl/NB/Functions.pm
index cd3a0a69..9e94d0da 100644
--- a/lib/perl/NB/Functions.pm
+++ b/lib/perl/NB/Functions.pm
@@ -529,7 +529,7 @@ return $db;
 
 sub html2txt {
 
-my $v = shift @_;
+	my $v = shift @_;
 
   # New line
 	$v =~ s,
@@ -561,6 +561,7 @@ my $v = shift @_;
 
 	$v =~ s/[\f ]+/ /g;
 	&str_trim($v);
+	$v = &NB::Functions::html_unescape($v);
 
 	return $v;