From 699461386cdfcbddfa37967a85bebf98c1e26abb Mon Sep 17 00:00:00 2001 From: Nicolas Boisselier Date: Sun, 4 Jun 2023 20:08:49 +0200 Subject: [PATCH] html2txt fix delete entities --- lib/perl/NB/Functions.pm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/perl/NB/Functions.pm b/lib/perl/NB/Functions.pm index 9e94d0da..ff275047 100644 --- a/lib/perl/NB/Functions.pm +++ b/lib/perl/NB/Functions.pm @@ -546,8 +546,12 @@ sub html2txt { # Delete $v =~ s, - ( (<|&lt;)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>|&gt;) ) - |( (<|&lt;)?\w+(\s+\w+=\S+)*/?(>|&gt;) ) + # NB 04.06.23: NO we want entities !!! + # NB 04.06.23 ( (<|&lt;)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>|&gt;) ) + # NB 04.06.23 |( (<|&lt;)?\w+(\s+\w+=\S+)*/?(>|&gt;) ) + + ( (<)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>) ) # start tag + |( (<)?\w+(\s+\w+=\S+)*/?(>) ) # end tag |( (<|&lt;)!\[CDATA\[ ) # cdata begin | ( \]\](>|&gt;) ) # cdata end -- 2.47.3