From: Nicolas Boisselier Date: Sun, 4 Jun 2023 18:08:49 +0000 (+0200) Subject: html2txt fix delete entities X-Git-Url: https://git.nbdom.net/?a=commitdiff_plain;h=699461386cdfcbddfa37967a85bebf98c1e26abb;p=nb.git html2txt fix delete entities --- diff --git a/lib/perl/NB/Functions.pm b/lib/perl/NB/Functions.pm index 9e94d0da..ff275047 100644 --- a/lib/perl/NB/Functions.pm +++ b/lib/perl/NB/Functions.pm @@ -546,8 +546,12 @@ sub html2txt { # Delete $v =~ s, - ( (<|&lt;)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>|&gt;) ) - |( (<|&lt;)?\w+(\s+\w+=\S+)*/?(>|&gt;) ) + # NB 04.06.23: NO we want entities !!! + # NB 04.06.23 ( (<|&lt;)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>|&gt;) ) + # NB 04.06.23 |( (<|&lt;)?\w+(\s+\w+=\S+)*/?(>|&gt;) ) + + ( (<)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>) ) # start tag + |( (<)?\w+(\s+\w+=\S+)*/?(>) ) # end tag |( (<|&lt;)!\[CDATA\[ ) # cdata begin | ( \]\](>|&gt;) ) # cdata end