From 47d34a14d63396951c176a0ffb8406002621cf56 Mon Sep 17 00:00:00 2001 From: Nicolas Boisselier Date: Sun, 4 Jun 2023 21:04:08 +0200 Subject: [PATCH] fix html entities --- lib/perl/NB/Functions.pm | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/perl/NB/Functions.pm b/lib/perl/NB/Functions.pm index 75d14cde..aa43fd9b 100644 --- a/lib/perl/NB/Functions.pm +++ b/lib/perl/NB/Functions.pm @@ -554,11 +554,18 @@ sub html2txt { ( (<)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>) ) # start tag |( (<)?\w+(\s+\w+=\S+)*/?(>) ) # end tag - |( (<|<)!\[CDATA\[ ) # cdata begin - | ( \]\](>|>) ) # cdata end + # NB 04.06.23: NO we want entities !!! + # NB 04.06.23 |( (<|<)!\[CDATA\[ ) # cdata begin + # NB 04.06.23 | ( \]\](>|>) ) # cdata end + |( ) # cdata end + + # NB 04.06.23: NO we want entities !!! + # NB 04.06.23 |( (<|<)/\w+\s*$ ) # incomplet html + # NB 04.06.23 |( ^(<|<)\w+ .* ) # incomplet html + |( ]+>) |([\w_-]+="[^"]+") # attrs |( <\w[^>]+> ) # tags -- 2.47.3