From: Nicolas Boisselier Date: Sun, 4 Jun 2023 19:04:08 +0000 (+0200) Subject: fix html entities X-Git-Url: https://git.nbdom.net/?a=commitdiff_plain;h=47d34a14d63396951c176a0ffb8406002621cf56;p=nb.git fix html entities --- diff --git a/lib/perl/NB/Functions.pm b/lib/perl/NB/Functions.pm index 75d14cde..aa43fd9b 100644 --- a/lib/perl/NB/Functions.pm +++ b/lib/perl/NB/Functions.pm @@ -554,11 +554,18 @@ sub html2txt { ( (<)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>) ) # start tag |( (<)?\w+(\s+\w+=\S+)*/?(>) ) # end tag - |( (<|<)!\[CDATA\[ ) # cdata begin - | ( \]\](>|>) ) # cdata end + # NB 04.06.23: NO we want entities !!! + # NB 04.06.23 |( (<|<)!\[CDATA\[ ) # cdata begin + # NB 04.06.23 | ( \]\](>|>) ) # cdata end + |( ) # cdata end + + # NB 04.06.23: NO we want entities !!! + # NB 04.06.23 |( (<|<)/\w+\s*$ ) # incomplet html + # NB 04.06.23 |( ^(<|<)\w+ .* ) # incomplet html + |( ]+>) |([\w_-]+="[^"]+") # attrs |( <\w[^>]+> ) # tags