git.nbdom.net Git - nb.git/commitdiff
fix html entities
author Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sun, 4 Jun 2023 19:04:08 +0000 (21:04 +0200)
committer Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sun, 4 Jun 2023 19:04:08 +0000 (21:04 +0200)
lib/perl/NB/Functions.pm

index 75d14cde561890ebe1548467cec9f4167a17ceb9..aa43fd9ba6f40d91acd2282f30bec02cd8802cc7 100644 (file)
@@ -554,11 +554,18 @@ sub html2txt {
                ( (<)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>) ) # start tag
                |( (<)?\w+(\s+\w+=\S+)*/?(>) ) # end tag
 
-               |( (<|&lt;)!\[CDATA\[ ) # cdata begin
-               | ( \]\](>|&gt;) ) # cdata end
+       # NB 04.06.23: NO we want entities !!!  
+               # NB 04.06.23 |( (<|&lt;)!\[CDATA\[ ) # cdata begin
+               # NB 04.06.23 | ( \]\](>|&gt;) ) # cdata end
+               |( <!\[CDATA\[ ) # cdata begin
+               |( \]\]> ) # cdata end
+
+       # NB 04.06.23: NO we want entities !!!  
+               # NB 04.06.23 |( (<|&lt;)/\w+\s*$ ) # incomplet html
+               # NB 04.06.23 |( ^(<|&lt;)\w+ .* ) # incomplet html
+               |( </\w+\s*$ ) # incomplet html
+               |( ^<\w+ .* ) # incomplet html
 
-               |( (<|&lt;)/\w+\s*$ ) # incomplet html
-               |( ^(<|&lt;)\w+ .* ) # incomplet html
     |(<\!DOCTYPE[^>]+>)
     |([\w_-]+="[^"]+") # attrs
                |( <\w[^>]+> ) # tags