git.nbdom.net Git - nb.git/commitdiff
html2txt fix delete entities
author Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sun, 4 Jun 2023 18:08:49 +0000 (20:08 +0200)
committer Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sun, 4 Jun 2023 18:08:49 +0000 (20:08 +0200)
lib/perl/NB/Functions.pm

index 9e94d0da21f8991425e0f4e82018d4bf72c0ba10..ff275047cae27d1c6d98e2f24a88ce3331fc6b66 100644 (file)
@@ -546,8 +546,12 @@ sub html2txt {
 
   # Delete
        $v =~ s,
-               ( (<|&lt;)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>|&gt;) )
-               |( (<|&lt;)?\w+(\s+\w+=\S+)*/?(>|&gt;) )
+       # NB 04.06.23: NO we want entities !!!  
+               # NB 04.06.23 ( (<|&lt;)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>|&gt;) )
+               # NB 04.06.23 |( (<|&lt;)?\w+(\s+\w+=\S+)*/?(>|&gt;) )
+
+               ( (<)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>) ) # start tag
+               |( (<)?\w+(\s+\w+=\S+)*/?(>) ) # end tag
 
                |( (<|&lt;)!\[CDATA\[ ) # cdata begin
                | ( \]\](>|&gt;) ) # cdata end