]> git.nbdom.net Git - nb.git/commitdiff
html2txt fix start and end tags exp
author Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 2 Dec 2023 15:02:51 +0000 (16:02 +0100)
committer Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 2 Dec 2023 15:02:51 +0000 (16:02 +0100)
lib/perl/NB/Functions.pm

index 5289c20f12c2a2fadb28e4727ccf00ffa0c15a76..0ace1c6a9601311a15edffb711fd498eaf9de421 100644 (file)
@@ -553,9 +553,19 @@ sub html2txt {
                |
                ( \\n )
        ,\n,gix;
-       $v =~ s,
-       (<p[^>]*>.*?</p>)
-  ,$1\n,sgix;
+
+       # Text tags
+  # NB 02.12.23 for my $tag (qw(
+       # NB 02.12.23 div
+       # NB 02.12.23 p
+       # NB 02.12.23 span
+       # NB 02.12.23 pre
+       # NB 02.12.23 code
+  # NB 02.12.23 ))
+  # NB 02.12.23 {
+               # NB 02.12.23 my $exp = "<".$tag."[^>]*>(.*?)</$tag>";
+               # NB 02.12.23 $v =~ s,$exp,$1\n,sgi;
+  # NB 02.12.23 }
 
   # Links
        $v =~ s,
@@ -569,6 +579,8 @@ sub html2txt {
   my $tag = '[\w:]';
 
   # Delete
+               # NB 02.12.23 |( (<)/?${tag}+(\s+${tag}+=['"]+[^'"]+['"]+)*/?(>) ) # start tag
+               # NB 02.12.23 |( (<)?${tag}+(\s+${tag}+=\S+)*/?(>) ) # end tag
        $v =~ s,(^_DUMMY_$) # never happens; only here to make reordering the regexp alternatives easy
 
                |( <!--\[if [^\]]+\]>.*?<!\[endif\]--> ) # condition
@@ -578,8 +590,8 @@ sub html2txt {
                |( <noscript[^>]*>.*?</noscript> ) # noscript
                |( <script[^>]*>.*?</script> ) # script
 
-               |( (<)/?${tag}+(\s+${tag}+=['"]+[^'"]+['"]+)*/?(>) ) # start tag
-               |( (<)?${tag}+(\s+${tag}+=\S+)*/?(>) ) # end tag
+               |( (<)${tag}+(\s+${tag}+=['"]+[^'"]+['"]+)*(>) ) # start tag
+               |( </${tag}+\s*> ) # end tag
 
                |( <!\[CDATA\[ ) # cdata begin
                |( \]\]> ) # cdata end