From: Nicolas Boisselier Date: Sat, 29 Jul 2023 15:17:45 +0000 (+0200) Subject: html2txt tag p for new line, tag regex containing : X-Git-Url: https://git.nbdom.net/?a=commitdiff_plain;h=11b9d5c1c978e9d602cf3d0795e43814733201b9;p=nb.git html2txt tag p for new line, tag regex containing : --- diff --git a/lib/perl/NB/Functions.pm b/lib/perl/NB/Functions.pm index 6636155c..4a0058e3 100644 --- a/lib/perl/NB/Functions.pm +++ b/lib/perl/NB/Functions.pm @@ -533,18 +533,6 @@ sub html2txt { my $v = shift @_; # New line - # NB 08.07.23 $v =~ s, - # NB 08.07.23 ( - # NB 08.07.23 (<|&lt;)(tr|li|)(\s+[^>])*> - # NB 08.07.23 ) - # NB 08.07.23 | - # NB 08.07.23 ( - # NB 08.07.23 (<|&lt;)(br|hr)(\s*/\s*)?> - # NB 08.07.23 ) - # NB 08.07.23 | - # NB 08.07.23 ( \\n ) - # NB 08.07.23 ,\n,gix; - $v =~ s, ( <(tr|li|)(\s+[^>])*> @@ -556,15 +544,21 @@ sub html2txt { | ( \\n ) ,\n,gix; + $v =~ s, + (<p[^>]*>.*?</p>) + ,$1\n,sgix; # Links $v =~ s, ]+href="([^"]+)"[^>]*>\g1 - ,$1,gix; # when target equal text + ,$1,gix; # when href equal text $v =~ s, ]+href="([^"]+)"[^>]*>([^<]+) ,$2 ( $1 ),gix; + # Make sure that tags match the regexp \w (eg: ) ) # start tag - |( (<)?\w+(\s+\w+=\S+)*/?(>) ) # end tag + |( (<)/?${tag}+(\s+${tag}+=['"]+[^'"]+['"]+)*/?(>) ) # start tag + |( (<)?${tag}+(\s+${tag}+=\S+)*/?(>) ) # end tag |( ) # cdata end