From 11b9d5c1c978e9d602cf3d0795e43814733201b9 Mon Sep 17 00:00:00 2001 From: Nicolas Boisselier Date: Sat, 29 Jul 2023 17:17:45 +0200 Subject: [PATCH] html2txt tag p for new line, tag regex containing : --- lib/perl/NB/Functions.pm | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/lib/perl/NB/Functions.pm b/lib/perl/NB/Functions.pm index 6636155c..4a0058e3 100644 --- a/lib/perl/NB/Functions.pm +++ b/lib/perl/NB/Functions.pm @@ -533,18 +533,6 @@ sub html2txt { my $v = shift @_; # New line - # NB 08.07.23 $v =~ s, - # NB 08.07.23 ( - # NB 08.07.23 (<|<)(tr|li|)(\s+[^>])*> - # NB 08.07.23 ) - # NB 08.07.23 | - # NB 08.07.23 ( - # NB 08.07.23 (<|<)(br|hr)(\s*/\s*)?> - # NB 08.07.23 ) - # NB 08.07.23 | - # NB 08.07.23 ( \\n ) - # NB 08.07.23 ,\n,gix; - $v =~ s, ( <(tr|li|)(\s+[^>])*> @@ -556,15 +544,21 @@ sub html2txt { | ( \\n ) ,\n,gix; + $v =~ s, + (]*>.*?

) + ,$1\n,sgix; # Links $v =~ s, ]+href="([^"]+)"[^>]*>\g1 - ,$1,gix; # when target equal text + ,$1,gix; # when href equal text $v =~ s, ]+href="([^"]+)"[^>]*>([^<]+) ,$2 ( $1 ),gix; + # Make sure that tags match the regexp \w (eg: ) ) # start tag - |( (<)?\w+(\s+\w+=\S+)*/?(>) ) # end tag + |( (<)/?${tag}+(\s+${tag}+=['"]+[^'"]+['"]+)*/?(>) ) # start tag + |( (<)?${tag}+(\s+${tag}+=\S+)*/?(>) ) # end tag |( ) # cdata end -- 2.47.3