From: Nicolas Boisselier Date: Sat, 2 Dec 2023 15:02:51 +0000 (+0100) Subject: html2txt fix start and end tags exp X-Git-Url: https://git.nbdom.net/?a=commitdiff_plain;h=ebef71d20e79e5a78691a3f363fef174425bf3c5;p=nb.git html2txt fix start and end tags exp --- diff --git a/lib/perl/NB/Functions.pm b/lib/perl/NB/Functions.pm index 5289c20f..0ace1c6a 100644 --- a/lib/perl/NB/Functions.pm +++ b/lib/perl/NB/Functions.pm @@ -553,9 +553,19 @@ sub html2txt { | ( \\n ) ,\n,gix; - $v =~ s, - (]*>.*?

) - ,$1\n,sgix; + + # Text tags + # NB 02.12.23 for my $tag (qw( + # NB 02.12.23 div + # NB 02.12.23 p + # NB 02.12.23 span + # NB 02.12.23 pre + # NB 02.12.23 code + # NB 02.12.23 )) + # NB 02.12.23 { + # NB 02.12.23 my $exp = "<".$tag."[^>]*>(.*?)"; + # NB 02.12.23 $v =~ s,$exp,$1\n,sgi; + # NB 02.12.23 } # Links $v =~ s, @@ -569,6 +579,8 @@ sub html2txt { my $tag = '[\w:]'; # Delete + # NB 02.12.23 |( (<)/?${tag}+(\s+${tag}+=['"]+[^'"]+['"]+)*/?(>) ) # start tag + # NB 02.12.23 |( (<)?${tag}+(\s+${tag}+=\S+)*/?(>) ) # end tag $v =~ s,(^_DUMMY_$) # never happend only for easy regexp change order |( ) # condition @@ -578,8 +590,8 @@ sub html2txt { |( ]*>.*? ) # noscript |( ]*>.*? ) # script - |( (<)/?${tag}+(\s+${tag}+=['"]+[^'"]+['"]+)*/?(>) ) # start tag - |( (<)?${tag}+(\s+${tag}+=\S+)*/?(>) ) # end tag + |( (<)${tag}+(\s+${tag}+=['"]+[^'"]+['"]+)*(>) ) # start tag + |( ) # end tag |( ) # cdata end