my $v = shift @_;
# New line
- # NB 08.07.23 $v =~ s,
- # NB 08.07.23 (
- # NB 08.07.23 (<|<)(tr|li|)(\s+[^>])*>
- # NB 08.07.23 )
- # NB 08.07.23 |
- # NB 08.07.23 (
- # NB 08.07.23 (<|<)(br|hr)(\s*/\s*)?>
- # NB 08.07.23 )
- # NB 08.07.23 |
- # NB 08.07.23 ( \\n )
- # NB 08.07.23 ,\n,gix;
-
$v =~ s,
(
<(tr|li|)(\s+[^>])*>
|
( \\n )
,\n,gix;
+ $v =~ s,
+ (<p[^>]*>.*?</p>)
+ ,$1\n,sgix;
# Links
$v =~ s,
<a[^>]+href="([^"]+)"[^>]*>\g1</a>
- ,$1,gix; # when target equal text
+ ,$1,gix; # when href equals the link text
$v =~ s,
<a[^>]+href="([^"]+)"[^>]*>([^<]+)</a>
,$2 ( $1 ),gix;
+ # Tag and attribute names may contain ':' (eg: <o:shapedefaults v:ext="...), which \w alone does not match
+ my $tag = '[\w:]';
+
# Delete
$v =~ s,(^_DUMMY_$) # never happens; only here to make reordering the regexp alternatives easy
|( <noscript[^>]*>.*?</noscript> ) # noscript
|( <script[^>]*>.*?</script> ) # script
- |( (<)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>) ) # start tag
- |( (<)?\w+(\s+\w+=\S+)*/?(>) ) # end tag
+ |( (<)/?${tag}+(\s+${tag}+=['"]+[^'"]+['"]+)*/?(>) ) # start tag
+ |( (<)?${tag}+(\s+${tag}+=\S+)*/?(>) ) # end tag
|( <!\[CDATA\[ ) # cdata begin
|( \]\]> ) # cdata end