,\n,gix;
# Delete
- $v =~ s,
- # NB 04.06.23: NO we want entities !!!
- # NB 04.06.23 ( (<|<)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>|>) )
- # NB 04.06.23 |( (<|<)?\w+(\s+\w+=\S+)*/?(>|>) )
+ $v =~ s,(^_DUMMY_$) # placeholder - matches only a string that is exactly _DUMMY_ - so every real alternative below starts with | and can be reordered freely
+
+ |( <!--\[if [^\]]+\]>.*?<!\[endif\]--> ) # conditional comment block - from <!--[if ...]> up to the first <![endif]-->
+
+ |( <style[^>]*>.*?</style> ) # whole style element including its content
+ |( <noscript[^>]*>.*?</noscript> ) # whole noscript element including its content
+ |( <script[^>]*>.*?</script> ) # whole script element including its content
- ( (<)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>) ) # start tag
+ |( (<)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>) ) # opening or closing tag whose attribute values are quoted
|( (<)?\w+(\s+\w+=\S+)*/?(>) ) # end tag
- # NB 04.06.23: NO we want entities !!!
- # NB 04.06.23 |( (<|<)!\[CDATA\[ ) # cdata begin
- # NB 04.06.23 | ( \]\](>|>) ) # cdata end
|( <!\[CDATA\[ ) # cdata begin
|( \]\]> ) # cdata end
- # NB 04.06.23: NO we want entities !!!
- # NB 04.06.23 |( (<|<)/\w+\s*$ ) # incomplet html
- # NB 04.06.23 |( ^(<|<)\w+ .* ) # incomplet html
|( </\w+\s*$ ) # incomplet html
|( ^<\w+ .* ) # incomplet html
|(<\!DOCTYPE[^>]+>)
|([\w_-]+="[^"]+") # attrs
|( <\w[^>]+> ) # tags
- ,,gix;
+ ,,sgix; # /s added so that .*? in the conditional-comment / style / noscript / script alternatives can span newlines
+
+ # NB 03.07.23: Collapse a newline followed by one or more empty / whitespace-only lines into a single newline
+ $v =~ s/\n(\s*\n)+/\n/sg;
+
+ # Multiple tabs (collapsing them is currently disabled)
+ #$v =~ s/\t+/\t/g;
$v =~ s/[\f ]+/ /g;
&str_trim($v);