]> git.nbdom.net Git - nb.git/commitdiff
html2txt links
author Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 8 Jul 2023 22:12:34 +0000 (00:12 +0200)
committer Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 8 Jul 2023 22:12:34 +0000 (00:12 +0200)
lib/perl/NB/Functions.pm

index 7146358b118fb68ceb93db0982558276b58cc6b5..8e7c6b649b0688fcb04b7c97f7eac36aa73e4ae1 100644 (file)
@@ -557,10 +557,16 @@ sub html2txt {
                ( \\n )
        ,\n,gix;
 
+  # Links
+       $v =~ s,
+       <a[^>]+href="([^"]+)"[^>]*>([^<]+)</a>
+  ,$2 ( $1 ),gix;
+
   # Delete
        $v =~ s,(^_DUMMY_$) # never happens; only here so the alternatives below can be reordered easily
 
                |( <!--\[if [^\]]+\]>.*?<!\[endif\]--> ) # condition
+               |( <!--.*?--> ) # comment
 
                |( <style[^>]*>.*?</style> ) # style
                |( <noscript[^>]*>.*?</noscript> ) # noscript
@@ -581,6 +587,7 @@ sub html2txt {
        ,,sgix;
 
        # NB 03.07.23: Multiple empty noisy lines
+       #$v =~ s/([ \t\r]*\n){2}([ \t\r]*\n)*/$1/sg;
        $v =~ s/\n(\s*\n)+/\n/sg;
 
        # Multiple tabs