From b603f85bb6a373cb171ae4b581ad5642a1ef29f2 Mon Sep 17 00:00:00 2001 From: Nicolas Boisselier Date: Mon, 21 May 2018 15:53:41 +0100 Subject: [PATCH] bin/html2csv --- Note: the --exp1/--exp2/--exp3 values are wrapped in (?:...) before being spliced into the tag patterns, so a filter such as 'foo|bar' stays one group instead of splitting the whole pattern at the '|'. bin/{html-table2csv => html2csv} | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) rename bin/{html-table2csv => html2csv} (88%) diff --git a/bin/html-table2csv b/bin/html2csv similarity index 88% rename from bin/html-table2csv rename to bin/html2csv index 29fd4900..0e280c5d 100755 --- a/bin/html-table2csv +++ b/bin/html2csv @@ -28,8 +28,11 @@ my $VERBOSE = $main::VERBOSE = 1; my $DEBUG = $main::DEBUG = 0; my %Opt = ( - 'nume' => undef, + 'num' => undef, 'tag' => 'table', + 'exp1'=> '', + 'exp2'=> '', + 'exp3'=> '', 'sep' => "\t", 'html' => 0, ); @@ -59,19 +62,23 @@ my %TAGS = ( 'd[td]', ], ); +exists $TAGS{$Opt{tag}} or die "$NAME: Unknown tag '$Opt{tag}'\n"; my ($T1,$T2,$T3) = @{ $TAGS{$Opt{tag}} }; my $table_num = 0; -for my $table ($html =~ m,<${T1}[^>]*>(.*?)<\s*/\s*${T1}\s*>,gi) { +my $exp1 = $Opt{exp1}; $exp1 and $exp1 = "[^>]*(?:$exp1)"; +my $exp2 = $Opt{exp2}; $exp2 and $exp2 = "[^>]*(?:$exp2)"; +my $exp3 = $Opt{exp3}; $exp3 and $exp3 = "[^>]*(?:$exp3)"; +for my $table ($html =~ m,<${T1}${exp1}[^>]*>(.*?)<\s*/\s*${T1}\s*>,gi) { $table_num++; #warn $table_num; next if defined $Opt{'num'} and $Opt{num} and $Opt{num} != $table_num; $table = "<>$table" unless $T2; - for my $tr ($table =~ m,<${T2}[^>]*>(.*?)<\s*/\s*${T2}\s*>,gi) { + for my $tr ($table =~ m,<${T2}${exp2}[^>]*>(.*?)<\s*/\s*${T2}\s*>,gi) { my @col; - for my $td ($tr =~ m,<${T3}[^>]*>(.*?)<\s*/\s*${T3}\s*>,gi) { + for my $td ($tr =~ m,<${T3}${exp3}[^>]*>(.*?)<\s*/\s*${T3}\s*>,gi) { $td = html2txt($td) unless $Opt{html}; $_ = chr(194).chr(160); $td =~ s/$_/ /g; $td =~ s/\s+/ /g; @@ -196,9 +203,12 @@ Quick usage: =head1 OPTIONS -option[tag|T=s] Default: table - -option[sep|s=s] Default: tab + -option[sep|s=s] Default: tab (\t) -option[html!] 
 Escape html (default: yes) -option[num|n=i] Only dump table number + -option[exp1|E1=s] Regexp filter on tag 1 (outermost) + -option[exp2|E2=s] Regexp filter on tag 2 (nested in tag 1) + -option[exp3|E3=s] Regexp filter on tag 3 (nested in tag 2) -option[verbose|v+] Verbose mode: increase the verbosity level. -option[debug+] Debug mode: increase the verbosity level. -option[version|V] Print version (default: $VERSION) -- 2.47.3