my %Opt = (
'num' => undef,
+ 'print_num' => 0,
'tag' => 'table',
'exp1'=> '',
'exp2'=> '',
# Turn every URL-looking argument (scheme://...) into a "curl -s 'URL' |"
# pipe spec; the <> read below opens its arguments with two-argument open,
# so such an entry is run as a command and its output is read. Any other
# argument is left untouched.
# NOTE(review): the URL is pasted unescaped between single quotes into a
# shell command line; a URL containing a quote would break out of the quoting.
@ARGV = map {m,^\w+://, ? "curl -s '$_' |" : $_} @ARGV if @ARGV;
# Slurp all input (everything named in @ARGV, or STDIN) into one string.
my $html = join('',<>);
# Remove all line breaks: the element regexes further down use '.' without
# the /s flag, so they could not match across a newline.
$html =~ s/[\r\n]+//g;
# Replace whatever matches the output separator with a space. $Opt{sep} is
# interpolated as a regex here (not quoted) and the global $_ is overwritten.
# Presumably this keeps cell text from containing the separator - confirm.
+$_ = $Opt{sep}; $html =~ s/$_/ /g;
# Collapse every run of whitespace into a single space.
$html =~ s/\s+/ /g;
my %TAGS = (
'dl',
'',
'd[td]',
+ 2,
],
);
# Abort early when the requested tag has no entry in %TAGS.
exists $TAGS{$Opt{tag}} or die "$NAME: Unknown tag '$Opt{tag}'\n";
# Unpack the selected %TAGS entry: $T1, $T2 and $T3 are the tag-name patterns
# of the outer, second-level and third-level elements used by the nested
# matches below; $COUNT is an optional fourth entry.
-my ($T1,$T2,$T3) = @{ $TAGS{$Opt{tag}} };
+my ($T1,$T2,$T3,$COUNT) = @{ $TAGS{$Opt{tag}} };
# A missing (or otherwise false) $COUNT becomes 0, which disables the
# $COUNT-based flushing in the cell loop.
+$COUNT ||= 0;
# Running index of the outer elements matched so far.
my $table_num = 0;
# Optional attribute filters, one per nesting level. A non-empty filter is
# prefixed with "[^>]*" so it can match anywhere inside the opening tag.
my $exp1 = $Opt{exp1}; $exp1 and $exp1 = "[^>]*$exp1";
my $exp2 = $Opt{exp2}; $exp2 and $exp2 = "[^>]*$exp2";
my $exp3 = $Opt{exp3}; $exp3 and $exp3 = "[^>]*$exp3";
+
# Walk every outer <$T1> element, then the <$T2> elements inside it, then the
# <$T3> cells of each of those, printing the cells joined with $Opt{sep}.
for my $table ($html =~ m,<${T1}${exp1}[^>]*>(.*?)<\s*/\s*${T1}\s*>,gi) {
$table_num++;
#warn $table_num;
# With a non-zero --num, skip every outer element except that one.
next if defined $Opt{'num'} and $Opt{num} and $Opt{num} != $table_num;
# No second-level tag for this tag type: wrap the content in an empty-named
# element so the pattern below (with an empty $T2) can match the wrapper.
$table = "<>$table</>" unless $T2;
- for my $tr ($table =~ m,<${T2}${exp2}[^>]*>(.*?)<\s*/\s*${T2}\s*>,gi) {
# The group around $T2 must be non-capturing: in list context m//g returns
# every capture of every match, so a capturing group would feed the tag name
# itself into this loop as a bogus extra row.
+ for my $tr ($table =~ m,<(?:${T2})${exp2}[^>]*>(.*?)<\s*/\s*${T2}\s*>,gi) {
my @col;
+ print "$table_num " if $Opt{print_num};
# Same empty-element wrapper when there is no third-level tag.
+ $tr = "<>$tr</>" unless $T3;
+ my $count = 0;
for my $td ($tr =~ m,<${T3}${exp3}[^>]*>(.*?)<\s*/\s*${T3}\s*>,gi) {
$td = html2txt($td) unless $Opt{html};
# chr(194).chr(160) is the UTF-8 byte pair of U+00A0 (no-break space).
$_ = chr(194).chr(160); $td =~ s/$_/ /g;
$td =~ s/\s+/ /g;
$td = str_trim($td);
+ $count++;
push(@col,$td);
# When the tag type defines $COUNT, emit one line per $COUNT cells and
# restart the counter so every following group is complete as well.
+ if (1 and $COUNT and $count >= $COUNT) {
+ print join($Opt{sep},@col)."\n";
+ @col = ();
+ $count = 0;
+ }
}
- print join($Opt{sep},@col)."\n";
# Print the remaining cells; nothing is left when the last group was flushed.
+ print join($Opt{sep},@col)."\n" if @col;
+ #print "\n" unless $T3;
}
+ #print "\n" unless $T2;
}
-option[sep|s=s] Default: tab (\t)
-option[html!] Escape html (default: yes)
-option[num|n=i] Only dump table number
+ -option[print_num|pn!] Print num tag prefix
-option[exp1|E1=s] Regexp filter on tag 1
-option[exp2|E2=s] Regexp filter on tag 2
-option[exp3|E3=s] Regexp filter on tag 3