my $DEBUG = $main::DEBUG = 0;
my %Opt = (
- 'table' => undef,
+ 'num' => undef, # element number to dump; key matches the num|n=i option spec and the $Opt{num} test
+ 'tag' => 'table', # key into %TAGS selecting which element family is extracted
+ 'sep' => "\t", # separator handed to join() when a row is printed
+ 'html' => 0, # when true, cell contents are not passed through html2txt()
);
get_options(\%Opt);
help() unless @ARGV;
#################################################################################
@ARGV = map {m,^\w+://, ? "curl -s '$_' |" : $_} @ARGV if @ARGV;
my $html = join('',<>);
+$html =~ s/[\r\n]+//g; # NOTE(review): line breaks are deleted (not replaced by a space), so text on either side of a break is joined before whitespace is collapsed below - confirm this is intended
$html =~ s/\s+/ /g;
+my %TAGS = ( # per supported -tag value: [ container, row, cell ] tag-name regex fragments
+ 'table' => [
+ 'table',
+ 'tr',
+ 't[dh]',
+ ],
+ 'dl' => [
+ 'dl',
+ '', # no row element inside <dl>: the extraction loop wraps the whole body in <>...</> so it is handled as a single row
+ 'd[td]',
+ ],
+);
+# Fail early with a readable message on an unsupported -tag value instead of dereferencing an undefined array reference.
+die "Unsupported tag '".(defined $Opt{tag} ? $Opt{tag} : '')."' (supported: ".join(', ',sort keys %TAGS).")\n" unless defined $Opt{tag} and exists $TAGS{$Opt{tag}};
+my ($T1,$T2,$T3) = @{ $TAGS{$Opt{tag}} };
my $table_num = 0;
-for my $table ($html =~ m,<table[^>]*>(.*?)<\s*/\s*table\s*>,gi) {
+for my $table ($html =~ m,<${T1}[^>]*>(.*?)<\s*/\s*${T1}\s*>,gi) {
$table_num++;
- next if defined $Opt{'table'} and $Opt{table} and $Opt{table} != $table_num;
+ #warn $table_num;
+ next if defined $Opt{'num'} and $Opt{num} and $Opt{num} != $table_num;
- for my $tr ($table =~ m,<tr[^>]*>(.*?)<\s*/\s*tr\s*>,gi) {
+ $table = "<>$table</>" unless $T2;
+ for my $tr ($table =~ m,<${T2}[^>]*>(.*?)<\s*/\s*${T2}\s*>,gi) {
my @col;
- for my $td ($tr =~ m,<t[dh][^>]*>(.*?)<\s*/\s*t[dh]\s*>,gi) {
- $td = html2txt($td);
+ for my $td ($tr =~ m,<${T3}[^>]*>(.*?)<\s*/\s*${T3}\s*>,gi) {
+ $td = html2txt($td) unless $Opt{html};
$_ = chr(194).chr(160); $td =~ s/$_/ /g;
$td =~ s/\s+/ /g;
$td = str_trim($td);
push(@col,$td);
}
- print join("\t",@col)."\n";
+ print join($Opt{sep},@col)."\n";
}
=head1 OPTIONS
- -option[table|t=i] Only dump table number
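+ -option[tag|T=s] Element type to extract: table or dl (default: table)
+ -option[sep|s=s] Output column separator (default: tab)
+ -option[html!] Keep raw HTML in cells, i.e. skip the html2txt conversion (default: off)
+ -option[num|n=i] Only dump the N-th matching element, counting from 1 (default: all)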
-option[verbose|v+] Verbose mode: increase the verbosity level.
-option[debug+] Debug mode: increase the verbosity level.
-option[version|V] Print version (default: $VERSION)
=cut
-=head1 EXAMPLES
-
-...
-
-=head1 REQUIRES
-
-Getopt::Std, Pod::Usage
-
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2017 Nicolas Boisselier