From: Nicolas Boisselier
Date: Wed, 25 Jan 2017 15:17:38 +0000 (+0000)
Subject: bin/html-table2csv
X-Git-Url: https://git.nbdom.net/?a=commitdiff_plain;h=20d3372626efc9be45a62a515c75782c1c54e1e0;p=nb.git

bin/html-table2csv
---

diff --git a/bin/html-table2csv b/bin/html-table2csv
new file mode 100755
index 00000000..470e9381
--- /dev/null
+++ b/bin/html-table2csv
@@ -0,0 +1,243 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+#use LWP::Simple qw/get/;
+use NB::Functions qw/html2txt str_trim/;
+#################################################################################
+#
+# VERSION
+#
+#################################################################################
+my $VERSION = '0.0.1';
+# NB 25.01.17
+# - create script: html-table2csv
+
+#################################################################################
+#
+# GLOBALS
+#
+#################################################################################
+my ($NAME) = $0 =~ m,([^/]+)$,;
+
+#################################################################################
+#
+# ARGS
+#
+#################################################################################
+my $VERBOSE = $main::VERBOSE = 1;
+my $DEBUG = $main::DEBUG = 0;
+
+my %Opt = (
+    'table' => undef,
+);
+get_options(\%Opt);
+help() unless @ARGV;
+$main::_DATA_ = undef;
+
+#################################################################################
+#
+# BEGIN
+#
+#################################################################################
+# Slurp every source into a single string. URLs are fetched with curl through
+# a list-form pipe open, so the URL is never interpreted by a shell; any other
+# argument is read through <>, which keeps "-" meaning STDIN.
+my @sources = @ARGV;
+my $html = '';
+for my $src (@sources) {
+    if ($src =~ m,^\w+://,) {
+        open(my $curl,'-|','curl','-s',$src) or die "$NAME: Can't run curl for $src: $!";
+        $html .= join('',<$curl>);
+        close($curl) or warn "$NAME: curl failed for $src\n";
+    }
+    else {
+        local @ARGV = ($src);
+        $html .= join('',<>);
+    }
+}
+# Collapse whitespace so a tag pair split across lines still matches below.
+$html =~ s/\s+/ /g;
+
+my $table_num = 0;
+
+# Tables are numbered from 1 in document order; --table keeps only that one.
+for my $table ($html =~ m,<table\b[^>]*>(.*?)<\s*/\s*table\s*>,gi) {
+    $table_num++;
+    next if defined $Opt{'table'} and $Opt{table} and $Opt{table} != $table_num;
+
+    for my $tr ($table =~ m,<tr\b[^>]*>(.*?)<\s*/\s*tr\s*>,gi) {
+        my @col;
+
+        for my $td ($tr =~ m,<t[dh]\b[^>]*>(.*?)<\s*/\s*t[dh]\s*>,gi) {
+            $td = html2txt($td);
+            $td =~ s/\s+/ /g;
+            $td = str_trim($td);
+            push(@col,$td);
+        }
+
+        print join("\t",@col)."\n";
+
+    }
+
+}
+
+#################################################################################
+#
+# END
+#
+#################################################################################
+exit 0;
+
+#################################################################################
+#
+# Functions
+#
+#################################################################################
+sub help {
+#------------------------------------------------------------------------------
+# Print help and exit
+#
+# Renders the POD text held in $main::_DATA_ through Pod::Usage. With --help
+# only the SYNOPSIS, DESCRIPTION and OPTIONS sections are requested
+# (verbosity 99); otherwise verbosity 3 is used. Variable names found in the
+# POD ($NAME, $VERSION, ...) are replaced by their values first.
+#------------------------------------------------------------------------------
+
+    require 'Pod/Usage.pm' unless $INC{'Pod/Usage.pm'};
+    require 'Pod/Perldoc.pm' unless $INC{'Pod/Perldoc.pm'};
+
+    # Substitutions
+    # A string eval is the only way to look a variable up by name here; the
+    # names come from this script's own POD, never from user input.
+    sub pod_env {
+        my $v = '';
+        eval '$v = ref(\\'.$_[0].') eq "ARRAY" ? join(" ",'.$_[0].') : '.$_[0].'; return defined $v ? $v : qq|UNDEF|;';
+        return $v;
+    }
+
+    $main::_DATA_ =~ s/([@\$][A-Z_a-z\{\}]+)/pod_env($1)/eg;
+
+    # Create tmp
+    my $in_file = (-e '/dev/shm' ? '/dev/shm' : '/tmp')."/$NAME.$$";
+    my $in;
+    open($in,'>',$in_file) or die "$NAME: Can't write into $in_file: $!";
+    print $in $main::_DATA_;
+    close $in or die "$NAME: Can't write into $in_file: $!";
+
+    # Output
+    # The filter strips the ".$$" suffix wherever the temporary file name
+    # shows up in the rendered text.
+    open(STDOUT,"|perl -pe 's/\\.$$//g'".(($ENV{PAGER}||'') eq 'less' ? "|less -FRi" : ""))
+        or die "$NAME: Can't open output pipe: $!";
+    my $opts = {
+        -input => $in_file,
+        -output => \*STDOUT,
+        -exitval => 'noexit',
+        -sections => [qw(SYNOPSIS DESCRIPTION OPTIONS)],
+        -verbose => ($Opt{'help'} ? 99 : 3),
+    };
+
+    Pod::Usage::pod2usage($opts);
+    close STDOUT;
+    unlink $in_file if $in_file and -e $in_file;
+
+    exit 0;
+}
+
+#------------------------------------------------------------------------------
+# Print version and exit
+#------------------------------------------------------------------------------
+sub version { print "$NAME: version [$VERSION]\n"; exit 0; }
+
+#------------------------------------------------------------------------------
+# Get options from pod
+#
+# Reads the __DATA__ section line by line. Each "option[SPEC]" marker adds
+# SPEC to the Getopt::Long specification list and is replaced in the text by
+# its switch list (see pod_opt); the resulting text is kept in $main::_DATA_
+# for help(). $_[0] is the hash reference that receives the parsed values.
+#------------------------------------------------------------------------------
+sub get_options {
+
+    use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
+
+    my @Opt;
+
+    # Turn a Getopt::Long spec into a switch list: "table|t=i" => "-table, -t"
+    sub pod_opt {
+        local $_;
+        my $o = shift;
+        $o =~ s/(=.|[\+\-\!]$)//;
+        $o = join(", ",map{"-$_"} split(/[\|,:;]/,$o));
+        return "$o";
+    }
+
+    while (<DATA>) {
+        s/option\[([^\]]+)\]/push(@Opt,$1) and pod_opt($1)/eg;
+        $main::_DATA_ .= $_;
+    }
+
+    GetOptions($_[0],@Opt) || exit -1;
+
+    help() if $_[0]{'help'} or $_[0]{'man'};
+    version() if $_[0]{'version'};
+
+    $main::VERBOSE = $VERBOSE = $_[0]{'verbose'} if defined $_[0]{'verbose'};
+    $main::DEBUG = $DEBUG = $_[0]{'debug'} if defined $_[0]{'debug'};
+
+}
+
+__DATA__
+
+=head1 NAME
+
+$NAME - Script to extract html table into csv
+
+=head1 SYNOPSIS
+
+Quick usage:
+
+=over
+
+=item $NAME --verbose
+
+=item $NAME --help
+
+=back
+
+=head1 OPTIONS
+
+ -option[table|t=i] Only dump table number
+ -option[verbose|v+] Verbose mode: increase the verbosity level.
+ -option[debug+] Debug mode: increase the verbosity level.
+ -option[version|V] Print version (default: $VERSION)
+ -option[help|h|?] Print a brief help message and exits.
+ -option[man] Print the manual page and exits.
+
+=cut
+
+=head1 EXAMPLES
+
+...
+ +=head1 REQUIRES + +Getopt::Long, Pod::Usage, NB::Functions + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2017 Nicolas Boisselier + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +See <http://www.gnu.org/licenses/>. + +=head1 SEE ALSO + +perl(1), http://perldoc.perl.org/perlpodstyle.html + +=head1 AUTHOR + +Nicolas Boisselier + +=cut diff --git a/bin/vimplate b/bin/vimplate new file mode 100755 index 00000000..563972d8 --- /dev/null +++ b/bin/vimplate @@ -0,0 +1,368 @@ +#!/usr/bin/perl -w + +use strict; +use warnings; + +=head1 NAME + +vimplate - the vim template system. + +=cut + +use constant VERSION => '0.2.3'; + +use POSIX qw(strftime cuserid setlocale LC_ALL); +use English qw(-no_match_vars); +use Getopt::Long qw(:config no_ignore_case ); +use Pod::Usage; + +my $vimplaterc=''; + +=head1 DEPENDS on PACKAGES + +B http://search.cpan.org/~abw/Template-Toolkit-2.14 + +please install Template-Toolkit on your system. + +=cut + +BEGIN { + eval { require Template; }; + if ($EVAL_ERROR=~/Can't locate Template.pm/) { + print STDERR "$EVAL_ERROR"; + print STDERR '-' x 60, "\n"; + print STDERR "please install Template-Toolkit!\n"; + print STDERR "example with $^X -MCPAN -e\"install Template\"\n"; + print STDERR '-' x 60, "\n"; + exit 1; + } +} + +=head1 DEPENDS on SETTINGS + +B + +on unix/bsd/linux the variable home is set. +On Windows please set the variable home to the value +where _vimplaterc should be located. + +=cut + +unless ( $ENV{'HOME'} ) { + print STDERR "Variable HOME isn't set!\n"; + print STDERR "Please read the documentation.\n"; + exit 1; +} +else { + if ( $^O =~ /Win/ ) { + $vimplaterc = $ENV{'HOME'} . 
'/_vimplaterc'; + unless ( $ENV{'USER'} ) { + $ENV{'USER'}=$ENV{'USERNAME'}; + } + else { + print STDERR "Variable USER isn't set!\n"; + print STDERR "Please set this variable.\n"; + } + } + else { + $vimplaterc = $ENV{'HOME'} . '/.vimplaterc'; + } +} + +=head1 SYNOPSIS + +=over 4 + +=item vimplate <-template=