From 0018c7a459a03e36807e3fa78a3d85b20a556bd2 Mon Sep 17 00:00:00 2001 From: Nicolas Boisselier Date: Wed, 6 Dec 2017 02:38:07 +0000 Subject: [PATCH] lib/xslt/csv.xslt --- etc/profile.d/net.sh | 4 ++++ lib/xslt/csv.xslt | 28 ++++++++++++++++++++++++++++ share/db/adservers | 2 ++ share/db/http_status | 13 +++++++++++++ share/db/port | 9 +++++++++ share/db/update.sh | 31 ++----------------------------- share/db/useragent | 3 +++ 7 files changed, 61 insertions(+), 29 deletions(-) create mode 100644 lib/xslt/csv.xslt create mode 100755 share/db/adservers create mode 100755 share/db/http_status create mode 100755 share/db/port create mode 100755 share/db/useragent diff --git a/etc/profile.d/net.sh b/etc/profile.d/net.sh index 5df372d9..ba6911f8 100644 --- a/etc/profile.d/net.sh +++ b/etc/profile.d/net.sh @@ -30,3 +30,7 @@ cdr2mask() { html2txt() { lynx -pseudo_inlines -nomargins -display_charset=utf8 -nocolor -nolist -width=999 -dump $@ } + +xml2csv() { + xsltproc lib/xslt/csv.xslt $@ +} diff --git a/lib/xslt/csv.xslt b/lib/xslt/csv.xslt new file mode 100644 index 00000000..5da10f5a --- /dev/null +++ b/lib/xslt/csv.xslt @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/share/db/adservers b/share/db/adservers new file mode 100755 index 00000000..42d32082 --- /dev/null +++ b/share/db/adservers @@ -0,0 +1,2 @@ +#!/usr/bin/env sh +curl -s http://pgl.yoyo.org/adservers/serverlist.php?hostformat=nohtml diff --git a/share/db/http_status b/share/db/http_status new file mode 100755 index 00000000..2cae0465 --- /dev/null +++ b/share/db/http_status @@ -0,0 +1,13 @@ +#!/usr/bin/env perl +use strict; +use warnings; +@ARGV = ('w3m -dump -cols 10000 https://en.wikipedia.org/wiki/List_of_HTTP_status_codes |'); + +while (<>) { + next unless /^\d\d\d / .. /^ /; + s/.\[\d+\]//g; + s/^ *//; + s/\t/ /g; + s/^(\d+) ([\w _-]+).*?\n$/$1\t$2\t/; + print; +} diff --git a/share/db/port b/share/db/port new file mode 100755 index 00000000..473aab60 --- /dev/null +++ b/share/db/port @@ -0,0 +1,9 @@ +#!/usr/bin/env perl +use strict; +use warnings; + +@ARGV = ('/etc/services'); +while (<>) { + m,^(\w+)\s+(\d+)/(\w+)\s+#\s*(.*?)$, or next; + print join(qq|\t|,map(/^\s*(.*?)\s*$/,$2,$3,$1)).qq|\n|; +} diff --git a/share/db/update.sh b/share/db/update.sh index a169d5e7..104f843f 100755 --- a/share/db/update.sh +++ b/share/db/update.sh @@ -35,37 +35,10 @@ EOF } data_src() { - #pwd -# NB 25.10.17 perl -ne 'm,^(\w+)\s+\d+/\w+\s+#\s*(.*?)$, and $_=join(qq|\t|,map(/^\s*(.*?)\s*$/,$1,$2)).qq|\n| and !$h{$_}++ and print' \ -# NB 25.10.17 /etc/services \ -# NB 25.10.17 > "$TMP/service.csv" -# NB 25.10.17 csv2table service "$TMP/service.csv" - - perl -ne 'm,^(\w+)\s+(\d+)/(\w+)\s+#\s*(.*?)$, and print join(qq|\t|,map(/^\s*(.*?)\s*$/,$2,$3,$1)).qq|\n|' \ - /etc/services \ - > "$TMP/port.csv" + $DIR/port > "$TMP/port.csv" csv2table port "$TMP/port.csv" -# NB 25.10.17 html-table2csv https://www.ovh.co.uk/dedicated_servers/|perl -ne \ -# NB 25.10.17 '/^\w/ or next; @F=split("\t",$_);splice(@F,6,1); $F[6] =~s/ ex.*$//; $F[6] =~s/[^\d\.]//g; print $_ if $_=join("\t",map{$_ eq "-" ? "" : $_} @F[0..6])."\n" and !$dbl{$_}++' \ -# NB 25.10.17 > "$TMP/ovh_dedicated.csv" -# NB 25.10.17 csv2table ovh_dedicated "$TMP/ovh_dedicated.csv" - - return 0 - w3m -dump -cols 999999 https://en.wikipedia.org/wiki/List_of_HTTP_status_codes| \ - perl -ne 'next unless /^\d\d\d / .. /^ /;s/.\[\d+\]//g; s/^ *//; s/^(\d+) ([\w _-]+).*?\n$/$1\t$2\t/;print' \ - > "$TMP/http_status.csv" - csv2table http_status "$TMP/http_status.csv" - - curl -s http://pgl.yoyo.org/adservers/serverlist.php?hostformat=nohtml \ - > "$TMP/adservers.csv" - csv2table adservers "$TMP/adservers.csv" - - curl -s 'http://www.user-agents.org/' | perl -e '$_=join(qq||,<>); print map {s/^\s*(.*?)(?:.nbsp.)?\s*$/$1/;qq|$_\n|} m,([^<]+),g' - > "$TMP/useragent.csv" - csv2table useragent "$TMP/useragent.csv" - } ( @@ -92,7 +65,7 @@ rm -rf "$TMP" ( -for f in $(grep -ErlI '^.{1,3}