git.nbdom.net Git - nb.git/commitdiff
lib/xslt/csv.xslt
author Nicolas Boisselier <nicolas.boisselier@gmail.com>
Wed, 6 Dec 2017 02:38:07 +0000 (02:38 +0000)
committer Nicolas Boisselier <nicolas.boisselier@gmail.com>
Wed, 6 Dec 2017 02:38:07 +0000 (02:38 +0000)
etc/profile.d/net.sh
lib/xslt/csv.xslt [new file with mode: 0644]
share/db/adservers [new file with mode: 0755]
share/db/http_status [new file with mode: 0755]
share/db/port [new file with mode: 0755]
share/db/update.sh
share/db/useragent [new file with mode: 0755]

index 5df372d955b7f6308ed4cf9ed7685fd3ea355406..ba6911f88f883d73a94c1baa5a7805dffbb5df76 100644 (file)
@@ -30,3 +30,7 @@ cdr2mask() {
 html2txt() {
   lynx -pseudo_inlines -nomargins -display_charset=utf8 -nocolor -nolist -width=999 -dump $@
 }
+
+xml2csv() {
+       xsltproc lib/xslt/csv.xslt $@
+}
diff --git a/lib/xslt/csv.xslt b/lib/xslt/csv.xslt
new file mode 100644 (file)
index 0000000..5da10f5
--- /dev/null
@@ -0,0 +1,28 @@
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:output method="text" encoding="utf-8" />
+  <!-- Flattens XML into delimited text: each child of the document element is a row, each of its child elements a field. -->
+  <xsl:param name="break" select="'&#xA;'" /> <!-- row separator, default: line feed -->
+  <xsl:param name="quote" select="''" /> <!-- put before and after every field, default: nothing -->
+  <xsl:param name="delim" select="'&#009;'" /> <!-- field separator, default: tab -->
+
+  <xsl:template match="/">
+    <xsl:apply-templates select="/*/*" /> <!-- process only the children of the document element -->
+  </xsl:template>
+
+  <xsl:template match="/*/*"> <!-- one row: emit its fields, then $break unless this is the last row -->
+    <xsl:apply-templates />
+    <xsl:if test="following-sibling::*">
+      <xsl:value-of select="$break" />
+    </xsl:if>
+  </xsl:template>
+
+  <xsl:template match="*"> <!-- one field: its quoted string value, then $delim unless this is the last field -->
+    <!-- remove normalize-space() if you want to keep white-space as it is -->
+    <xsl:value-of select="concat($quote, normalize-space(), $quote)" />
+    <xsl:if test="following-sibling::*">
+      <xsl:value-of select="$delim" />
+    </xsl:if>
+  </xsl:template>
+
+  <xsl:template match="text()" /> <!-- drop text nodes between fields; the built-in rule would copy them to the output -->
+</xsl:stylesheet>
diff --git a/share/db/adservers b/share/db/adservers
new file mode 100755 (executable)
index 0000000..42d3208
--- /dev/null
@@ -0,0 +1,2 @@
+#!/usr/bin/env sh
+curl -s 'http://pgl.yoyo.org/adservers/serverlist.php?hostformat=nohtml' # prints the fetched list to stdout; quoted because "?" is a shell glob character
diff --git a/share/db/http_status b/share/db/http_status
new file mode 100755 (executable)
index 0000000..2cae046
--- /dev/null
@@ -0,0 +1,13 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+@ARGV = ('w3m -dump -cols 10000 https://en.wikipedia.org/wiki/List_of_HTTP_status_codes |'); # trailing "|": <> runs this command and reads its output
+# Prints tab-separated text: each status-code line becomes "code<TAB>name<TAB>" and is joined with the line printed after it.
+while (<>) {
+       next unless /^\d\d\d / .. /^ /; # flip-flop: on from a line starting with a 3-digit code through the next line starting with a space
+       s/.\[\d+\]//g; # drop "[N]" markers plus the character before them (presumably footnote references)
+       s/^ *//; # strip leading spaces
+       s/\t/  /g; # tabs become two spaces, presumably because tab is the output separator
+       s/^(\d+) ([\w _-]+).*?\n$/$1\t$2\t/; # "CODE Name..." becomes "CODE<TAB>Name<TAB>" without a newline, so the next printed line completes the row
+       print;
+}
diff --git a/share/db/port b/share/db/port
new file mode 100755 (executable)
index 0000000..473aab6
--- /dev/null
@@ -0,0 +1,9 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+# Print "port<TAB>protocol<TAB>name" for every /etc/services entry that carries a "#" comment.
+@ARGV = ('/etc/services');
+while (my $entry = <>) {
+       my ($name, $port, $proto) = $entry =~ m,^(\w+)\s+(\d+)/(\w+)\s+#\s*(.*?)$, or next;
+       print join("\t", $port, $proto, $name), "\n";
+}
index a169d5e77d536f9b5df3a93c701e66f29077c849..104f843f5146597f450b77562e0d5d4f4ada169b 100755 (executable)
@@ -35,37 +35,10 @@ EOF
 }
 
 data_src() {
-  #pwd
 
-# NB 25.10.17   perl -ne 'm,^(\w+)\s+\d+/\w+\s+#\s*(.*?)$, and $_=join(qq|\t|,map(/^\s*(.*?)\s*$/,$1,$2)).qq|\n| and !$h{$_}++ and print' \
-# NB 25.10.17     /etc/services \
-# NB 25.10.17     > "$TMP/service.csv"
-# NB 25.10.17   csv2table service "$TMP/service.csv"
-
-  perl -ne 'm,^(\w+)\s+(\d+)/(\w+)\s+#\s*(.*?)$, and print join(qq|\t|,map(/^\s*(.*?)\s*$/,$2,$3,$1)).qq|\n|' \
-    /etc/services \
-    > "$TMP/port.csv"
+       $DIR/port > "$TMP/port.csv"
   csv2table port "$TMP/port.csv"
 
-# NB 25.10.17   html-table2csv https://www.ovh.co.uk/dedicated_servers/|perl -ne \
-# NB 25.10.17     '/^\w/ or next; @F=split("\t",$_);splice(@F,6,1); $F[6] =~s/ ex.*$//; $F[6] =~s/[^\d\.]//g; print $_ if $_=join("\t",map{$_ eq "-" ? "" : $_} @F[0..6])."\n" and !$dbl{$_}++' \
-# NB 25.10.17     > "$TMP/ovh_dedicated.csv"
-# NB 25.10.17   csv2table ovh_dedicated "$TMP/ovh_dedicated.csv"
-
-  return 0
-  w3m -dump -cols 999999 https://en.wikipedia.org/wiki/List_of_HTTP_status_codes| \
-    perl -ne 'next unless /^\d\d\d / .. /^ /;s/.\[\d+\]//g; s/^ *//; s/^(\d+) ([\w _-]+).*?\n$/$1\t$2\t/;print' \
-    > "$TMP/http_status.csv"
-  csv2table http_status "$TMP/http_status.csv"
-
-  curl -s http://pgl.yoyo.org/adservers/serverlist.php?hostformat=nohtml \
-    > "$TMP/adservers.csv"
-  csv2table adservers "$TMP/adservers.csv"
-
-  curl -s 'http://www.user-agents.org/' | perl -e '$_=join(qq||,<>); print map {s/^\s*(.*?)(?:.nbsp.)?\s*$/$1/;qq|$_\n|} m,<td class=.right.>([^<]+),g'
-    > "$TMP/useragent.csv"
-  csv2table useragent "$TMP/useragent.csv"
-
 }
 
 (
@@ -92,7 +65,7 @@ rm -rf "$TMP"
 
 (
 
-for f in $(grep -ErlI '^.{1,3}<SHELL_REPLACE' $NB_ROOT); do
+for f in $(shell_replace -find $NB_ROOT/*); do
 
   echo "Update $f" | sed "s; $NB_ROOT/; ;"
   shell_replace -i.shell_replace "$f"
diff --git a/share/db/useragent b/share/db/useragent
new file mode 100755 (executable)
index 0000000..b5806d9
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh
+[ -n "$NB_ROOT" ] || . "$(dirname "$0")/../../etc/profile" || exit # NB_ROOT empty: source etc/profile (presumably defines it); stop if that fails
+xsltproc "$NB_ROOT/lib/xslt/csv.xslt" http://www.user-agents.org/allagents.xml