]> git.nbdom.net Git - nb.git/commitdiff
lib/postgres/html_attribute.sh
author Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 5 Dec 2020 10:01:06 +0000 (11:01 +0100)
committer Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 5 Dec 2020 10:01:06 +0000 (11:01 +0100)
lib/postgres/html_attribute.sh

index aa17677bf8909c48eaa7a0686cd8d4b296748663..8b19c49e0e91b53da64b6ee05c5a387229a0d500 100755 (executable)
@@ -2,10 +2,26 @@
 http_get https://html.com/attributes/ \
 | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+)</\1>,<div>\2</div>," \
 | xmllint --html --xpath //table - 2>/dev/null \
-| sed -E \
-  -e "s,<a[^>]+href=.https://html.com/attributes/([^/]+)[^<]+</a></td>[^<]*<td>,\1,g" \
+| sed -E "s,<a[^>]+href=.(https://html.com/attributes/[^\"]+)[^<]+</a>,\1,g" \
 | xml2csv - \
-| sed -E \
-  -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
-  -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
-| awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \
+| awk 'BEGIN{FS="\t"}{tag=$1; sub(/^[^-]+-/,"",tag); sub(/\/$/,"",tag); print tag"\t"$2"\t"$1}' \
+
+# NB 05.12.20 | sed -E "s,^([\t]+/([^/]+)-([^-/]+)[^\t]*),\1\t\3," \
+
+# NB 05.12.20 | xml2csv - \
+# NB 05.12.20 | sed -E \
+# NB 05.12.20   -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
+# NB 05.12.20   -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
+# NB 05.12.20 | awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \
+
+
+
+#!/bin/bash -l
+# NB 05.12.20 http_get https://html.com/attributes/ \
+# NB 05.12.20 | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+)</\1>,<div>\2</div>," \
+# NB 05.12.20 | xmllint --html --xpath //table - 2>/dev/null \
+# NB 05.12.20 | xml2csv - \
+# NB 05.12.20 | sed -E \
+# NB 05.12.20   -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
+# NB 05.12.20   -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
+# NB 05.12.20 | awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \