From: Nicolas Boisselier
Date: Sat, 5 Dec 2020 10:11:45 +0000 (+0000)
Subject: html_attribute.sh
X-Git-Url: https://git.nbdom.net/?a=commitdiff_plain;h=65637eb21bf848b58870229ba5def83e574b06eb;p=nb.git

html_attribute.sh
---

diff --git a/lib/postgres/html_attribute.sh b/lib/postgres/html_attribute.sh
index 8b19c49e..0944487e 100755
--- a/lib/postgres/html_attribute.sh
+++ b/lib/postgres/html_attribute.sh
@@ -2,26 +2,6 @@
 http_get https://html.com/attributes/ \
 | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+),\2," \
 | xmllint --html --xpath //table - 2>/dev/null \
-| sed -E "s,]+href=.(https://html.com/attributes/[^\"]+)[^<]+,\1,g" \
+| sed -E "s,]+href=.(https://html.com/attributes/([^/]+)[^\"]*)[^<]+,\2\1,g" \
 | xml2csv - \
-| awk 'BEGIN{FS="\t"}{tag=$1; sub(/^[^-]+-/,"",tag); sub(/\/$/,"",tag); print tag"\t"$2"\t"$1}' \
-
-# NB 05.12.20 | sed -E "s,^([\t]+/([^/]+)-([^-/]+)[^\t]*),\1\t\3," \
-
-# NB 05.12.20 | xml2csv - \
-# NB 05.12.20 | sed -E \
-# NB 05.12.20 -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
-# NB 05.12.20 -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
-# NB 05.12.20 | awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \
-
-
-
-#!/bin/bash -l
-# NB 05.12.20 http_get https://html.com/attributes/ \
-# NB 05.12.20 | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+),\2," \
-# NB 05.12.20 | xmllint --html --xpath //table - 2>/dev/null \
-# NB 05.12.20 | xml2csv - \
-# NB 05.12.20 | sed -E \
-# NB 05.12.20 -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
-# NB 05.12.20 -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
-# NB 05.12.20 | awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \
+| awk 'BEGIN{FS="\t"}{print $1"\t"$3"\t"$2}' \