From: Nicolas Boisselier Date: Sat, 5 Dec 2020 09:16:05 +0000 (+0000) Subject: lib/postgres/html_attribute.sh X-Git-Url: https://git.nbdom.net/?a=commitdiff_plain;h=b3b1ea0cf8eca2fea3798a89c66df1f35e48cdf8;p=nb.git lib/postgres/html_attribute.sh --- diff --git a/lib/postgres/html_attribute.sh b/lib/postgres/html_attribute.sh index 7ba5b846..564442b8 100755 --- a/lib/postgres/html_attribute.sh +++ b/lib/postgres/html_attribute.sh @@ -3,3 +3,7 @@ http_get https://html.com/attributes/ \ | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+),
\2
," \ | xmllint --html --xpath //table - 2>/dev/null \ | xml2csv - \ +| sed -E \ + -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \ + -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \ +| awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \