git.nbdom.net Git - nb.git/commitdiff
lib/postgres/html_attribute.sh
author Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 5 Dec 2020 09:16:05 +0000 (09:16 +0000)
committer Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 5 Dec 2020 09:16:05 +0000 (09:16 +0000)
lib/postgres/html_attribute.sh

index 7ba5b8464c0c1f0b001fa14a57609d727060e7c0..564442b8045d63da4e4b5c10d462fe1d3650a046 100755 (executable)
@@ -3,3 +3,7 @@ http_get https://html.com/attributes/ \
 | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+)</\1>,<div>\2</div>," \
 | xmllint --html --xpath //table - 2>/dev/null \
 | xml2csv - \
+| sed -E \
+  -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
+  -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
+| awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \