git.nbdom.net Git - nb.git/commitdiff
html_attribute.sh
author Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 5 Dec 2020 09:39:45 +0000 (09:39 +0000)
committer Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 5 Dec 2020 09:39:45 +0000 (09:39 +0000)
lib/postgres/html_attribute.sh

index 564442b8045d63da4e4b5c10d462fe1d3650a046..559fccf5000bc3e419c6f5d3b9d10b4c3751e196 100755 (executable)
@@ -2,8 +2,5 @@
 http_get https://html.com/attributes/ \
 | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+)</\1>,<div>\2</div>," \
 | xmllint --html --xpath //table - 2>/dev/null \
-| xml2csv - \
 | sed -E \
-  -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
-  -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
-| awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \
+  -e "s,<a[^>]+href=.https://html.com/attributes/([^/]+)[^<]+</a></td>[^<]*<td>,\1,g" \