From 4ad4e08f014708e5d93f753d6ff38b94e1cf57f3 Mon Sep 17 00:00:00 2001 From: Nicolas Boisselier Date: Sat, 5 Dec 2020 09:39:45 +0000 Subject: [PATCH] html_attribute.sh --- lib/postgres/html_attribute.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/postgres/html_attribute.sh b/lib/postgres/html_attribute.sh index 564442b8..559fccf5 100755 --- a/lib/postgres/html_attribute.sh +++ b/lib/postgres/html_attribute.sh @@ -2,8 +2,5 @@ http_get https://html.com/attributes/ \ | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+),
\2
," \ | xmllint --html --xpath //table - 2>/dev/null \ -| xml2csv - \ | sed -E \ - -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \ - -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \ -| awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \ + -e "s,]+href=.https://html.com/attributes/([^/]+)[^<]+[^<]*,\1,g" \ -- 2.47.3