From 2d56601b7b2a0fd452c98f1679504e3d71a4cc79 Mon Sep 17 00:00:00 2001 From: Nicolas Boisselier Date: Thu, 1 Jun 2023 00:50:14 +0200 Subject: [PATCH] use www.w3schools.com --- lib/postgres/html_attribute.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/postgres/html_attribute.sh b/lib/postgres/html_attribute.sh index fadb700e..6bcaa627 100755 --- a/lib/postgres/html_attribute.sh +++ b/lib/postgres/html_attribute.sh @@ -1,7 +1,19 @@ #!/bin/bash -l -http_get https://html.com/attributes/ \ + +# NB 01.06.23: TODEL +false && http_get https://html.com/attributes/ \ | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+),
\2
," \ | xmllint --html --xpath //table - 2>/dev/null \ | sed -E "s,]+href=\"((https://html.com)?/attributes/([^\"\/]+)\/?)[^<]+,\3https://html.com/\1,g" \ | xml2csv - \ | awk 'BEGIN{FS="\t"}{print $1"\t"$3"\t"$2}' \ + + +true && http_get https://www.w3schools.com/tags/ref_attributes.asp | \ +xmllint --html --xpath '/html/body//div[@id="main"]//table' - 2>/dev/null | \ +grep -v 'href="tag_' | \ +perl -pe 's|]*>([^<]+)|https://www.w3schools.com/tags/$1$2|' | \ +html2csv | \ +sed -E "s,(\\\\),\1\1," | \ +awk 'BEGIN{FS="\t"} /www\.w3schools\.com/ {print $2"\t"$3"\t"$1}' \ + -- 2.47.3