From: Nicolas Boisselier Date: Sun, 4 Jun 2023 23:44:08 +0000 (+0200) Subject: lib/postgres/w3schools.sh X-Git-Url: https://git.nbdom.net/?a=commitdiff_plain;h=995d429aa51114461c7187af79a2d8a5768a1236;p=nb.git lib/postgres/w3schools.sh --- diff --git a/bin/html2csv b/bin/html2csv index c54d16eb..a49e3d42 100755 --- a/bin/html2csv +++ b/bin/html2csv @@ -91,11 +91,13 @@ for my $table ($html =~ m,<${T1}${exp1}[^>]*>(.*?)<\s*/\s*${T1}\s*>,gi) #@_ = $tr =~ m,(]*>)\s*]*>(.*?),; #die @_ if @_; - $tr =~ s,]*>\s*]*>(.*?),$1$2,g - if $Opt{href}; + $tr =~ s,]*>\s*]*href="([^"]+)"[^>]*>(.*?),$1$2,g + if $Opt{href2td}; for my $td ($tr =~ m,<${T3}${exp3}[^>]*>(.*?)<\s*/\s*${T3}\s*>,gi) { + $td =~ s,]*href="([^"]+)"[^>]*>(.*?),[href=$1]$2,g + if $Opt{href2txt}; $td = html2txt($td) unless $Opt{html}; $_ = chr(194).chr(160); $td =~ s/$_/ /g; $td =~ s/\s+/ /g; @@ -229,7 +231,8 @@ Quick usage: =head1 OPTIONS -option[tag|T=s] table|dl Default: table - -option[href!] Treat href as a column + -option[href2td!] Treat first column href as a new column + -option[href2txt!] -option[sep|s=s] Default: tab (\t) -option[html!] Escape html (default: yes) -option[num|n=i] Only dump table number diff --git a/lib/postgres/css_entity.sh b/lib/postgres/css_entity.sh index e29af082..b152fb66 100755 --- a/lib/postgres/css_entity.sh +++ b/lib/postgres/css_entity.sh @@ -1,9 +1,3 @@ -#!/bin/bash -l -#(chr,entity,name) -http_get https://www.w3schools.com/cssref/css_entities.php | \ -xmllint --html --xpath '/html/body//div[@id="main"]//table' - 2>/dev/null | \ -xml2csv - | \ -tail -n+2 | \ -sed -E "s,(\\\\),\1\1," | \ -awk 'BEGIN{FS="\t"}{print $2"\t"$1"\t"$3}' \ - +#!/bin/bash +"$(dirname "$0")"/w3schools.sh /cssref/css_entities.php \ + | awk -F$'\t' '{print $2"\t"$1"\t"$3}' diff --git a/lib/postgres/css_function.sh b/lib/postgres/css_function.sh index 32387b4d..64ee550e 100755 --- a/lib/postgres/css_function.sh +++ b/lib/postgres/css_function.sh @@ -1,8 +1,2 @@ -#!/bin/bash -l -http_get https://www.w3schools.com/cssref/css_functions.php | \ -xmllint --html --xpath "/html/body//div[@id=\"main\"]//table" - 2>/dev/null | \ -sed -E "s, href=\"([^\"]+)\",>https://www.w3schools.com/cssref/\1|/dev/null | \ -perl -pe 's|]*>([^<]+)|https://www.w3schools.com/cssref/$1$2|' | \ -html2csv | \ -sed -E "s,(\\\\),\1\1," | \ -awk 'BEGIN{FS="\t"} /www\.w3schools\.com/ {print $2"\t"$3"\t"$1}' \ - +#!/bin/bash +"$(dirname "$0")"/w3schools.sh /cssref/index.php diff --git a/lib/postgres/css_selector.sh b/lib/postgres/css_selector.sh index 028132aa..06d412df 100755 --- a/lib/postgres/css_selector.sh +++ b/lib/postgres/css_selector.sh @@ -1,8 +1,2 @@ -#!/bin/bash -l -http_get https://www.w3schools.com/cssref/css_selectors.php \ -| xmllint --html --xpath "/html/body//div[@id=\"main\"]//table" - 2>/dev/null \ -| sed -E "s, href=\"([^\"]+)\",>https://www.w3schools.com/cssref/\1|]+>([^<]+),
\2
," \ -| xmllint --html --xpath //table - 2>/dev/null \ -| sed -E "s,]+href=\"((https://html.com)?/attributes/([^\"\/]+)\/?)[^<]+,\3https://html.com/\1,g" \ -| xml2csv - \ -| awk 'BEGIN{FS="\t"}{print $1"\t"$3"\t"$2}' \ - - -true && http_get https://www.w3schools.com/tags/ref_attributes.asp | \ -xmllint --html --xpath '/html/body//div[@id="main"]//table' - 2>/dev/null | \ -grep -v 'href="tag_' | \ -perl -pe 's|]*>([^<]+)|https://www.w3schools.com/tags/$1$2|' | \ -html2csv | \ -sed -E "s,(\\\\),\1\1," | \ -awk 'BEGIN{FS="\t"} /www\.w3schools\.com/ {print $2"\t"$3"\t"$1}' \ - +#!/bin/bash +"$(dirname "$0")"/w3schools.sh /tags/ref_attributes.asp 1,3 diff --git a/lib/postgres/html_tag.sh b/lib/postgres/html_tag.sh index 7063c358..50204ef6 100755 --- a/lib/postgres/html_tag.sh +++ b/lib/postgres/html_tag.sh @@ -1,5 +1,2 @@ -#!/bin/bash -l -html2txt https://html.com/tags/ \ -| grep "^<" \ -| sed -E -e "s/ *HTML Tag *//" -e "s,^<([^>]+)>,\1\thttps://www.w3schools.com/TAGS/tag_\1.asp\t," \ -| awk 'BEGIN{FS="\t"}{print $1"\t"$3"\t"$2}' \ +#!/bin/bash +"$(dirname "$0")"/w3schools.sh /TAGS/ | sed -E 's,<([a-z][^>]*)>,\1,' diff --git a/lib/postgres/w3schools.sh b/lib/postgres/w3schools.sh new file mode 100755 index 00000000..6047c205 --- /dev/null +++ b/lib/postgres/w3schools.sh @@ -0,0 +1,22 @@ +#!/bin/bash -l + +path=${1:?Usage: $0 /path/ for url https://www.w3schools.com} +cut="$2" +url="https://www.w3schools.com$path" +preff="$(echo "$url"|sed -E 's,[^/]+\.[^/]+$,,')" +#exec echo $preff + +http_get "$url" | \ +xmllint --format --html --xpath '/html/body//div[@id="main"]//table' - 2>/dev/null | \ +grep -Ev '