#@_ = $tr =~ m,(<td[^>]*>)\s*<a\s+href="([^"]+)"[^>]*>(.*?)</a>,;
#die @_ if @_;
- $tr =~ s,<td[^>]*>\s*<a\s+href="([^"]+)"[^>]*>(.*?)</a>,<td>$1</td><td>$2,g
- if $Opt{href};
+ $tr =~ s,<td[^>]*>\s*<a\s+[^>]*href="([^"]+)"[^>]*>(.*?)</a>,<td>$1</td><td>$2,g
+ if $Opt{href2td};
for my $td ($tr =~ m,<${T3}${exp3}[^>]*>(.*?)<\s*/\s*${T3}\s*>,gi)
{
+ $td =~ s,<a\s+[^>]*href="([^"]+)"[^>]*>(.*?)</a>,[href=$1]$2,g
+ if $Opt{href2txt};
$td = html2txt($td) unless $Opt{html};
$_ = chr(194).chr(160); $td =~ s/$_/ /g;
$td =~ s/\s+/ /g;
=head1 OPTIONS
-option[tag|T=s] table|dl Default: table
- -option[href!] Treat href as a column
+ -option[href2td!] Treat first column href as a new column
+ -option[href2txt!] Inline each href in its cell as [href=URL]text
-option[sep|s=s] Default: tab (\t)
-option[html!] Escape html (default: yes)
-option[num|n=i] Only dump table number
-#!/bin/bash -l
-#(chr,entity,name)
-http_get https://www.w3schools.com/cssref/css_entities.php | \
-xmllint --html --xpath '/html/body//div[@id="main"]//table' - 2>/dev/null | \
-xml2csv - | \
-tail -n+2 | \
-sed -E "s,(\\\\),\1\1," | \
-awk 'BEGIN{FS="\t"}{print $2"\t"$1"\t"$3}' \
-
+#!/bin/bash
+"$(dirname "$0")"/w3schools.sh /cssref/css_entities.php \
+ | awk -F$'\t' '{print $2"\t"$1"\t"$3}'
-#!/bin/bash -l
-http_get https://www.w3schools.com/cssref/css_functions.php | \
-xmllint --html --xpath "/html/body//div[@id=\"main\"]//table" - 2>/dev/null | \
-sed -E "s, href=\"([^\"]+)\",>https://www.w3schools.com/cssref/\1|</a><a,g" | \
-xml2csv - | \
-grep "|" | \
-sed "s/|/\t/" \
-| awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \
+#!/bin/bash
+"$(dirname "$0")"/w3schools.sh /cssref/css_functions.php
-#!/bin/bash -l
-http_get https://www.w3schools.com/cssref/index.php | \
-xmllint --html --xpath '/html/body//div[@id="cssproperties"]//table' - 2>/dev/null | \
-perl -pe 's|<td[^>]*><a href="([^"]+)">([^<]+)</a></td>|<td>https://www.w3schools.com/cssref/$1</td><td>$2</td>|' | \
-html2csv | \
-sed -E "s,(\\\\),\1\1," | \
-awk 'BEGIN{FS="\t"} /www\.w3schools\.com/ {print $2"\t"$3"\t"$1}' \
-
+#!/bin/bash
+"$(dirname "$0")"/w3schools.sh /cssref/index.php
-#!/bin/bash -l
-http_get https://www.w3schools.com/cssref/css_selectors.php \
-| xmllint --html --xpath "/html/body//div[@id=\"main\"]//table" - 2>/dev/null \
-| sed -E "s, href=\"([^\"]+)\",>https://www.w3schools.com/cssref/\1|</a><a,g" \
-| xml2csv - \
-| grep "|" \
-| sed "s/|/\t/" \
-| awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$4"\t"$1}' \
+#!/bin/bash
+"$(dirname "$0")"/w3schools.sh /cssref/css_selectors.php
-#!/bin/bash -l
-
-# NB 01.06.23: TODEL
-false && http_get https://html.com/attributes/ \
-| sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+)</\1>,<div>\2</div>," \
-| xmllint --html --xpath //table - 2>/dev/null \
-| sed -E "s,<a[^>]+href=\"((https://html.com)?/attributes/([^\"\/]+)\/?)[^<]+</a>,\3</td><td>https://html.com/\1,g" \
-| xml2csv - \
-| awk 'BEGIN{FS="\t"}{print $1"\t"$3"\t"$2}' \
-
-
-true && http_get https://www.w3schools.com/tags/ref_attributes.asp | \
-xmllint --html --xpath '/html/body//div[@id="main"]//table' - 2>/dev/null | \
-grep -v 'href="tag_' | \
-perl -pe 's|<td[^>]*><a href="([^"]+)">([^<]+)</a></td>|<td>https://www.w3schools.com/tags/$1</td><td>$2</td>|' | \
-html2csv | \
-sed -E "s,(\\\\),\1\1," | \
-awk 'BEGIN{FS="\t"} /www\.w3schools\.com/ {print $2"\t"$3"\t"$1}' \
-
+#!/bin/bash
+"$(dirname "$0")"/w3schools.sh /tags/ref_attributes.asp 1,3
-#!/bin/bash -l
-html2txt https://html.com/tags/ \
-| grep "^<" \
-| sed -E -e "s/ *HTML Tag *//" -e "s,^<([^>]+)>,\1\thttps://www.w3schools.com/TAGS/tag_\1.asp\t," \
-| awk 'BEGIN{FS="\t"}{print $1"\t"$3"\t"$2}' \
+#!/bin/bash
+"$(dirname "$0")"/w3schools.sh /TAGS/ | sed -E 's,<([a-z][^>]*)>,\1,'
--- /dev/null
+#!/bin/bash -l
+
+path=${1:?Usage: $0 /path/ for url https://www.w3schools.com}
+cut="$2"
+url="https://www.w3schools.com$path"
+preff="$(echo "$url"|sed -E 's,[^/]+\.[^/]+$,,')"
+#exec echo $preff
+
+http_get "$url" | \
+xmllint --format --html --xpath '/html/body//div[@id="main"]//table' - 2>/dev/null | \
+grep -Ev '<th' | \
+html2csv --href2txt | \
+sed -E \
+ -e "s,\[href=([^]]+),[href=$preff\1,g" \
+ -e "s,(\\\\),\1\1," \
+| \
+( test -z "$cut" && cat || cut -f "$cut" ) \
+| perl -pe '
+ s/\[href=([^\]]+)\](.*)$/\2\t\1/;
+ m,\thttp[^\t]+$, or s,$,\thttps://www.w3schools.com/tags/ref_attributes.asp,;
+' \
+;