# NOTE(review): this text looks like a diff fragment rather than a runnable
# script: lines beginning with "-" appear to be the previous version of a
# pipeline stage and lines beginning with "+" its replacement -- confirm before
# executing or editing it as plain shell.
#
# Pipeline (reading the unprefixed and "+" lines):
#   1. http_get (helper not defined here; presumably fetches the URL) is given
#      https://html.com/attributes/.
#   2. sed rewrites the first <thrive_headline ...> or <header ...> element on
#      each line whose content is plain text into <div>text</div>.
#   3. xmllint parses the input as HTML and selects the nodes matching //table;
#      its stderr is discarded.
#   4. sed replaces every <a ... href="https://html.com/attributes/...">...</a>
#      with just the captured URL (everything up to the next double quote).
#   5. xml2csv (helper not defined here) converts the result; awk then reads it
#      as tab-separated fields -- presumably one table row per line, TODO confirm.
#   6. awk prints three tab-separated columns: field 1 with everything through
#      its first "-" and a trailing "/" removed, then field 2, then field 1
#      unchanged.
http_get https://html.com/attributes/ \
| sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+)</\1>,<div>\2</div>," \
| xmllint --html --xpath //table - 2>/dev/null \
-| sed -E \
- -e "s,<a[^>]+href=.https://html.com/attributes/([^/]+)[^<]+</a></td>[^<]*<td>,\1,g" \
+| sed -E "s,<a[^>]+href=.(https://html.com/attributes/[^\"]+)[^<]+</a>,\1,g" \
| xml2csv - \
-| sed -E \
- -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
- -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
-| awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \
+| awk 'BEGIN{FS="\t"}{tag=$1; sub(/^[^-]+-/,"",tag); sub(/\/$/,"",tag); print tag"\t"$2"\t"$1}' \
+
+# NB 05.12.20 | sed -E "s,^([\t]+/([^/]+)-([^-/]+)[^\t]*),\1\t\3," \
+
+# NB 05.12.20 | xml2csv - \
+# NB 05.12.20 | sed -E \
+# NB 05.12.20 -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
+# NB 05.12.20 -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
+# NB 05.12.20 | awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \
+
+
+
+#!/bin/bash -l
+# NB 05.12.20 http_get https://html.com/attributes/ \
+# NB 05.12.20 | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+)</\1>,<div>\2</div>," \
+# NB 05.12.20 | xmllint --html --xpath //table - 2>/dev/null \
+# NB 05.12.20 | xml2csv - \
+# NB 05.12.20 | sed -E \
+# NB 05.12.20 -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
+# NB 05.12.20 -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
+# NB 05.12.20 | awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \