git.nbdom.net Git - nb.git/commitdiff
html_attribute.sh
author Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 5 Dec 2020 10:11:45 +0000 (10:11 +0000)
committer Nicolas Boisselier <nicolas.boisselier@gmail.com>
Sat, 5 Dec 2020 10:11:45 +0000 (10:11 +0000)
lib/postgres/html_attribute.sh

index 8b19c49e0e91b53da64b6ee05c5a387229a0d500..0944487e3097c1e91c1a49d6e3b4497a128fd809 100755 (executable)
@@ -2,26 +2,6 @@
 http_get https://html.com/attributes/ \
 | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+)</\1>,<div>\2</div>," \
 | xmllint --html --xpath //table - 2>/dev/null \
-| sed -E "s,<a[^>]+href=.(https://html.com/attributes/[^\"]+)[^<]+</a>,\1,g" \
+| sed -E "s,<a[^>]+href=.(https://html.com/attributes/([^/]+)[^\"]*)[^<]+</a>,\2</td><td>\1,g" \
 | xml2csv - \
-| awk 'BEGIN{FS="\t"}{tag=$1; sub(/^[^-]+-/,"",tag); sub(/\/$/,"",tag); print tag"\t"$2"\t"$1}' \
-
-# NB 05.12.20 | sed -E "s,^([\t]+/([^/]+)-([^-/]+)[^\t]*),\1\t\3," \
-
-# NB 05.12.20 | xml2csv - \
-# NB 05.12.20 | sed -E \
-# NB 05.12.20   -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
-# NB 05.12.20   -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
-# NB 05.12.20 | awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \
-
-
-
-#!/bin/bash -l
-# NB 05.12.20 http_get https://html.com/attributes/ \
-# NB 05.12.20 | sed -E "s,<(thrive_headline|header)[^.>]+>([^<]+)</\1>,<div>\2</div>," \
-# NB 05.12.20 | xmllint --html --xpath //table - 2>/dev/null \
-# NB 05.12.20 | xml2csv - \
-# NB 05.12.20 | sed -E \
-# NB 05.12.20   -e "s/”/\"/g" -e "s,^[^<]+(<[^>]+>)[^\t]+,\L\1," \
-# NB 05.12.20   -e "s,^(<([^ ]+) ([^=>]+)),https://html.com/attributes/\2-\3/\t\1," \
-# NB 05.12.20 | awk 'BEGIN{FS="\t"}{print $2"\t"$3"\t"$1}' \
+| awk 'BEGIN{FS="\t"}{print $1"\t"$3"\t"$2}' \