( (<)/?\w+(\s+\w+=['"]+[^'"]+['"]+)*/?(>) ) # start tag
|( (<)?\w+(\s+\w+=\S+)*/?(>) ) # end tag
- |( (<|<)!\[CDATA\[ ) # cdata begin
- | ( \]\](>|>) ) # cdata end
+ # NB 04.06.23: NO we want entities !!!
+ # NB 04.06.23 |( (<|<)!\[CDATA\[ ) # cdata begin
+ # NB 04.06.23 | ( \]\](>|>) ) # cdata end
+ |( <!\[CDATA\[ ) # cdata begin
+ |( \]\]> ) # cdata end
+
+ # NB 04.06.23: NO we want entities !!!
+ # NB 04.06.23 |( (<|<)/\w+\s*$ ) # incomplet html
+ # NB 04.06.23 |( ^(<|<)\w+ .* ) # incomplet html
+ |( </\w+\s*$ ) # incomplete html
+ |( ^<\w+ .* ) # incomplete html
- |( (<|<)/\w+\s*$ ) # incomplet html
- |( ^(<|<)\w+ .* ) # incomplet html
|(<\!DOCTYPE[^>]+>)
|([\w_-]+="[^"]+") # attrs
|( <\w[^>]+> ) # tags