&html_fix_entities
&html_unescape_fix_err
&html_unescape
+ &html_escape
&encoding_is_utf8
&encoding_is_latin
&del_accent
}
-#die &html_unescape("einbaufähig | Zaza é é | Nike Chaussure Air Jordan Phat&#1601 pour Fille") if $ARGV[0] eq 'zaza';
+sub html_escape {
+#------------------------------------------------------------------------------
+# NB 09.11.23: Does what it says: replace chars with their html entities
+#------------------------------------------------------------------------------
+ my $str = shift @_;
+ my $encode = shift @_; $encode ||= "utf8";
+
+ require CGI unless $INC{'CGI.pm'};
+ if (&encoding_is_latin($encode)) {
+ $str =~ s/(&#\w+;)/&CGI::escapeHTML("$1")/ge;
+ } else {
+ $str =~ s/(&#\w+;)/&CGI::escapeHTML(&Encode::encode($encode,"$1"))/ge;
+ }
+
+ foreach my $char (sort {length($a) <=> length($b)} keys %CHAR_ENTITIES) {
+ my $ent = $CHAR_ENTITIES{$char};
+
+ $str =~ s/\Q$char\E/$ent/g;
+ }
+
+ return $str;
+
+}
+
sub html_unescape {
#------------------------------------------------------------------------------
# NB 22.03.07
# Replace html entities with proper char
#------------------------------------------------------------------------------
-my $str = shift @_;
-my $encode = shift @_; $encode ||= "utf8";
+ my $str = shift @_;
+ my $encode = shift @_; $encode ||= "utf8";
$str =~ s/&(#?[^;]+;)/&$1/g;
-#require HTML::Entities unless $INC{'HTML/Entities.pm'};
-#require Text::Unidecode unless $INC{'Text/Unidecode.pm'};
-#return &Text::Unidecode::unidecode(&HTML::Entitie::decode_entities($str));
-
-# use Data::Dumper; print Dumper(\%CHAR_ENTITIES)."\n";
foreach my $char (keys %CHAR_ENTITIES) {
my $ent = $CHAR_ENTITIES{$char};
- #$char = &Encode::encode($encode,$char) if !&encoding_is_utf8($encode);
- #&Encode::from_to($char,"utf8",$encode) if !&encoding_is_utf8($encode);
- #$str =~ s/$ent/$char/g;
-
- #$char = &Encode::decode('utf8',$char);
$str =~ s/\Q$ent\E/$char/g;
}
- #return $str;
require CGI unless $INC{'CGI.pm'};
if (&encoding_is_latin($encode)) {
$str =~ s/(&#\w+;)/&CGI::unescapeHTML("$1")/ge;
-# NB 25.06.12 $str =~ s/(&\w+;)/&CGI::unescapeHTML("$1")/ge;
} else {
$str =~ s/(&#\w+;)/&Encode::encode($encode,&CGI::unescapeHTML("$1"))/ge;
-# NB 25.06.12 $str =~ s/(&\w+;)/&Encode::encode($encode,&CGI::unescapeHTML("$1"))/ge;
}
-return $str;
+ return $str;
}