# Reconstructed from git patch c6ccc0b780d8885ac7e1a5249f50dfcb5a49e9c4
# ("html_escape", Nicolas Boisselier, Thu, 9 Nov 2023) against
# lib/perl/NB/Functions.pm. The same patch also adds `&html_escape` right
# after `&html_unescape` in a list of sub names (presumably the export list).

sub html_escape {
#------------------------------------------------------------------------------
# NB 09.11.23: Do what it says
#
# Replace every character that is a key of %CHAR_ENTITIES with its entity, and
# pass existing "&#...;" sequences through CGI::escapeHTML.
#
#   $str    : text to escape
#   $encode : encoding name, defaults to "utf8"
#
# Returns the escaped string ($str is returned untouched when undef or empty).
#
# The whole string is processed in ONE pass: text produced by a replacement is
# never scanned again, so the result does not depend on hash ordering and an
# emitted entity cannot be escaped a second time.
#------------------------------------------------------------------------------
  my $str = shift @_;
  my $encode = shift @_; $encode ||= "utf8";

  # Nothing to do; also avoids "uninitialized value" warnings on undef input.
  return $str unless defined $str && length $str;

  require CGI; # require() already consults %INC, no extra guard needed
  my $is_latin = &encoding_is_latin($encode);

  # Longest keys first, so a key that is a prefix of a longer one cannot shadow
  # it; the secondary string comparison keeps the order deterministic.
  my @chars = sort { length($b) <=> length($a) || $a cmp $b }
              grep { length } keys %CHAR_ENTITIES;
  my $chars_re = join '|', map { quotemeta } @chars;

  # Group 1: an existing "&#...;" sequence, group 2: a char from %CHAR_ENTITIES.
  my $re = length($chars_re) ? qr/(&#\w+;)|($chars_re)/ : qr/(&#\w+;)/;

  $str =~ s{$re}{
    defined($2) ? $CHAR_ENTITIES{$2}
    : $is_latin ? &CGI::escapeHTML("$1")
    : &CGI::escapeHTML(&Encode::encode($encode,"$1"))
  }ge;

  return $str;

}

sub html_unescape {
#------------------------------------------------------------------------------
# NB 22.03.07
# Replace html entities with proper char
#
#   $str    : text containing entities
#   $encode : encoding name, defaults to "utf8"
#
# Entities listed as values of %CHAR_ENTITIES are replaced by their key in a
# single pass (deterministic, replaced text is not scanned again), then the
# remaining "&#...;" sequences are decoded with CGI::unescapeHTML.
#
# Returns the unescaped string ($str is returned untouched when undef or empty).
#------------------------------------------------------------------------------
  my $str = shift @_;
  my $encode = shift @_; $encode ||= "utf8";

  # Nothing to do; also avoids "uninitialized value" warnings on undef input.
  return $str unless defined $str && length $str;

  # NOTE(review): as written this replaces each match with itself. It may have
  # been meant to collapse a double-escaped ampersand in front of an entity --
  # confirm against the repository copy before changing the pattern.
  $str =~ s/&(#?[^;]+;)/&$1/g;

  # Reverse lookup entity => char. When several chars share one entity the
  # first one in sorted order wins, instead of whichever the hash yields first.
  my %char_for;
  foreach my $char (sort keys %CHAR_ENTITIES) {
    my $ent = $CHAR_ENTITIES{$char};
    next unless defined $ent && length $ent;
    $char_for{$ent} = $char unless exists $char_for{$ent};
  }

  if (%char_for) {
    # Longest entities first so that an entity cannot be cut short by a
    # shorter one sharing the same prefix.
    my $ents_re = join '|', map { quotemeta }
                  sort { length($b) <=> length($a) || $a cmp $b } keys %char_for;
    $str =~ s/($ents_re)/$char_for{$1}/g;
  }

  require CGI; # require() already consults %INC, no extra guard needed
  if (&encoding_is_latin($encode)) {
    $str =~ s/(&#\w+;)/&CGI::unescapeHTML("$1")/ge;
  } else {
    # Re-encode the decoded character into the requested target encoding.
    $str =~ s/(&#\w+;)/&Encode::encode($encode,&CGI::unescapeHTML("$1"))/ge;
  }

  return $str;
}