]> git.nbdom.net Git - nb.git/commitdiff
html_escape
authorNicolas Boisselier <nicolas.boisselier@gmail.com>
Thu, 9 Nov 2023 10:25:56 +0000 (11:25 +0100)
committerNicolas Boisselier <nicolas.boisselier@gmail.com>
Thu, 9 Nov 2023 10:25:56 +0000 (11:25 +0100)
lib/perl/NB/Functions.pm

index 488294b61e701f652ae88eb6310878899bdf4ece..5289c20f12c2a2fadb28e4727ccf00ffa0c15a76 100644 (file)
@@ -193,6 +193,7 @@ our %CHAR_ENTITIES = (
        &html_fix_entities
        &html_unescape_fix_err
        &html_unescape
+       &html_escape
        &encoding_is_utf8
        &encoding_is_latin
        &del_accent
@@ -652,44 +653,54 @@ return $str;
 
 }
 
-#die &html_unescape("einbauf&#228hig | Zaza &#233; &eacute; | Nike Chaussure Air Jordan Phat&amp;#1601 pour Fille") if $ARGV[0] eq 'zaza';
+sub html_escape {
+#------------------------------------------------------------------------------
+# NB 09.11.23: Do what it says (replace chars with their html entities)
+#------------------------------------------------------------------------------
+       my $str = shift @_;
+       my $encode = shift @_; $encode ||= "utf8";
+
+       require CGI unless $INC{'CGI.pm'};
+       if (&encoding_is_latin($encode)) {
+               $str =~ s/(&#\w+;)/&CGI::escapeHTML("$1")/ge;
+       } else {
+               $str =~ s/(&#\w+;)/&CGI::escapeHTML(&Encode::encode($encode,"$1"))/ge;
+       }
+
+       foreach my $char (sort {length($a) <=> length($b)} keys %CHAR_ENTITIES) {
+               my $ent = $CHAR_ENTITIES{$char};
+
+               $str =~ s/\Q$char\E/$ent/g;
+       }
+
+       return $str;
+
+}
+
 sub html_unescape {
 #------------------------------------------------------------------------------
 # NB 22.03.07 
 # Replace html entities with proper char
 #------------------------------------------------------------------------------
-my $str = shift @_;
-my $encode = shift @_; $encode ||= "utf8";
+       my $str = shift @_;
+       my $encode = shift @_; $encode ||= "utf8";
 
        $str =~ s/&amp;(#?[^;]+;)/&$1/g;
 
-#require HTML::Entities unless $INC{'HTML/Entities.pm'};
-#require Text::Unidecode unless $INC{'Text/Unidecode.pm'};
-#return &Text::Unidecode::unidecode(&HTML::Entitie::decode_entities($str));
-
-# use Data::Dumper; print Dumper(\%CHAR_ENTITIES)."\n";
        foreach my $char (keys %CHAR_ENTITIES) {
                my $ent = $CHAR_ENTITIES{$char};
 
-               #$char = &Encode::encode($encode,$char) if !&encoding_is_utf8($encode);
-               #&Encode::from_to($char,"utf8",$encode) if !&encoding_is_utf8($encode);
-               #$str =~ s/$ent/$char/g;
-
-               #$char = &Encode::decode('utf8',$char);
                $str =~ s/\Q$ent\E/$char/g;
        }
-       #return $str;
 
        require CGI unless $INC{'CGI.pm'};
        if (&encoding_is_latin($encode)) {
                $str =~ s/(&#\w+;)/&CGI::unescapeHTML("$1")/ge;
-# NB 25.06.12          $str =~ s/(&\w+;)/&CGI::unescapeHTML("$1")/ge;
        } else {
                $str =~ s/(&#\w+;)/&Encode::encode($encode,&CGI::unescapeHTML("$1"))/ge;
-# NB 25.06.12          $str =~ s/(&\w+;)/&Encode::encode($encode,&CGI::unescapeHTML("$1"))/ge;
        }
 
-return $str;
+       return $str;
 
 }