unicode-to-nginx.pl 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
#!/usr/bin/perl -w
# Convert Unicode mappings to nginx configuration file format.
# You may find useful mappings in various places, including
# the official unicode.org site:
#
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT
# Needs perl 5.6 or later.
# Written by Maxim Dounin, mdounin@mdounin.ru
###############################################################################
  11. require 5.006;
  12. while (<>) {
  13. # Skip comments and empty lines
  14. next if /^#/;
  15. next if /^\s*$/;
  16. chomp;
  17. # Convert mappings
  18. if (/^\s*0x(..)\s*0x(....)\s*(#.*)/) {
  19. # Mapping <from-code> <unicode-code> "#" <unicode-name>
  20. my $cs_code = $1;
  21. my $un_code = $2;
  22. my $un_name = $3;
  23. # Produce UTF-8 sequence from character code;
  24. my $un_utf8 = join('',
  25. map { sprintf("%02X", $_) }
  26. unpack("U0C*", pack("U", hex($un_code)))
  27. );
  28. print " $cs_code $un_utf8 ; $un_name\n";
  29. } else {
  30. warn "Unrecognized line: '$_'";
  31. }
  32. }
###############################################################################