Nice url without removing characters
1 2 3 4 56 7 8 9 1011 12 13 14 1516 17 18 19 2021 22 23 24 2526 27 28 29 3031 32 33 34 3536 37 38 39 4041 42 43 44 4546 47 48 49 5051 52 53 54 5556 57 58 59 6061 62 63 64 6566 67 68 69 7071 72 73 74 7576 77 78 79 8081 82 83 84 8586 87 88 89 9091 92 93 94 9596 97 98 99 100101 102 103 104 105106 107 108 109 110111 112 113 114 115116 117 118 119 120121 122 123 124 | <?php /** * @copyright 2008, Giorgio Sironi * @license http://www.gnu.org/licenses/gpl-2.0.html GNU Public License * @version $Id$ * @link http://sf.net/projects/ossigeno */ /** * remove diacritics mark from letters of a string */ class Otk_Filter_Diacritics implements Zend_Filter_Interface { /** * transform accented letters encoded in unicode string * in their normal version (without any diacritic marks) * * Commonly used in building url rewrite requests. * @param string text to be parsed * @return string new sanitized text */ public function filter($string) { $chars = array( // Latin-1 chr(195).chr(128) => 'A', chr(195).chr(129) => 'A', chr(195).chr(130) => 'A', chr(195).chr(131) => 'A', chr(195).chr(132) => 'A', chr(195).chr(133) => 'A', chr(195).chr(135) => 'C', chr(195).chr(136) => 'E', chr(195).chr(137) => 'E', chr(195).chr(138) => 'E', chr(195).chr(139) => 'E', chr(195).chr(140) => 'I', chr(195).chr(141) => 'I', chr(195).chr(142) => 'I', chr(195).chr(143) => 'I', chr(195).chr(145) => 'N', chr(195).chr(146) => 'O', chr(195).chr(147) => 'O', chr(195).chr(148) => 'O', chr(195).chr(149) => 'O', chr(195).chr(150) => 'O', chr(195).chr(153) => 'U', chr(195).chr(154) => 'U', chr(195).chr(155) => 'U', chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y', chr(195).chr(159) => 's', chr(195).chr(160) => 'a', chr(195).chr(161) => 'a', chr(195).chr(162) => 'a', chr(195).chr(163) => 'a', chr(195).chr(164) => 'a', chr(195).chr(165) => 'a', chr(195).chr(167) => 'c', chr(195).chr(168) => 'e', chr(195).chr(169) => 'e', chr(195).chr(170) => 'e', chr(195).chr(171) => 'e', chr(195).chr(172) => 'i', chr(195).chr(173) => 'i', chr(195).chr(174) => 'i', chr(195).chr(175) => 'i', chr(195).chr(177) => 'n', chr(195).chr(178) => 'o', chr(195).chr(179) => 'o', chr(195).chr(180) => 'o', chr(195).chr(181) => 'o', chr(195).chr(182) => 'o', chr(195).chr(182) => 'o', chr(195).chr(185) => 'u', chr(195).chr(186) => 'u', chr(195).chr(187) => 'u', chr(195).chr(188) => 'u', chr(195).chr(189) => 'y', chr(195).chr(191) => 'y', // Latin A chr(196).chr(128) => 'A', chr(196).chr(129) => 'a', chr(196).chr(130) => 'A', chr(196).chr(131) => 'a', chr(196).chr(132) => 'A', chr(196).chr(133) => 'a', chr(196).chr(134) => 'C', chr(196).chr(134) => 'c', chr(196).chr(136) => 'C', chr(196).chr(137) => 'c', chr(196).chr(138) => 'C', chr(196).chr(139) => 'c', chr(196).chr(140) => 'C', chr(196).chr(141) => 'c', chr(196).chr(142) => 'D', chr(196).chr(143) => 'd', chr(196).chr(144) => 'D', chr(196).chr(145) => 'd', chr(196).chr(146) => 'E', chr(196).chr(147) => 'e', chr(196).chr(148) => 'E', chr(196).chr(149) => 'e', chr(196).chr(150) => 'E', chr(196).chr(151) => 'e', chr(196).chr(152) => 'E', chr(196).chr(153) => 'e', chr(196).chr(154) => 'E', chr(196).chr(155) => 'e', chr(196).chr(156) => 'G', chr(196).chr(157) => 'g', chr(196).chr(158) => 'G', chr(196).chr(159) => 'g', chr(196).chr(160) => 'G', chr(196).chr(161) => 'g', chr(196).chr(162) => 'G', chr(196).chr(163) => 'g', chr(196).chr(164) => 'H', chr(196).chr(165) => 'h', chr(196).chr(166) => 'H', chr(196).chr(167) => 'h', chr(196).chr(168) => 'I', chr(196).chr(169) => 'i', chr(196).chr(170) => 'I', chr(196).chr(171) => 'i', chr(196).chr(172) => 'I', chr(196).chr(173) => 'i', chr(196).chr(174) => 'I', chr(196).chr(175) => 'i', chr(196).chr(176) => 'I', chr(196).chr(177) => 'i', chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij', chr(196).chr(180) => 'J', chr(196).chr(181) => 'j', chr(196).chr(182) => 'K', chr(196).chr(183) => 'k', chr(196).chr(184) => 'k', chr(196).chr(185) => 'L', chr(196).chr(186) => 'l', chr(196).chr(187) => 'L', chr(196).chr(188) => 'l', chr(196).chr(189) => 'L', chr(196).chr(190) => 'l', chr(196).chr(191) => 'L', chr(197).chr(128) => 'l', chr(196).chr(129) => 'L', chr(197).chr(130) => 'l', chr(196).chr(131) => 'N', chr(197).chr(132) => 'n', chr(196).chr(133) => 'N', chr(197).chr(134) => 'n', chr(196).chr(135) => 'N', chr(197).chr(136) => 'n', chr(196).chr(137) => 'N', chr(197).chr(138) => 'n', chr(196).chr(139) => 'N', chr(197).chr(140) => 'O', chr(196).chr(141) => 'o', chr(197).chr(142) => 'O', chr(196).chr(143) => 'o', chr(197).chr(144) => 'O', chr(196).chr(145) => 'o', chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe', chr(197).chr(148) => 'R',chr(197).chr(149) => 'r', chr(197).chr(150) => 'R',chr(197).chr(151) => 'r', chr(197).chr(152) => 'R',chr(197).chr(153) => 'r', chr(197).chr(154) => 'S',chr(197).chr(155) => 's', chr(197).chr(156) => 'S',chr(197).chr(157) => 's', chr(197).chr(158) => 'S',chr(197).chr(159) => 's', chr(197).chr(160) => 'S', chr(197).chr(161) => 's', chr(197).chr(162) => 'T', chr(197).chr(163) => 't', chr(197).chr(164) => 'T', chr(197).chr(165) => 't', chr(197).chr(166) => 'T', chr(197).chr(167) => 't', chr(197).chr(168) => 'U', chr(197).chr(169) => 'u', chr(197).chr(170) => 'U', chr(197).chr(171) => 'u', chr(197).chr(172) => 'U', chr(197).chr(173) => 'u', chr(197).chr(174) => 'U', chr(197).chr(175) => 'u', chr(197).chr(176) => 'U', chr(197).chr(177) => 'u', chr(197).chr(178) => 'U', chr(197).chr(179) => 'u', chr(197).chr(180) => 'W', chr(197).chr(181) => 'w', chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y', chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z', chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z', chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z', chr(197).chr(190) => 'z', chr(197).chr(191) => 's', // Euro chr(226).chr(130).chr(172) => 'EUR' ); return strtr($string, $chars); } } |
Comments
You must login before commenting on a snippet. If you do not have an account, please register.
Snippet description
In many languages there are some characters not user friendly. Given an unicode string, this filter substitute them with their equivalent without diacritics mark.
For example: è => e, ç => c.
Snippet details
- Created:
-
Giorgio Sironi
- Edited:
-
Giorgio Sironi
- Revision Id:
- 10
- Edit Message:
- Initial Release
- Tags:
- unicode filter diacritics url friendly
- Comments:
- 0
- Views:
- 1389
- Points:
- 0 (0 votes)