Login | ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/oidinfo_api/trunk/xml_utils.inc.phps
Revision: 3
Committed: Thu May 2 10:10:59 2019 UTC (11 months ago) by daniel-marschall
File size: 6151 byte(s)
Log Message:
Fixed symlinks

File Contents

# Content
1 <?php
2
3 /*
4 * XML Encoding Utilities
5 * Copyright 2011-2019 Daniel Marschall, ViaThinkSoft
6 * Version 1.7
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 // http://www.viathinksoft.de/?page=codelib&showid=89
22
23 // Unicode-proof htmlentities.
24 // Returns 'normal' chars as chars and weirdos as numeric html entities.
25 // Source: http://www.php.net/manual/en/function.htmlentities.php#107985 ; modified
26 // Modified by Daniel Marschall, ViaThinkSoft
/**
 * Encodes a string so that only "harmless" printable ASCII stays literal.
 *
 * Every multi-byte character, every byte outside 32..126, the characters
 * " # $ % & ' (34..39) and < = > (60..62) are replaced by decimal numeric
 * entities (e.g. "&#228;").
 *
 * @param string $str               Input text. It is converted to UTF-8 first
 *                                  if mb_detect_encoding() does not report UTF-8.
 * @param bool   $allow_html        If true, the markup characters < > = " ' &
 *                                  are left unencoded, and (unless
 *                                  $encode_linebreaks is set) LF and CR are each
 *                                  turned into "<br />".
 * @param bool   $encode_linebreaks If true, LF and CR stay encoded as "&#10;" /
 *                                  "&#13;". If false they are emitted as raw
 *                                  line breaks (or "<br />" with $allow_html).
 * @return string The encoded string.
 */
function htmlentities_numeric($str, $allow_html=false, $encode_linebreaks=false) {
	// Convert $str to UTF-8 if it is not already.
	// The source encoding is deliberately left to mbstring's internal encoding.
	if (mb_detect_encoding($str, "auto", true) !== 'UTF-8') {
		$str = mb_convert_encoding($str, 'UTF-8');
	}

	// NOTE: Existing entities are NOT decoded beforehand, so already-escaped
	// input gets double-escaped. The html_entity_decode(stripslashes(...)) step
	// was disabled on 2016-08-24 because of the OID+ XML export.

	// Split into single UTF-8 characters. An empty pattern with NO_EMPTY is used
	// instead of look-arounds with "$", because "$" also matches in front of a
	// trailing newline and would glue the last character to that newline.
	$chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
	if ($chars === false) {
		// Not expected (the string is UTF-8 at this point); nothing can be encoded.
		return '';
	}

	// Cover the complete Unicode range, not just the BMP, so that characters
	// above U+FFFF (e.g. emoji) are encoded as well.
	$convmap = array(0x0, 0x10ffff, 0, 0x1fffff);

	// Entities that are turned back into raw markup when HTML is allowed.
	// "&" was re-enabled on 2016-08-24 because of the OID+ XML export.
	$html_passthrough = array(
		'&#60;' => '<',
		'&#62;' => '>',
		'&#61;' => '=',
		'&#34;' => '"',
		'&#39;' => '\'',
		'&#38;' => '&',
	);

	// Replacement for encoded line breaks when they shall not stay encoded.
	if ($allow_html) {
		$linebreaks = array("&#10;" => "<br />", "&#13;" => "<br />");
	} else {
		$linebreaks = array("&#10;" => "\n", "&#13;" => "\r");
	}

	$out = '';
	foreach ($chars as $c) {
		$o = ord($c);
		$needs_entity =
			(strlen($c) > 1) ||         /* multi-byte [unicode] */
			($o < 32 || $o > 126) ||    /* control chars / non-ASCII bytes */
			($o > 33 && $o < 40) ||     /* quotes + ampersand (and # $ %) */
			($o > 59 && $o < 63);       /* html: < = > */

		if ($needs_entity) {
			// convert to numeric entity
			$c = mb_encode_numericentity($c, $convmap, 'UTF-8');

			if ($allow_html && isset($html_passthrough[$c])) {
				$c = $html_passthrough[$c];
			}

			if (!$encode_linebreaks && isset($linebreaks[$c])) {
				$c = $linebreaks[$c];
			}
		}
		$out .= $c;
	}
	return $out;
}
75
/**
 * Returns the Unicode code point of the UTF-8 sequence that starts at byte
 * offset $index of $c (a multi-byte aware counterpart of ord()).
 *
 * Based on http://de.php.net/manual/en/function.ord.php#78032
 *
 * Only the lead byte and the remaining length are checked; the following
 * bytes are masked with 0x3F without verifying that they are continuation
 * bytes.
 *
 * @param string   $c     Byte string containing UTF-8 data.
 * @param int      $index Byte offset of the lead byte inside $c.
 * @param int|null $bytes Output: number of bytes the sequence occupies
 *                        (1..4), or 0 if false is returned.
 * @return int|false The code point, or false if $index is past the end, the
 *                   lead byte is invalid (0x80..0xC1 or above 0xF4), or the
 *                   sequence is truncated.
 */
function ordUTF8($c, $index = 0, &$bytes = null) {
	$len = strlen($c);
	$bytes = 0;

	if ($index >= $len) {
		return false;
	}

	// Square brackets instead of the curly-brace offset syntax, which is a
	// parse error since PHP 8.0.
	$h = ord($c[$index]);

	if ($h <= 0x7F) {
		// plain ASCII
		$bytes = 1;
		return $h;
	}

	if ($h < 0xC2) {
		// stray continuation byte or overlong 2-byte lead (0xC0, 0xC1)
		return false;
	}

	if ($h <= 0xDF && $index < $len - 1) {
		$bytes = 2;
		return ($h & 0x1F) << 6
			| (ord($c[$index + 1]) & 0x3F);
	}

	if ($h <= 0xEF && $index < $len - 2) {
		$bytes = 3;
		return ($h & 0x0F) << 12
			| (ord($c[$index + 1]) & 0x3F) << 6
			| (ord($c[$index + 2]) & 0x3F);
	}

	if ($h <= 0xF4 && $index < $len - 3) {
		$bytes = 4;
		// A 4-byte lead carries three payload bits.
		return ($h & 0x07) << 18
			| (ord($c[$index + 1]) & 0x3F) << 12
			| (ord($c[$index + 2]) & 0x3F) << 6
			| (ord($c[$index + 3]) & 0x3F);
	}

	// lead byte above 0xF4, or not enough bytes left for the sequence
	return false;
}
109
/**
 * Converts a BOM-prefixed UTF-16 string to UTF-8.
 *
 * Based on:
 * http://betamode.de/2008/09/08/php-utf-16-zu-utf-8-konvertieren/
 * http://www.moddular.org/log/utf16-to-utf8
 *
 * The byte order is taken from the byte order mark (FE FF = big endian,
 * FF FE = little endian). Input without a BOM (including strings shorter
 * than two bytes) is returned unchanged. The BOM itself is not part of the
 * result.
 *
 * Surrogate pairs are combined into a single 4-byte UTF-8 sequence. Unpaired
 * surrogates and a dangling odd byte at the end are replaced by U+FFFD.
 * The code unit 0x0000 is emitted as the two bytes C0 80 (as in the original
 * implementation), so the result never contains a raw NUL byte.
 *
 * @param string $str UTF-16 data including BOM, or any other string.
 * @return string UTF-8 data, or $str unchanged if no BOM was found.
 */
function utf16_to_utf8($str) {
	// Without at least two bytes there cannot be a BOM.
	if (strlen($str) < 2) {
		return $str;
	}

	$c0 = ord($str[0]);
	$c1 = ord($str[1]);
	if ($c0 === 0xFE && $c1 === 0xFF) {
		$be = true;
	} else if ($c0 === 0xFF && $c1 === 0xFE) {
		$be = false;
	} else {
		return $str;
	}

	$str = (string)substr($str, 2);
	$len = strlen($str);
	$dec = '';

	// Only iterate over complete 16-bit code units.
	for ($i = 0; $i + 1 < $len; $i += 2) {
		$c = ($be) ? ord($str[$i]) << 8 | ord($str[$i + 1])
		           : ord($str[$i + 1]) << 8 | ord($str[$i]);

		if ($c >= 0xD800 && $c <= 0xDBFF) {
			// High surrogate: must be followed by a low surrogate.
			$lo = -1;
			if ($i + 3 < $len) {
				$lo = ($be) ? ord($str[$i + 2]) << 8 | ord($str[$i + 3])
				            : ord($str[$i + 3]) << 8 | ord($str[$i + 2]);
			}
			if ($lo >= 0xDC00 && $lo <= 0xDFFF) {
				$c = 0x10000 + (($c - 0xD800) << 10) + ($lo - 0xDC00);
				$i += 2; // the low surrogate has been consumed
			} else {
				$c = 0xFFFD; // unpaired high surrogate
			}
		} else if ($c >= 0xDC00 && $c <= 0xDFFF) {
			$c = 0xFFFD; // low surrogate without preceding high surrogate
		}

		if ($c >= 0x0001 && $c <= 0x007F) {
			$dec .= chr($c);
		} else if ($c > 0xFFFF) {
			$dec .= chr(0xF0 | (($c >> 18) & 0x07));
			$dec .= chr(0x80 | (($c >> 12) & 0x3F));
			$dec .= chr(0x80 | (($c >> 6) & 0x3F));
			$dec .= chr(0x80 | (($c >> 0) & 0x3F));
		} else if ($c > 0x07FF) {
			$dec .= chr(0xE0 | (($c >> 12) & 0x0F));
			$dec .= chr(0x80 | (($c >> 6) & 0x3F));
			$dec .= chr(0x80 | (($c >> 0) & 0x3F));
		} else {
			// 0x0080..0x07FF, and 0x0000 (which becomes C0 80)
			$dec .= chr(0xC0 | (($c >> 6) & 0x1F));
			$dec .= chr(0x80 | (($c >> 0) & 0x3F));
		}
	}

	if ($len % 2 !== 0) {
		// Incomplete code unit at the end of the input.
		$dec .= "\xEF\xBF\xBD";
	}

	return $dec;
}
142
/**
 * Replaces HTML named entities and non-ASCII characters by hexadecimal
 * numeric entities (e.g. "&auml;" and "ä" both become "&#xE4;").
 *
 * Named entities that also appear in PHP's XML 1.0 translation table
 * (&quot; &amp; &lt; &gt;) are left untouched.
 *
 * Input that is not valid UTF-8 is treated as ISO-8859-1 and converted first.
 *
 * Side effect: on the first call the global helper decodeNamedEntities() is
 * defined, as well as mb_convert_encoding(), mb_ord() and mb_htmlentities()
 * if functions with these names do not exist yet.
 *
 * @param string $str Text that may contain named entities and/or non-ASCII
 *                    characters.
 * @return string Text in which all characters from U+0080 upwards are numeric
 *                entities.
 */
function html_named_to_numeric_entities($str) {
	if (!function_exists('decodeNamedEntities')) {
		// Replaces the HTML 4.01 named entities that are missing from the
		// XML 1.0 table by their literal UTF-8 characters.
		function decodeNamedEntities($string) {
			// https://stackoverflow.com/questions/20406599/how-to-encode-for-entity-igrave-not-defined-error-in-xml-feed
			static $entities = NULL;
			if (NULL === $entities) {
				// The tables map character => entity. The diff removes the
				// entities shared with XML; the flip yields entity => character.
				$entities = array_flip(
					array_diff(
						get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_HTML401, 'UTF-8'),
						get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_XML1, 'UTF-8')
					)
				);
			}
			return str_replace(array_keys($entities), $entities, $string);
		}
	}

	if (!function_exists('mb_convert_encoding')) {
		// Fallback via iconv.
		// https://riptutorial.com/php/example/15633/converting-unicode-characters-to-their-numeric-value-and-or-html-entities-using-php
		function mb_convert_encoding($str, $to_encoding, $from_encoding = NULL) {
			if ($from_encoding === NULL) {
				// mb_internal_encoding() is usually missing as well when this
				// fallback is active, so it must not be called unconditionally.
				if (function_exists('mb_internal_encoding')) {
					$from_encoding = mb_internal_encoding();
				} else {
					$from_encoding = ini_get('default_charset');
					if (!$from_encoding) $from_encoding = 'UTF-8';
				}
			}
			return iconv($from_encoding, $to_encoding, $str);
		}
	}

	if (!function_exists('mb_ord')) {
		// Fallback: converts the character to UCS-4BE and unpacks the
		// resulting big-endian integer.
		// https://riptutorial.com/php/example/15633/converting-unicode-characters-to-their-numeric-value-and-or-html-entities-using-php
		function mb_ord($char, $encoding = 'UTF-8') {
			if ($encoding === 'UCS-4BE') {
				list(, $ord) = (strlen($char) === 4) ? @unpack('N', $char) : @unpack('n', $char);
				return $ord;
			} else {
				return mb_ord(mb_convert_encoding($char, 'UCS-4BE', $encoding), 'UCS-4BE');
			}
		}
	}

	if (!function_exists('mb_htmlentities')) {
		// Replaces every character from U+0080 upwards by a numeric entity
		// (hexadecimal by default). $encoding is accepted but not evaluated;
		// the input is matched as UTF-8.
		// https://riptutorial.com/php/example/15633/converting-unicode-characters-to-their-numeric-value-and-or-html-entities-using-php
		function mb_htmlentities($string, $hex = true, $encoding = 'UTF-8') {
			return preg_replace_callback('/[\x{80}-\x{10FFFF}]/u', function ($match) use ($hex) {
				return sprintf($hex ? '&#x%X;' : '&#%d;', mb_ord($match[0]));
			}, $string);
		}
	}

	// Not valid UTF-8: interpret as ISO-8859-1. This is what utf8_encode() did;
	// that function is deprecated as of PHP 8.2.
	if (!mb_detect_encoding($str, 'UTF-8', true)) {
		$str = mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
	}

	return mb_htmlentities(decodeNamedEntities($str));
}