Rev 1308 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
597 | daniel-mar | 1 | <?php |
1042 | daniel-mar | 2 | |
597 | daniel-mar | 3 | /** |
1042 | daniel-mar | 4 | * Abstract minifier class. |
597 | daniel-mar | 5 | * |
6 | * Please report bugs on https://github.com/matthiasmullie/minify/issues |
||
7 | * |
||
8 | * @author Matthias Mullie <minify@mullie.eu> |
||
9 | * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved |
||
10 | * @license MIT License |
||
11 | */ |
||
1042 | daniel-mar | 12 | |
597 | daniel-mar | 13 | namespace MatthiasMullie\Minify; |
14 | |||
15 | use MatthiasMullie\Minify\Exceptions\IOException; |
||
16 | use Psr\Cache\CacheItemInterface; |
||
17 | |||
/**
 * Abstract minifier class.
 *
 * Collects source snippets (inline code or file paths), lets concrete
 * minifiers register PCRE patterns against them, and offers helpers to write
 * the minified result to a string, a file, a gzipped file or a cache item.
 *
 * Please report bugs on https://github.com/matthiasmullie/minify/issues
 *
 * @author Matthias Mullie <minify@mullie.eu>
 * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
 * @license MIT License
 */
abstract class Minify
{
    /**
     * The data to be minified.
     *
     * Keyed by the source path when the entry was loaded from a file, or by a
     * numeric index when the entry is inline code (see add()).
     *
     * @var string[]
     */
    protected $data = array();

    /**
     * Array of patterns to match.
     *
     * Each entry is a 2-element array: the PCRE pattern and its replacement
     * (plain string or callable), as stored by registerPattern().
     *
     * @var array[]
     */
    protected $patterns = array();

    /**
     * This array will hold content of strings and regular expressions that have
     * been extracted from the JS source code, so we can reliably match "code",
     * without having to worry about potential "code-like" characters inside.
     *
     * Keys are the placeholders written into the content, values the original
     * text. It is public only because the closures that fill it receive the
     * minifier as a regular variable (PHP 5.3 has no $this inside closures).
     *
     * @internal
     *
     * @var string[]
     */
    public $extracted = array();

    /**
     * Init the minify class - optionally, code may be passed along already.
     *
     * Every argument given is forwarded to add().
     */
    public function __construct(/* $data = null, ... */)
    {
        // it's possible to add the source through the constructor as well ;)
        if (func_num_args()) {
            call_user_func_array(array($this, 'add'), func_get_args());
        }
    }

    /**
     * Add a file or straight-up code to be minified.
     *
     * Accepts any number of arguments; each may be a string (a path to a
     * readable file, or the code itself) or a (nested) array of such strings.
     *
     * @param string|string[] $data
     *
     * @return static
     */
    public function add($data /* $data = null, ... */)
    {
        // $data only exists to make at least 1 argument mandatory; the real
        // argument list is read via func_get_args() to support overloading
        $args = array($data) + func_get_args();

        foreach ($args as $entry) {
            if (is_array($entry)) {
                // add the elements one by one instead of unpacking the array
                // into an argument list: unpacking fails on an empty array
                // (no argument for the mandatory $data) and, since PHP 8, on
                // string keys (they would be treated as named arguments)
                foreach ($entry as $nested) {
                    $this->add($nested);
                }
                continue;
            }

            $entry = (string) $entry;

            // load data: file content when $entry is a readable file,
            // otherwise $entry itself
            $value = $this->load($entry);

            // content loaded from a file is keyed by its path, inline code
            // gets the next numeric index; the comparison is strict so that
            // numeric-looking strings ("1" vs "1.0") aren't considered equal
            $key = ($entry !== $value) ? $entry : count($this->data);

            // replace CR linefeeds etc.
            // @see https://github.com/matthiasmullie/minify/pull/139
            $value = str_replace(array("\r\n", "\r"), "\n", $value);

            // store data
            $this->data[$key] = $value;
        }

        return $this;
    }

    /**
     * Add a file to be minified.
     *
     * Unlike add(), every argument must be a path that can be imported;
     * anything else raises an exception instead of being treated as code.
     *
     * @param string|string[] $data
     *
     * @return static
     *
     * @throws IOException
     */
    public function addFile($data /* $data = null, ... */)
    {
        // $data only exists to make at least 1 argument mandatory; the real
        // argument list is read via func_get_args() to support overloading
        $args = array($data) + func_get_args();

        foreach ($args as $path) {
            if (is_array($path)) {
                // see add(): iterate rather than unpack, so empty and
                // string-keyed arrays are handled
                foreach ($path as $nested) {
                    $this->addFile($nested);
                }
                continue;
            }

            $path = (string) $path;

            // check if we can read the file
            if (!$this->canImportFile($path)) {
                throw new IOException('The file "' . $path . '" could not be opened for reading. Check if PHP has enough permissions.');
            }

            $this->add($path);
        }

        return $this;
    }

    /**
     * Minify the data & (optionally) saves it to a file.
     *
     * @param string[optional] $path Path to write the data to
     *
     * @return string The minified data
     */
    public function minify($path = null)
    {
        $content = $this->execute($path);

        // save to path
        if ($path !== null) {
            $this->save($content, $path);
        }

        return $content;
    }

    /**
     * Minify & gzip the data & (optionally) saves it to a file.
     *
     * @param string[optional] $path  Path to write the data to
     * @param int[optional]    $level Compression level, from 0 to 9
     *
     * @return string The minified & gzipped data
     */
    public function gzip($path = null, $level = 9)
    {
        $content = $this->execute($path);
        $content = gzencode($content, $level, FORCE_GZIP);

        // save to path
        if ($path !== null) {
            $this->save($content, $path);
        }

        return $content;
    }

    /**
     * Minify the data & write it to a CacheItemInterface object.
     *
     * @param CacheItemInterface $item Cache item to write the data to
     *
     * @return CacheItemInterface Cache item with the minifier data
     */
    public function cache(CacheItemInterface $item)
    {
        $content = $this->execute();
        $item->set($content);

        return $item;
    }

    /**
     * Minify the data.
     *
     * @param string[optional] $path Path to write the data to
     *
     * @return string The minified data
     */
    abstract public function execute($path = null);

    /**
     * Load data.
     *
     * When $data is a path accepted by canImportFile(), the file is read and a
     * leading UTF-8 byte order mark is removed; otherwise $data is returned
     * as is.
     *
     * @param string $data Either a path to a file or the content itself
     *
     * @return string
     */
    protected function load($data)
    {
        // check if the data is a file
        if ($this->canImportFile($data)) {
            $data = file_get_contents($data);

            // strip BOM, if any
            if (substr($data, 0, 3) === "\xef\xbb\xbf") {
                $data = substr($data, 3);
            }
        }

        return $data;
    }

    /**
     * Save to file.
     *
     * @param string $content The minified data
     * @param string $path    The path to save the minified data to
     *
     * @throws IOException
     */
    protected function save($content, $path)
    {
        $handler = $this->openFileForWriting($path);

        try {
            // pass $path along, so a failure reports which file it was about
            $this->writeToFile($handler, $content, $path);
        } catch (\Exception $e) {
            // don't leak the open handle when the write fails
            @fclose($handler);

            throw $e;
        }

        @fclose($handler);
    }

    /**
     * Register a pattern to execute against the source content.
     *
     * If $replacement is a string, it must be plain text. Placeholders like $1 or \2 don't work.
     * If you need that functionality, use a callback instead.
     *
     * @param string          $pattern     PCRE pattern
     * @param string|callable $replacement Replacement value for matched pattern
     */
    protected function registerPattern($pattern, $replacement = '')
    {
        // study the pattern, we'll execute it more than once
        $pattern .= 'S';

        $this->patterns[] = array($pattern, $replacement);
    }

    /**
     * Both JS and CSS use the same form of multi-line comment, so putting the common code here.
     *
     * Registers 2 patterns: the first swaps comments that must be kept (those
     * starting with "!" or containing @license/@preserve) for a placeholder
     * stored in $this->extracted, the second removes every other comment.
     */
    protected function stripMultilineComments()
    {
        // First extract comments we want to keep, so they can be restored later
        // PHP only supports $this inside anonymous functions since 5.4
        $minifier = $this;
        $callback = function ($match) use ($minifier) {
            $count = count($minifier->extracted);
            $placeholder = '/*' . $count . '*/';
            $minifier->extracted[$placeholder] = $match[0];

            return $placeholder;
        };
        $this->registerPattern('/
            # optional newline
            \n?

            # start comment
            \/\*

            # comment content
            (?:
                # either starts with an !
                !
            |
                # or, after some number of characters which do not end the comment
                (?:(?!\*\/).)*?

                # there is either a @license or @preserve tag
                @(?:license|preserve)
            )

            # then match to the end of the comment
            .*?\*\/\n?

            /ixs', $callback);

        // Then strip all other comments
        $this->registerPattern('/\/\*.*?\*\//s', '');
    }

    /**
     * We can't "just" run some regular expressions against JavaScript: it's a
     * complex language. E.g. having an occurrence of // xyz would be a comment,
     * unless it's used within a string. Or you could have something that looks
     * like a 'string', but inside a comment.
     * The only way to accurately replace these pieces is to walk through the
     * content and, at every step, apply whichever registered pattern matches
     * first.
     *
     * NOTE(review): a pattern that matches an empty string at the current
     * offset does not advance $processedOffset; registered patterns are
     * expected to consume at least 1 character.
     *
     * @param string $content The content to replace patterns in
     *
     * @return string The (manipulated) content
     */
    protected function replace($content)
    {
        $contentLength = strlen($content);
        $output = '';
        $processedOffset = 0;
        // last known match offset per pattern; -1 = not searched yet
        $positions = array_fill(0, count($this->patterns), -1);
        $matches = array();

        while ($processedOffset < $contentLength) {
            // find first match for all patterns
            foreach ($this->patterns as $i => $pattern) {
                // we can safely ignore patterns for positions we've unset earlier,
                // because we know these won't show up anymore
                if (!array_key_exists($i, $positions)) {
                    continue;
                }

                // no need to re-run matches that are still in the part of the
                // content that hasn't been processed
                if ($positions[$i] >= $processedOffset) {
                    continue;
                }

                $match = null;
                if (preg_match($pattern[0], $content, $match, PREG_OFFSET_CAPTURE, $processedOffset)) {
                    $matches[$i] = $match;

                    // we'll store the match position as well; that way, we
                    // don't have to redo all preg_matches after changing only
                    // the first (we'll still know where those others are)
                    $positions[$i] = $match[0][1];
                } else {
                    // if the pattern couldn't be matched, there's no point in
                    // executing it again in later runs on this same content;
                    // ignore this one until we reach end of content
                    unset($matches[$i], $positions[$i]);
                }
            }

            // no more matches to find: everything's been processed, break out
            if (!$matches) {
                // output the remaining content
                $output .= substr($content, $processedOffset);
                break;
            }

            // see which of the patterns actually found the first thing (we'll
            // only want to execute that one, since we're unsure if what the
            // other found was not inside what the first found)
            $matchOffset = min($positions);
            $firstPattern = array_search($matchOffset, $positions, true);
            $match = $matches[$firstPattern];

            // execute the pattern that matches earliest in the content string
            $replacement = $this->patterns[$firstPattern][1];

            // add the part of the input between $processedOffset and the first match;
            // that content wasn't matched by anything
            $output .= substr($content, $processedOffset, $matchOffset - $processedOffset);
            // add the replacement for the match
            $output .= $this->executeReplacement($replacement, $match);
            // advance $processedOffset past the match
            $processedOffset = $matchOffset + strlen($match[0][0]);
        }

        return $output;
    }

    /**
     * If $replacement is a callback, execute it, passing in the match data.
     * If it's a string, just pass it through.
     *
     * @param string|callable $replacement Replacement value
     * @param array           $match       Match data, in PREG_OFFSET_CAPTURE form
     *
     * @return string
     */
    protected function executeReplacement($replacement, $match)
    {
        // string replacements are plain text (see registerPattern); checking
        // is_string() first makes sure a text that happens to be the name of
        // a function (e.g. 'strtoupper') is never invoked
        if (is_string($replacement) || !is_callable($replacement)) {
            return $replacement;
        }

        // convert $match from the PREG_OFFSET_CAPTURE form (text + offset per
        // group) to the form the callback expects (text only)
        $groups = array();
        foreach ($match as $index => $capture) {
            $groups[$index] = $capture[0];
        }

        return $replacement($groups);
    }

    /**
     * Strings are a pattern we need to match, in order to ignore potential
     * code-like content inside them, but we just want all of the string
     * content to remain untouched.
     *
     * This method will replace all string content with a simple numbered
     * placeholder (wrapped in the original quote character), so the strings no
     * longer contain characters that may be misinterpreted. Original string
     * content will be saved in $this->extracted and after doing all other
     * minifying, we can restore the original content via
     * restoreExtractedData().
     *
     * @param string[optional] $chars             Quote characters that delimit a string
     * @param string[optional] $placeholderPrefix Text to put in front of the placeholder number
     */
    protected function extractStrings($chars = '\'"', $placeholderPrefix = '')
    {
        // PHP only supports $this inside anonymous functions since 5.4
        $minifier = $this;
        $callback = function ($match) use ($minifier, $placeholderPrefix) {
            // check the second index here, because the first always contains a quote
            if ($match[2] === '') {
                /*
                 * Empty strings need no placeholder; they can't be confused for
                 * anything else anyway.
                 * But we still needed to match them, for the extraction routine
                 * to skip over this particular string.
                 */
                return $match[0];
            }

            $count = count($minifier->extracted);
            $placeholder = $match[1] . $placeholderPrefix . $count . $match[1];
            $minifier->extracted[$placeholder] = $match[1] . $match[2] . $match[1];

            return $placeholder;
        };

        /*
         * The \\ messiness explained:
         * * Don't count ' or " as end-of-string if it's escaped (has backslash
         * in front of it)
         * * Unless... that backslash itself is escaped (another leading slash),
         * in which case it's no longer escaping the ' or "
         * * So there can be either no backslash, or an even number
         * * multiply all of that times 4, to account for the escaping that has
         * to be done to pass the backslash into the PHP string without it being
         * considered as escape-char (times 2) and to get it in the regex,
         * escaped (times 2)
         */
        $this->registerPattern('/([' . $chars . '])(.*?(?<!\\\\)(\\\\\\\\)*+)\\1/s', $callback);
    }

    /**
     * This method will restore all extracted data (strings, regexes) that were
     * replaced with placeholder text in extract*(). The original content was
     * saved in $this->extracted, which is emptied afterwards.
     *
     * @param string $content
     *
     * @return string
     */
    protected function restoreExtractedData($content)
    {
        if (!$this->extracted) {
            // nothing was extracted, nothing to restore
            return $content;
        }

        $content = strtr($content, $this->extracted);

        $this->extracted = array();

        return $content;
    }

    /**
     * Check if the path is a regular file and can be read.
     *
     * Anything that parses as having a host or a query string is rejected
     * without touching the filesystem.
     *
     * @param string $path
     *
     * @return bool
     */
    protected function canImportFile($path)
    {
        $parsed = parse_url($path);
        if (
            // file is elsewhere
            isset($parsed['host'])
            // file responds to queries (may change, or need to bypass cache)
            || isset($parsed['query'])
        ) {
            return false;
        }

        try {
            return strlen($path) < PHP_MAXPATHLEN && @is_file($path) && is_readable($path);
        }
        // catch openbasedir exceptions which are not caught by @ on is_file()
        catch (\Exception $e) {
            return false;
        }
    }

    /**
     * Attempts to open file specified by $path for writing.
     *
     * @param string $path The path to the file
     *
     * @return resource Specifier for the target file
     *
     * @throws IOException
     */
    protected function openFileForWriting($path)
    {
        if ($path === '' || ($handler = @fopen($path, 'w')) === false) {
            throw new IOException('The file "' . $path . '" could not be opened for writing. Check if PHP has enough permissions.');
        }

        return $handler;
    }

    /**
     * Attempts to write $content to the file specified by $handler. $path is used for printing exceptions.
     *
     * A write that stores fewer bytes than strlen($content) is treated as a
     * failure as well.
     *
     * @param resource $handler The resource to write to
     * @param string   $content The content to write
     * @param string   $path    The path to the file (for exception printing only)
     *
     * @throws IOException
     */
    protected function writeToFile($handler, $content, $path = '')
    {
        if (
            !is_resource($handler)
            || ($result = @fwrite($handler, $content)) === false
            || ($result < strlen($content))
        ) {
            throw new IOException('The file "' . $path . '" could not be written to. Check your disk space and file permissions.');
        }
    }

    /**
     * Replace only the first occurrence of $search in $subject.
     *
     * @param string $search  Text to look for
     * @param string $replace Text to put in its place
     * @param string $subject Text to search in
     *
     * @return string $subject with the first occurrence replaced, or $subject
     *                unchanged when $search does not occur in it
     */
    protected static function str_replace_first($search, $replace, $subject)
    {
        $pos = strpos($subject, $search);
        if ($pos !== false) {
            return substr_replace($subject, $replace, $pos, strlen($search));
        }

        return $subject;
    }
}