Subversion Repositories oidplus

Rev

Rev 1308 | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. <?php
  2.  
  3. /**
  4.  * Abstract minifier class.
  5.  *
  6.  * Please report bugs on https://github.com/matthiasmullie/minify/issues
  7.  *
  8.  * @author Matthias Mullie <minify@mullie.eu>
  9.  * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
  10.  * @license MIT License
  11.  */
  12.  
  13. namespace MatthiasMullie\Minify;
  14.  
  15. use MatthiasMullie\Minify\Exceptions\IOException;
  16. use Psr\Cache\CacheItemInterface;
  17.  
  18. /**
  19.  * Abstract minifier class.
  20.  *
  21.  * Please report bugs on https://github.com/matthiasmullie/minify/issues
  22.  *
  23.  * @author Matthias Mullie <minify@mullie.eu>
  24.  * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
  25.  * @license MIT License
  26.  */
  27. abstract class Minify
  28. {
  29.     /**
  30.      * The data to be minified.
  31.      *
  32.      * @var string[]
  33.      */
  34.     protected $data = array();
  35.  
  36.     /**
  37.      * Array of patterns to match.
  38.      *
  39.      * @var string[]
  40.      */
  41.     protected $patterns = array();
  42.  
  43.     /**
  44.      * This array will hold content of strings and regular expressions that have
  45.      * been extracted from the JS source code, so we can reliably match "code",
  46.      * without having to worry about potential "code-like" characters inside.
  47.      *
  48.      * @internal
  49.      *
  50.      * @var string[]
  51.      */
  52.     public $extracted = array();
  53.  
  54.     /**
  55.      * Init the minify class - optionally, code may be passed along already.
  56.      */
  57.     public function __construct(/* $data = null, ... */)
  58.     {
  59.         // it's possible to add the source through the constructor as well ;)
  60.         if (func_num_args()) {
  61.             call_user_func_array(array($this, 'add'), func_get_args());
  62.         }
  63.     }
  64.  
  65.     /**
  66.      * Add a file or straight-up code to be minified.
  67.      *
  68.      * @param string|string[] $data
  69.      *
  70.      * @return static
  71.      */
  72.     public function add($data /* $data = null, ... */)
  73.     {
  74.         // bogus "usage" of parameter $data: scrutinizer warns this variable is
  75.         // not used (we're using func_get_args instead to support overloading),
  76.         // but it still needs to be defined because it makes no sense to have
  77.         // this function without argument :)
  78.         $args = array($data) + func_get_args();
  79.  
  80.         // this method can be overloaded
  81.         foreach ($args as $data) {
  82.             if (is_array($data)) {
  83.                 call_user_func_array(array($this, 'add'), $data);
  84.                 continue;
  85.             }
  86.  
  87.             // redefine var
  88.             $data = (string) $data;
  89.  
  90.             // load data
  91.             $value = $this->load($data);
  92.             $key = ($data != $value) ? $data : count($this->data);
  93.  
  94.             // replace CR linefeeds etc.
  95.             // @see https://github.com/matthiasmullie/minify/pull/139
  96.             $value = str_replace(array("\r\n", "\r"), "\n", $value);
  97.  
  98.             // store data
  99.             $this->data[$key] = $value;
  100.         }
  101.  
  102.         return $this;
  103.     }
  104.  
  105.     /**
  106.      * Add a file to be minified.
  107.      *
  108.      * @param string|string[] $data
  109.      *
  110.      * @return static
  111.      *
  112.      * @throws IOException
  113.      */
  114.     public function addFile($data /* $data = null, ... */)
  115.     {
  116.         // bogus "usage" of parameter $data: scrutinizer warns this variable is
  117.         // not used (we're using func_get_args instead to support overloading),
  118.         // but it still needs to be defined because it makes no sense to have
  119.         // this function without argument :)
  120.         $args = array($data) + func_get_args();
  121.  
  122.         // this method can be overloaded
  123.         foreach ($args as $path) {
  124.             if (is_array($path)) {
  125.                 call_user_func_array(array($this, 'addFile'), $path);
  126.                 continue;
  127.             }
  128.  
  129.             // redefine var
  130.             $path = (string) $path;
  131.  
  132.             // check if we can read the file
  133.             if (!$this->canImportFile($path)) {
  134.                 throw new IOException('The file "' . $path . '" could not be opened for reading. Check if PHP has enough permissions.');
  135.             }
  136.  
  137.             $this->add($path);
  138.         }
  139.  
  140.         return $this;
  141.     }
  142.  
  143.     /**
  144.      * Minify the data & (optionally) saves it to a file.
  145.      *
  146.      * @param string[optional] $path Path to write the data to
  147.      *
  148.      * @return string The minified data
  149.      */
  150.     public function minify($path = null)
  151.     {
  152.         $content = $this->execute($path);
  153.  
  154.         // save to path
  155.         if ($path !== null) {
  156.             $this->save($content, $path);
  157.         }
  158.  
  159.         return $content;
  160.     }
  161.  
  162.     /**
  163.      * Minify & gzip the data & (optionally) saves it to a file.
  164.      *
  165.      * @param string[optional] $path  Path to write the data to
  166.      * @param int[optional]    $level Compression level, from 0 to 9
  167.      *
  168.      * @return string The minified & gzipped data
  169.      */
  170.     public function gzip($path = null, $level = 9)
  171.     {
  172.         $content = $this->execute($path);
  173.         $content = gzencode($content, $level, FORCE_GZIP);
  174.  
  175.         // save to path
  176.         if ($path !== null) {
  177.             $this->save($content, $path);
  178.         }
  179.  
  180.         return $content;
  181.     }
  182.  
  183.     /**
  184.      * Minify the data & write it to a CacheItemInterface object.
  185.      *
  186.      * @param CacheItemInterface $item Cache item to write the data to
  187.      *
  188.      * @return CacheItemInterface Cache item with the minifier data
  189.      */
  190.     public function cache(CacheItemInterface $item)
  191.     {
  192.         $content = $this->execute();
  193.         $item->set($content);
  194.  
  195.         return $item;
  196.     }
  197.  
  198.     /**
  199.      * Minify the data.
  200.      *
  201.      * @param string[optional] $path Path to write the data to
  202.      *
  203.      * @return string The minified data
  204.      */
  205.     abstract public function execute($path = null);
  206.  
  207.     /**
  208.      * Load data.
  209.      *
  210.      * @param string $data Either a path to a file or the content itself
  211.      *
  212.      * @return string
  213.      */
  214.     protected function load($data)
  215.     {
  216.         // check if the data is a file
  217.         if ($this->canImportFile($data)) {
  218.             $data = file_get_contents($data);
  219.  
  220.             // strip BOM, if any
  221.             if (substr($data, 0, 3) == "\xef\xbb\xbf") {
  222.                 $data = substr($data, 3);
  223.             }
  224.         }
  225.  
  226.         return $data;
  227.     }
  228.  
  229.     /**
  230.      * Save to file.
  231.      *
  232.      * @param string $content The minified data
  233.      * @param string $path    The path to save the minified data to
  234.      *
  235.      * @throws IOException
  236.      */
  237.     protected function save($content, $path)
  238.     {
  239.         $handler = $this->openFileForWriting($path);
  240.  
  241.         $this->writeToFile($handler, $content);
  242.  
  243.         @fclose($handler);
  244.     }
  245.  
  246.     /**
  247.      * Register a pattern to execute against the source content.
  248.      *
  249.      * If $replacement is a string, it must be plain text. Placeholders like $1 or \2 don't work.
  250.      * If you need that functionality, use a callback instead.
  251.      *
  252.      * @param string          $pattern     PCRE pattern
  253.      * @param string|callable $replacement Replacement value for matched pattern
  254.      */
  255.     protected function registerPattern($pattern, $replacement = '')
  256.     {
  257.         // study the pattern, we'll execute it more than once
  258.         $pattern .= 'S';
  259.  
  260.         $this->patterns[] = array($pattern, $replacement);
  261.     }
  262.  
  263.     /**
  264.      * Both JS and CSS use the same form of multi-line comment, so putting the common code here.
  265.      */
  266.     protected function stripMultilineComments()
  267.     {
  268.         // First extract comments we want to keep, so they can be restored later
  269.         // PHP only supports $this inside anonymous functions since 5.4
  270.         $minifier = $this;
  271.         $callback = function ($match) use ($minifier) {
  272.             $count = count($minifier->extracted);
  273.             $placeholder = '/*' . $count . '*/';
  274.             $minifier->extracted[$placeholder] = $match[0];
  275.  
  276.             return $placeholder;
  277.         };
  278.         $this->registerPattern('/
  279.            # optional newline
  280.            \n?
  281.  
  282.            # start comment
  283.            \/\*
  284.  
  285.            # comment content
  286.            (?:
  287.                # either starts with an !
  288.                !
  289.            |
  290.                # or, after some number of characters which do not end the comment
  291.                (?:(?!\*\/).)*?
  292.  
  293.                # there is either a @license or @preserve tag
  294.                @(?:license|preserve)
  295.            )
  296.  
  297.            # then match to the end of the comment
  298.            .*?\*\/\n?
  299.  
  300.            /ixs', $callback);
  301.  
  302.         // Then strip all other comments
  303.         $this->registerPattern('/\/\*.*?\*\//s', '');
  304.     }
  305.  
  306.     /**
  307.      * We can't "just" run some regular expressions against JavaScript: it's a
  308.      * complex language. E.g. having an occurrence of // xyz would be a comment,
  309.      * unless it's used within a string. Of you could have something that looks
  310.      * like a 'string', but inside a comment.
  311.      * The only way to accurately replace these pieces is to traverse the JS one
  312.      * character at a time and try to find whatever starts first.
  313.      *
  314.      * @param string $content The content to replace patterns in
  315.      *
  316.      * @return string The (manipulated) content
  317.      */
  318.     protected function replace($content)
  319.     {
  320.         $contentLength = strlen($content);
  321.         $output = '';
  322.         $processedOffset = 0;
  323.         $positions = array_fill(0, count($this->patterns), -1);
  324.         $matches = array();
  325.  
  326.         while ($processedOffset < $contentLength) {
  327.             // find first match for all patterns
  328.             foreach ($this->patterns as $i => $pattern) {
  329.                 list($pattern, $replacement) = $pattern;
  330.  
  331.                 // we can safely ignore patterns for positions we've unset earlier,
  332.                 // because we know these won't show up anymore
  333.                 if (array_key_exists($i, $positions) == false) {
  334.                     continue;
  335.                 }
  336.  
  337.                 // no need to re-run matches that are still in the part of the
  338.                 // content that hasn't been processed
  339.                 if ($positions[$i] >= $processedOffset) {
  340.                     continue;
  341.                 }
  342.  
  343.                 $match = null;
  344.                 if (preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE, $processedOffset)) {
  345.                     $matches[$i] = $match;
  346.  
  347.                     // we'll store the match position as well; that way, we
  348.                     // don't have to redo all preg_matches after changing only
  349.                     // the first (we'll still know where those others are)
  350.                     $positions[$i] = $match[0][1];
  351.                 } else {
  352.                     // if the pattern couldn't be matched, there's no point in
  353.                     // executing it again in later runs on this same content;
  354.                     // ignore this one until we reach end of content
  355.                     unset($matches[$i], $positions[$i]);
  356.                 }
  357.             }
  358.  
  359.             // no more matches to find: everything's been processed, break out
  360.             if (!$matches) {
  361.                 // output the remaining content
  362.                 $output .= substr($content, $processedOffset);
  363.                 break;
  364.             }
  365.  
  366.             // see which of the patterns actually found the first thing (we'll
  367.             // only want to execute that one, since we're unsure if what the
  368.             // other found was not inside what the first found)
  369.             $matchOffset = min($positions);
  370.             $firstPattern = array_search($matchOffset, $positions);
  371.             $match = $matches[$firstPattern];
  372.  
  373.             // execute the pattern that matches earliest in the content string
  374.             list(, $replacement) = $this->patterns[$firstPattern];
  375.  
  376.             // add the part of the input between $processedOffset and the first match;
  377.             // that content wasn't matched by anything
  378.             $output .= substr($content, $processedOffset, $matchOffset - $processedOffset);
  379.             // add the replacement for the match
  380.             $output .= $this->executeReplacement($replacement, $match);
  381.             // advance $processedOffset past the match
  382.             $processedOffset = $matchOffset + strlen($match[0][0]);
  383.         }
  384.  
  385.         return $output;
  386.     }
  387.  
  388.     /**
  389.      * If $replacement is a callback, execute it, passing in the match data.
  390.      * If it's a string, just pass it through.
  391.      *
  392.      * @param string|callable $replacement Replacement value
  393.      * @param array           $match       Match data, in PREG_OFFSET_CAPTURE form
  394.      *
  395.      * @return string
  396.      */
  397.     protected function executeReplacement($replacement, $match)
  398.     {
  399.         if (!is_callable($replacement)) {
  400.             return $replacement;
  401.         }
  402.         // convert $match from the PREG_OFFSET_CAPTURE form to the form the callback expects
  403.         foreach ($match as &$matchItem) {
  404.             $matchItem = $matchItem[0];
  405.         }
  406.  
  407.         return $replacement($match);
  408.     }
  409.  
  410.     /**
  411.      * Strings are a pattern we need to match, in order to ignore potential
  412.      * code-like content inside them, but we just want all of the string
  413.      * content to remain untouched.
  414.      *
  415.      * This method will replace all string content with simple STRING#
  416.      * placeholder text, so we've rid all strings from characters that may be
  417.      * misinterpreted. Original string content will be saved in $this->extracted
  418.      * and after doing all other minifying, we can restore the original content
  419.      * via restoreStrings().
  420.      *
  421.      * @param string[optional] $chars
  422.      * @param string[optional] $placeholderPrefix
  423.      */
  424.     protected function extractStrings($chars = '\'"', $placeholderPrefix = '')
  425.     {
  426.         // PHP only supports $this inside anonymous functions since 5.4
  427.         $minifier = $this;
  428.         $callback = function ($match) use ($minifier, $placeholderPrefix) {
  429.             // check the second index here, because the first always contains a quote
  430.             if ($match[2] === '') {
  431.                 /*
  432.                  * Empty strings need no placeholder; they can't be confused for
  433.                  * anything else anyway.
  434.                  * But we still needed to match them, for the extraction routine
  435.                  * to skip over this particular string.
  436.                  */
  437.                 return $match[0];
  438.             }
  439.  
  440.             $count = count($minifier->extracted);
  441.             $placeholder = $match[1] . $placeholderPrefix . $count . $match[1];
  442.             $minifier->extracted[$placeholder] = $match[1] . $match[2] . $match[1];
  443.  
  444.             return $placeholder;
  445.         };
  446.  
  447.         /*
  448.          * The \\ messiness explained:
  449.          * * Don't count ' or " as end-of-string if it's escaped (has backslash
  450.          * in front of it)
  451.          * * Unless... that backslash itself is escaped (another leading slash),
  452.          * in which case it's no longer escaping the ' or "
  453.          * * So there can be either no backslash, or an even number
  454.          * * multiply all of that times 4, to account for the escaping that has
  455.          * to be done to pass the backslash into the PHP string without it being
  456.          * considered as escape-char (times 2) and to get it in the regex,
  457.          * escaped (times 2)
  458.          */
  459.         $this->registerPattern('/([' . $chars . '])(.*?(?<!\\\\)(\\\\\\\\)*+)\\1/s', $callback);
  460.     }
  461.  
  462.     /**
  463.      * This method will restore all extracted data (strings, regexes) that were
  464.      * replaced with placeholder text in extract*(). The original content was
  465.      * saved in $this->extracted.
  466.      *
  467.      * @param string $content
  468.      *
  469.      * @return string
  470.      */
  471.     protected function restoreExtractedData($content)
  472.     {
  473.         if (!$this->extracted) {
  474.             // nothing was extracted, nothing to restore
  475.             return $content;
  476.         }
  477.  
  478.         $content = strtr($content, $this->extracted);
  479.  
  480.         $this->extracted = array();
  481.  
  482.         return $content;
  483.     }
  484.  
  485.     /**
  486.      * Check if the path is a regular file and can be read.
  487.      *
  488.      * @param string $path
  489.      *
  490.      * @return bool
  491.      */
  492.     protected function canImportFile($path)
  493.     {
  494.         $parsed = parse_url($path);
  495.         if (
  496.             // file is elsewhere
  497.             isset($parsed['host'])
  498.             // file responds to queries (may change, or need to bypass cache)
  499.             || isset($parsed['query'])
  500.         ) {
  501.             return false;
  502.         }
  503.  
  504.         try {
  505.             return strlen($path) < PHP_MAXPATHLEN && @is_file($path) && is_readable($path);
  506.         }
  507.         // catch openbasedir exceptions which are not caught by @ on is_file()
  508.         catch (\Exception $e) {
  509.             return false;
  510.         }
  511.     }
  512.  
  513.     /**
  514.      * Attempts to open file specified by $path for writing.
  515.      *
  516.      * @param string $path The path to the file
  517.      *
  518.      * @return resource Specifier for the target file
  519.      *
  520.      * @throws IOException
  521.      */
  522.     protected function openFileForWriting($path)
  523.     {
  524.         if ($path === '' || ($handler = @fopen($path, 'w')) === false) {
  525.             throw new IOException('The file "' . $path . '" could not be opened for writing. Check if PHP has enough permissions.');
  526.         }
  527.  
  528.         return $handler;
  529.     }
  530.  
  531.     /**
  532.      * Attempts to write $content to the file specified by $handler. $path is used for printing exceptions.
  533.      *
  534.      * @param resource $handler The resource to write to
  535.      * @param string   $content The content to write
  536.      * @param string   $path    The path to the file (for exception printing only)
  537.      *
  538.      * @throws IOException
  539.      */
  540.     protected function writeToFile($handler, $content, $path = '')
  541.     {
  542.         if (
  543.             !is_resource($handler)
  544.             || ($result = @fwrite($handler, $content)) === false
  545.             || ($result < strlen($content))
  546.         ) {
  547.             throw new IOException('The file "' . $path . '" could not be written to. Check your disk space and file permissions.');
  548.         }
  549.     }
  550.  
  551.     protected static function str_replace_first($search, $replace, $subject)
  552.     {
  553.         $pos = strpos($subject, $search);
  554.         if ($pos !== false) {
  555.             return substr_replace($subject, $replace, $pos, strlen($search));
  556.         }
  557.  
  558.         return $subject;
  559.     }
  560. }
  561.