* Dariusz Rumiński
 *
 * This source file is subject to the MIT license that is bundled
 * with this source code in the file LICENSE.
 */

namespace PhpCsFixer;

/**
 * Drop-in replacements for the preg_* functions that handle UTF-8 subjects
 * correctly whether or not the caller put the "u" modifier on the pattern.
 *
 * Every public method first runs the pattern with "u" appended. If that call
 * fails or leaves a PCRE error behind, the call is retried with all "u"
 * modifiers removed. When both attempts fail a PregException is thrown.
 *
 * @author Kuba Werłos
 *
 * @internal
 */
final class Preg
{
    /**
     * Opening bracket-style delimiters mapped to the closing delimiter that PCRE expects for them.
     */
    private const BRACKET_DELIMITERS = ['(' => ')', '[' => ']', '{' => '}', '<' => '>'];

    /**
     * UTF-8 tolerant wrapper around preg_match().
     *
     * @param null|array<array-key, mixed>                          $matches receives the captured groups
     * @param int-mask<PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL> $flags
     *
     * @param-out ($flags is PREG_OFFSET_CAPTURE
     *     ? array<array-key, array{string, int}>
     *     : ($flags is PREG_UNMATCHED_AS_NULL
     *         ? array<array-key, null|string>
     *         : ($flags is int-mask<PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL>&768
     *             ? array<array-key, array{null|string, int}>
     *             : array<array-key, string>
     *         )
     *     )
     * ) $matches
     *
     * @return bool true when the pattern matched, false when it did not
     *
     * @throws PregException when the call fails both with and without the "u" modifier
     */
    public static function match(string $pattern, string $subject, ?array &$matches = null, int $flags = 0, int $offset = 0): bool
    {
        $result = @preg_match(self::addUtf8Modifier($pattern), $subject, $matches, $flags, $offset);
        if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
            return 1 === $result;
        }

        // The UTF-8 attempt failed (e.g. the subject is not valid UTF-8): retry byte-wise.
        $result = @preg_match(self::removeUtf8Modifier($pattern), $subject, $matches, $flags, $offset);
        if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
            return 1 === $result;
        }

        throw self::newPregException(preg_last_error(), preg_last_error_msg(), __METHOD__, $pattern);
    }

    /**
     * UTF-8 tolerant wrapper around preg_match_all().
     *
     * @param null|array<array-key, mixed>                                                               $matches receives the captured groups
     * @param int-mask<PREG_PATTERN_ORDER, PREG_SET_ORDER, PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL> $flags
     *
     * @param-out ($flags is PREG_PATTERN_ORDER
     *     ? array<array-key, list<string>>
     *     : ($flags is PREG_SET_ORDER
     *         ? list<array<array-key, string>>
     *         : ($flags is int-mask<PREG_PATTERN_ORDER, PREG_SET_ORDER, PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL>&(256|257)
     *             ? array<array-key, list<array{string, int}>>
     *             : ($flags is int-mask<PREG_PATTERN_ORDER, PREG_SET_ORDER, PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL>&258
     *                 ? list<array<array-key, array{string, int}>>
     *                 : ($flags is int-mask<PREG_PATTERN_ORDER, PREG_SET_ORDER, PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL>&(512|513)
     *                     ? array<array-key, list<null|string>>
     *                     : ($flags is int-mask<PREG_PATTERN_ORDER, PREG_SET_ORDER, PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL>&514
     *                         ? list<array<array-key, null|string>>
     *                         : ($flags is int-mask<PREG_PATTERN_ORDER, PREG_SET_ORDER, PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL>&770
     *                             ? list<array<array-key, array{null|string, int}>>
     *                             : ($flags is 0 ? array<array-key, list<string>> : array<array-key, mixed>)
     *                         )
     *                     )
     *                 )
     *             )
     *         )
     *     )
     * ) $matches
     *
     * @return int the value returned by the successful preg_match_all() call (number of full matches)
     *
     * @throws PregException when the call fails both with and without the "u" modifier
     */
    public static function matchAll(string $pattern, string $subject, ?array &$matches = null, int $flags = PREG_PATTERN_ORDER, int $offset = 0): int
    {
        $result = @preg_match_all(self::addUtf8Modifier($pattern), $subject, $matches, $flags, $offset);
        if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
            return $result;
        }

        // The UTF-8 attempt failed: retry byte-wise.
        $result = @preg_match_all(self::removeUtf8Modifier($pattern), $subject, $matches, $flags, $offset);
        if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
            return $result;
        }

        throw self::newPregException(preg_last_error(), preg_last_error_msg(), __METHOD__, $pattern);
    }

    /**
     * UTF-8 tolerant wrapper around preg_replace().
     *
     * NOTE(review): the declared return type is string, while preg_replace() returns an array
     * for an array $subject; confirm whether array subjects are really meant to be supported.
     *
     * @param array<array-key, string>|string $subject
     *
     * @param-out int $count
     *
     * @throws PregException when the call fails both with and without the "u" modifier
     */
    public static function replace(string $pattern, string $replacement, $subject, int $limit = -1, ?int &$count = null): string
    {
        $result = @preg_replace(self::addUtf8Modifier($pattern), $replacement, $subject, $limit, $count);
        if (null !== $result && PREG_NO_ERROR === preg_last_error()) {
            return $result;
        }

        // The UTF-8 attempt failed: retry byte-wise.
        $result = @preg_replace(self::removeUtf8Modifier($pattern), $replacement, $subject, $limit, $count);
        if (null !== $result && PREG_NO_ERROR === preg_last_error()) {
            return $result;
        }

        throw self::newPregException(preg_last_error(), preg_last_error_msg(), __METHOD__, $pattern);
    }

    /**
     * UTF-8 tolerant wrapper around preg_replace_callback().
     *
     * Because of the retry, $callback may be invoked during both attempts.
     *
     * @param-out int $count
     *
     * @throws PregException when the call fails both with and without the "u" modifier
     */
    public static function replaceCallback(string $pattern, callable $callback, string $subject, int $limit = -1, ?int &$count = null): string
    {
        $result = @preg_replace_callback(self::addUtf8Modifier($pattern), $callback, $subject, $limit, $count);
        if (null !== $result && PREG_NO_ERROR === preg_last_error()) {
            return $result;
        }

        // The UTF-8 attempt failed: retry byte-wise.
        $result = @preg_replace_callback(self::removeUtf8Modifier($pattern), $callback, $subject, $limit, $count);
        if (null !== $result && PREG_NO_ERROR === preg_last_error()) {
            return $result;
        }

        throw self::newPregException(preg_last_error(), preg_last_error_msg(), __METHOD__, $pattern);
    }

    /**
     * UTF-8 tolerant wrapper around preg_split().
     *
     * NOTE(review): with PREG_SPLIT_OFFSET_CAPTURE preg_split() yields [piece, offset] pairs,
     * which the return annotation below does not cover.
     *
     * @return list<string>
     *
     * @throws PregException when the call fails both with and without the "u" modifier
     */
    public static function split(string $pattern, string $subject, int $limit = -1, int $flags = 0): array
    {
        $result = @preg_split(self::addUtf8Modifier($pattern), $subject, $limit, $flags);
        if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
            return $result;
        }

        // The UTF-8 attempt failed: retry byte-wise.
        $result = @preg_split(self::removeUtf8Modifier($pattern), $subject, $limit, $flags);
        if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
            return $result;
        }

        throw self::newPregException(preg_last_error(), preg_last_error_msg(), __METHOD__, $pattern);
    }

    /**
     * Append the "u" modifier to the pattern, whether or not one is already present.
     */
    private static function addUtf8Modifier(string $pattern): string
    {
        return $pattern.'u';
    }

    /**
     * Remove every "u" from the modifier section of the pattern, leaving the pattern body untouched.
     *
     * The modifier section is everything after the last closing delimiter. For bracket-style
     * delimiters the closing character differs from the opening one, so it is looked up in
     * BRACKET_DELIMITERS. A pattern without a recognisable closing delimiter is returned
     * unchanged.
     */
    private static function removeUtf8Modifier(string $pattern): string
    {
        // PCRE skips leading whitespace before reading the delimiter, so do the same here.
        $start = strspn($pattern, " \t\n\r\v\f");
        if ($start === \strlen($pattern)) {
            return $pattern;
        }

        $openingDelimiter = $pattern[$start];
        $closingDelimiter = self::BRACKET_DELIMITERS[$openingDelimiter] ?? $openingDelimiter;

        $endDelimiterPosition = strrpos($pattern, $closingDelimiter);
        if (false === $endDelimiterPosition || $endDelimiterPosition <= $start) {
            // No closing delimiter: there is no modifier section to clean up.
            return $pattern;
        }

        return substr($pattern, 0, $endDelimiterPosition).str_replace('u', '', substr($pattern, $endDelimiterPosition));
    }

    /**
     * Create the PregException for a failed call, with as much detail about the failure as possible.
     *
     * The pattern is run once more against an empty subject through ExecutorWithoutErrorHandler.
     * If that run does not fail, the original failure is reported as an unknown error using the
     * given code and message; otherwise the pattern itself is reported as invalid.
     *
     * @param int    $error    PCRE error code captured by the caller
     * @param string $errorMsg PCRE error message captured by the caller
     * @param string $method   name of the public method that failed
     * @param string $pattern  pattern as originally passed by the caller
     */
    private static function newPregException(int $error, string $errorMsg, string $method, string $pattern): PregException
    {
        $result = null;
        $errorMessage = null;

        try {
            $result = ExecutorWithoutErrorHandler::execute(static fn () => preg_match($pattern, ''));
        } catch (ExecutorWithoutErrorHandlerException $e) {
            $result = false;
            $errorMessage = $e->getMessage();
        }

        if (false !== $result) {
            return new PregException(\sprintf('Unknown error occurred when calling %s: %s.', $method, $errorMsg), $error);
        }

        $code = preg_last_error();

        $message = \sprintf(
            '(code: %d) %s',
            $code,
            // Strip the "preg_xxx(): " prefix of the captured warning. When no warning text was
            // captured, fall back to the caller's message instead of passing null to preg_replace().
            preg_replace('~preg_[a-z_]+[()]{2}: ~', '', $errorMessage ?? $errorMsg)
        );

        return new PregException(
            \sprintf('%s(): Invalid PCRE pattern "%s": %s (version: %s)', $method, $pattern, $message, PCRE_VERSION),
            $code
        );
    }
}