Files
archived-json-path/JsonCrawler.php
2026-03-11 12:00:19 +01:00

1174 lines
41 KiB
PHP

<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\Component\JsonPath;
use Psr\Container\ContainerInterface;
use Symfony\Component\JsonPath\Exception\InvalidArgumentException;
use Symfony\Component\JsonPath\Exception\InvalidJsonPathException;
use Symfony\Component\JsonPath\Exception\InvalidJsonStringInputException;
use Symfony\Component\JsonPath\Exception\JsonCrawlerException;
use Symfony\Component\JsonPath\Tokenizer\JsonPathToken;
use Symfony\Component\JsonPath\Tokenizer\JsonPathTokenizer;
use Symfony\Component\JsonPath\Tokenizer\TokenType;
use Symfony\Component\JsonStreamer\Exception\UnexpectedValueException;
use Symfony\Component\JsonStreamer\Read\Splitter;
use Symfony\Contracts\Service\ServiceProviderInterface;
/**
* Crawls a JSON document using a JSONPath expression as described in RFC 9535.
*
* @see https://datatracker.ietf.org/doc/html/rfc9535
*
* @author Alexandre Daubois <alex.daubois@gmail.com>
*/
final class JsonCrawler implements JsonCrawlerInterface
{
/**
* Comparison operators supported in filter expressions, mapped to their length in characters.
*
* Two-character operators are declared first: the operators are tried in declaration order,
* so ">=" and "<=" are looked for before ">" and "<".
*/
private const COMPARISON_OPERATORS = [
'!=' => 2,
'==' => 2,
'>=' => 2,
'<=' => 2,
'>' => 1,
'<' => 1,
];
/**
 * @param resource|string                                                                          $raw               the JSON document, as a string or as a stream resource
 * @param ContainerInterface|ServiceProviderInterface<callable(mixed ...$arguments): mixed>|null $functionsProvider custom functions, looked up by name
 * @param array<string, array{arity?: int|null, return_type?: FunctionReturnType|null}>           $functionsMetadata arity and return type of the custom functions, keyed by function name
 *
 * @throws InvalidArgumentException When $raw is neither a string nor a resource
 */
public function __construct(
    private readonly mixed $raw,
    private readonly ?ContainerInterface $functionsProvider = null,
    private readonly array $functionsMetadata = [],
) {
    if (\is_string($raw) || \is_resource($raw)) {
        return;
    }

    throw new InvalidArgumentException(\sprintf('Expected string or resource, got "%s".', get_debug_type($raw)));
}
/**
 * Evaluates a query against the document and returns the selected values.
 *
 * @param string|JsonPath $query a JsonPath instance, or a string that gets wrapped in one
 */
public function find(string|JsonPath $query): array
{
    $path = $query instanceof JsonPath ? $query : new JsonPath($query);

    return $this->evaluate($path);
}
/**
 * Evaluates a query against the raw document and returns the normalized matches.
 *
 * A query made of a single root bracket that mixes filters with other selectors is handed
 * to evaluateBracket() as a whole; any other query is tokenized first. When the document is
 * a stream, only the part of it needed by the query is decoded when possible, with a
 * fallback to reading the entire stream.
 *
 * @throws InvalidJsonStringInputException When the document cannot be decoded as JSON
 * @throws JsonCrawlerException            When the query turns out to be an invalid JSONPath
 * @throws \RuntimeException               When the stream cannot be read
 * @throws \LogicException                 When the document is a stream and the JsonStreamer package is missing
 */
private function evaluate(JsonPath $query): array
{
    try {
        if ($this->isComplexBracketExpression($query)) {
            // cannot fail: isComplexBracketExpression() already matched the same pattern
            preg_match('/^\$\[([^\[\]]+)]$/', (string) $query, $matches);

            $json = \is_resource($this->raw) ? $this->readEntireStream() : $this->raw;

            return $this->normalizeStorage($this->evaluateBracket($matches[1], $this->decodeJson($json)));
        }

        $tokens = JsonPathTokenizer::tokenize($query);
        $json = $this->raw;

        if (\is_resource($json)) {
            if (!class_exists(Splitter::class)) {
                throw new \LogicException('The JsonStreamer package is required to evaluate a path against a resource. Try running "composer require symfony/json-streamer".');
            }

            try {
                $simplified = JsonPathUtils::findSmallestDeserializableStringAndPath($tokens, $this->raw);

                $tokens = $simplified['tokens'];
                $json = $simplified['json'];

                if (!$json) {
                    throw new \LogicException(); // fallback to reading the entire stream
                }
            } catch (\LogicException|UnexpectedValueException) {
                $json = $this->readEntireStream();
                // the tokens may have been simplified above: start over from the full query
                $tokens = JsonPathTokenizer::tokenize($query);
            }
        }

        return $this->normalizeStorage($this->evaluateTokensOnDecodedData($tokens, $this->decodeJson($json)));
    } catch (InvalidArgumentException $e) {
        throw $e;
    } catch (InvalidJsonPathException $e) {
        throw new JsonCrawlerException($query, $e->getMessage(), previous: $e);
    }
}

/**
 * Reads the raw stream from its beginning to its end.
 *
 * @throws \RuntimeException When the stream cannot be read
 */
private function readEntireStream(): string
{
    if (0 !== ftell($this->raw)) {
        rewind($this->raw);
    }

    if (false === $contents = stream_get_contents($this->raw)) {
        throw new \RuntimeException('Failed to read from resource stream.');
    }

    return $contents;
}

/**
 * Decodes a JSON string, keeping JSON objects as \stdClass instances.
 *
 * @throws InvalidJsonStringInputException When the string is not valid JSON
 */
private function decodeJson(string $json): mixed
{
    try {
        return json_decode($json, false, 512, \JSON_THROW_ON_ERROR);
    } catch (\JsonException $e) {
        throw new InvalidJsonStringInputException($e->getMessage(), $e);
    }
}
/**
 * Tells whether the query is a single root bracket (`$[...]` without nested brackets)
 * whose content is a valid mix of filters and other comma-separated selectors.
 */
private function isComplexBracketExpression(JsonPath $query): bool
{
    if (1 !== preg_match('/^\$\[([^\[\]]+)]$/', (string) $query, $matches)) {
        return false;
    }

    [, $bracketContent] = $matches;

    // cheap checks first: both a comma and a filter marker must be present
    return str_contains($bracketContent, ',')
        && str_contains($bracketContent, '?')
        && $this->isValidMixedBracketExpression($bracketContent);
}
/**
 * Applies the tokens in sequence, each one to the nodes selected by the previous one.
 *
 * @param JsonPathToken[] $tokens
 *
 * @return array the nodes selected once every token has been applied
 */
private function evaluateTokensOnDecodedData(array $tokens, mixed $data): array
{
    $nodes = [$data];
    $total = \count($tokens);
    $position = 0;

    while ($position < $total) {
        $token = $tokens[$position];
        $following = $tokens[$position + 1] ?? null;
        $selected = [];

        // recursive token followed by bracket with property selectors
        $isRecursivePropertyLookup = TokenType::Recursive === $token->type
            && null !== $following
            && TokenType::Bracket === $following->type
            && (str_contains($following->value, '"') || str_contains($following->value, "'"));

        if (!$isRecursivePropertyLookup) {
            foreach ($nodes as $node) {
                $selected = array_merge($selected, $this->evaluateToken($token, $node));
            }

            $nodes = $selected;
            ++$position;

            continue;
        }

        foreach ($nodes as $node) {
            foreach ($this->evaluateToken($token, $node) as $descendant) {
                // the bracket is only applied to objects and non-list arrays
                $isObjectLike = $descendant instanceof \stdClass
                    || (\is_array($descendant) && !array_is_list($descendant));

                if ($isObjectLike) {
                    $selected = array_merge($selected, $this->evaluateToken($following, $descendant));
                }
            }
        }

        $nodes = $selected;
        $position += 2; // the bracket token has been consumed together with the recursive one
    }

    return $nodes;
}
/**
* Evaluates a single token against a node by dispatching on the token type.
*
* The match has no default arm: PHP throws an \UnhandledMatchError for any token type
* other than the three listed here.
*
* @return array the nodes selected by the token
*/
private function evaluateToken(JsonPathToken $token, mixed $value): array
{
return match ($token->type) {
TokenType::Name => $this->evaluateName($token->value, $value),
TokenType::Bracket => $this->evaluateBracket($token->value, $value),
TokenType::Recursive => $this->evaluateRecursive($value),
};
}
/**
 * Evaluates a dot-notation name against a node.
 *
 * Scalars select nothing, "*" selects every child, any other name selects the child
 * stored under that key when it exists.
 */
private function evaluateName(string $name, mixed $value): array
{
    return match (true) {
        !$this->isArrayOrObject($value) => [],
        '*' === $name => array_values((array) $value),
        default => $this->getValueIfKeyExists($value, $name),
    };
}
/**
 * Evaluates the content of a bracket selector (the text between "[" and "]") against a node.
 *
 * The supported forms are tried in this order: wildcard, single index, list of indices,
 * slice, mix of filters and other selectors, lone filter, comma-separated list of quoted
 * names / indices / slices / wildcards, and single quoted name.
 *
 * Index and slice lookups use \array_key_exists() rather than isset(), so that elements
 * holding null are selected like any other element.
 *
 * @return array the selected values; empty when $value is neither an array nor an object
 *
 * @throws JsonCrawlerException     When commas, indices or slices are malformed
 * @throws InvalidJsonPathException When the expression matches none of the supported forms
 */
private function evaluateBracket(string $expr, mixed $value): array
{
    if (!$this->isArrayOrObject($value)) {
        return [];
    }

    if (str_contains($expr, ',') && (str_starts_with($trimmed = trim($expr), ',') || str_ends_with($trimmed, ','))) {
        throw new JsonCrawlerException($expr, 'Expression cannot have leading or trailing commas');
    }

    if ('*' === $expr = JsonPathUtils::normalizeWhitespace($expr)) {
        return array_values((array) $value);
    }

    // single negative index
    if (preg_match('/^-\d+$/', $expr)) {
        if (JsonPathUtils::hasLeadingZero($expr) || JsonPathUtils::isIntegerOverflow($expr) || '-0' === $expr) {
            throw new JsonCrawlerException($expr, 'invalid index selector');
        }

        // numeric indices only work on lists
        if (!\is_array($value)) {
            return [];
        }

        // negative indices count from the end of the list
        $index = \count($value) + (int) $expr;

        return \array_key_exists($index, $value) ? [$value[$index]] : [];
    }

    // single positive index
    if (preg_match('/^\d+$/', $expr)) {
        if (JsonPathUtils::hasLeadingZero($expr) || JsonPathUtils::isIntegerOverflow($expr)) {
            throw new JsonCrawlerException($expr, 'invalid index selector');
        }

        // numeric indices only work on lists
        if (!\is_array($value)) {
            return [];
        }

        $index = (int) $expr;

        return \array_key_exists($index, $value) ? [$value[$index]] : [];
    }

    // comma-separated list of indices
    if (preg_match('/^-?\d+(?:\s*,\s*-?\d+)*$/', $expr)) {
        foreach (explode(',', $expr) as $exprPart) {
            if (JsonPathUtils::hasLeadingZero($exprPart = trim($exprPart)) || JsonPathUtils::isIntegerOverflow($exprPart) || '-0' === $exprPart) {
                throw new JsonCrawlerException($expr, 'invalid index selector');
            }
        }

        // numeric indices only work on lists
        if (!\is_array($value)) {
            return [];
        }

        $result = [];
        foreach (explode(',', $expr) as $index) {
            $index = (int) trim($index);
            if ($index < 0) {
                $index = \count($value) + $index;
            }

            if (\array_key_exists($index, $value)) {
                $result[] = $value[$index];
            }
        }

        return $result;
    }

    // slice: "start:end" or "start:end:step", every part being optional
    if (preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d*+))?$/', $expr, $matches)) {
        if (!\is_array($value) || !array_is_list($value)) {
            return [];
        }

        $startStr = trim($matches[1]);
        $endStr = trim($matches[2]);
        $stepStr = trim($matches[3] ?? '1');

        if (
            JsonPathUtils::hasLeadingZero($startStr)
            || JsonPathUtils::hasLeadingZero($endStr)
            || JsonPathUtils::hasLeadingZero($stepStr)
        ) {
            throw new JsonCrawlerException($expr, 'slice selector numbers cannot have leading zeros');
        }

        if ('-0' === $startStr || '-0' === $endStr || '-0' === $stepStr) {
            throw new JsonCrawlerException($expr, 'slice selector cannot contain negative zero');
        }

        if (
            JsonPathUtils::isIntegerOverflow($startStr)
            || JsonPathUtils::isIntegerOverflow($endStr)
            || JsonPathUtils::isIntegerOverflow($stepStr)
        ) {
            throw new JsonCrawlerException($expr, 'slice selector integer overflow');
        }

        $length = \count($value);
        $start = '' !== $startStr ? (int) $startStr : null;
        $end = '' !== $endStr ? (int) $endStr : null;
        $step = '' !== $stepStr ? (int) $stepStr : 1;

        if (0 === $step) {
            return [];
        }

        // default and clamp the start bound according to the direction of the step
        if (null === $start) {
            $start = $step > 0 ? 0 : $length - 1;
        } else {
            if ($start < 0) {
                $start = $length + $start;
            }

            if (0 < $step && $start >= $length) {
                return [];
            }

            $start = max(0, min($start, $length - 1));
        }

        // same for the (exclusive) end bound
        if (null === $end) {
            $end = $step > 0 ? $length : -1;
        } else {
            if ($end < 0) {
                $end = $length + $end;
            }

            if ($step > 0) {
                $end = max(0, min($end, $length));
            } else {
                $end = max(-1, min($end, $length - 1));
            }
        }

        $result = [];
        for ($i = $start; $step > 0 ? $i < $end : $i > $end; $i += $step) {
            // the existence check is still needed: an empty list leaves index 0 in range
            if (\array_key_exists($i, $value)) {
                $result[] = $value[$i];
            }
        }

        return $result;
    }

    // comma-separated expressions with at least one filter (e.g. "?@.a,?@.b", "?@.a,1", "1,?@.a=='b',1:")
    if (str_contains($expr, ',') && str_contains($expr, '?') && $this->isValidMixedBracketExpression($expr)) {
        $parts = JsonPathUtils::parseCommaSeparatedValues($expr);

        $result = [];
        foreach ($parts as $part) {
            $part = trim($part);

            if (preg_match('/^\?(.*)$/', $part, $matches)) {
                $result = array_merge($result, $this->evaluateFilter(trim($matches[1]), $value));

                continue;
            }

            $selectorResult = $this->evaluateBracket($part, $value);
            $result = array_merge($result, $selectorResult);
        }

        return $result;
    }

    // filter expressions
    if (preg_match('/^\?(.*)$/', $expr, $matches)) {
        $filterExpr = trim($matches[1]);

        // is it a function call?
        if (preg_match('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/', $filterExpr)) {
            $filterExpr = "($filterExpr)";
        }

        $needsParentheses = true;
        if (null !== $unwrapped = self::unwrapParentheses($filterExpr)) {
            $needsParentheses = false;
            $filterExpr = $unwrapped;
        }

        if ($needsParentheses && !str_starts_with($filterExpr, '(')) {
            $filterExpr = "($filterExpr)";
        }

        $this->validateFilterExpression($filterExpr);

        return $this->evaluateFilter($filterExpr, $value);
    }

    // comma-separated values, e.g. `['key1', 'key2', 123]` or `[0, 1, 'key']`
    if (str_contains($expr, ',')) {
        $parts = JsonPathUtils::parseCommaSeparatedValues($expr);

        $result = [];

        $allStringKeys = true;
        foreach ($parts as $part) {
            $part = trim($part);
            if (!preg_match('/^([\'"])(.*)\1$/', $part)) {
                $allStringKeys = false;
                break;
            }
        }

        if ($allStringKeys) {
            if (!\is_array($value) || !array_is_list($value)) {
                foreach ($parts as $part) {
                    $part = trim($part);
                    if (!preg_match('/^([\'"])(.*)\1$/', $part, $matches)) {
                        continue;
                    }

                    $key = JsonPathUtils::unescapeString($matches[2], $matches[1]);
                    $result = array_merge($result, $this->getValueIfKeyExists($value, $key));
                }

                return $result;
            }

            foreach ($value as $item) {
                if (!\is_array($item)) {
                    continue;
                }

                foreach ($parts as $part) {
                    $part = trim($part);
                    if (!preg_match('/^([\'"])(.*)\1$/', $part, $matches)) {
                        continue;
                    }

                    $key = JsonPathUtils::unescapeString($matches[2], $matches[1]);
                    $result = array_merge($result, $this->getValueIfKeyExists($item, $key));
                }
            }

            return $result;
        }

        foreach ($parts as $part) {
            $part = trim($part);

            if ('*' === $part) {
                $result = array_merge($result, array_values((array) $value));
            } elseif (preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/', $part, $matches)) {
                // slice notation
                $sliceResult = $this->evaluateBracket($part, $value);
                $result = array_merge($result, $sliceResult);
            } elseif (preg_match('/^([\'"])(.*)\1$/', $part, $matches)) {
                $key = JsonPathUtils::unescapeString($matches[2], $matches[1]);

                if (\is_array($value) && array_is_list($value)) {
                    // for arrays, find ALL objects that contain this key
                    foreach ($value as $item) {
                        if ($this->getValueIfKeyExists($item, $key)) {
                            $result[] = $item;
                        }
                    }
                } else {
                    $result = array_merge($result, $this->getValueIfKeyExists($value, $key));
                }
            } elseif (preg_match('/^-?\d+$/', $part)) {
                // numeric index: only lists can be indexed; checking this before resolving a
                // negative index also keeps count() from being called on an object
                if (!\is_array($value) || !array_is_list($value)) {
                    continue;
                }

                $index = (int) $part;
                if ($index < 0) {
                    $index = \count($value) + $index;
                }

                if (\array_key_exists($index, $value)) {
                    $result[] = $value[$index];
                }
            }
        }

        return $result;
    }

    // single quoted name
    if (preg_match('/^([\'"])(.*)\1$/', $expr, $matches)) {
        $key = JsonPathUtils::unescapeString($matches[2], $matches[1]);

        // names never select anything from an array
        if (\is_array($value)) {
            return [];
        }

        return $this->getValueIfKeyExists($value, $key);
    }

    throw new InvalidJsonPathException(\sprintf('Unsupported bracket expression "%s".', $expr));
}
/**
 * Keeps the children of a node for which the filter expression holds.
 *
 * @return array the matching children, re-indexed from 0; empty when $value is a scalar
 */
private function evaluateFilter(string $expr, mixed $value): array
{
    if (!$this->isArrayOrObject($value)) {
        return [];
    }

    $matching = array_filter(
        (array) $value,
        fn (mixed $candidate): bool => $this->evaluateFilterExpression($expr, $candidate),
    );

    return array_values($matching);
}
/**
* Tells whether a filter expression holds for the given candidate node.
*
* The expression is processed in this order: enclosing parentheses are removed, a leading
* "!" negates the rest, the expression is split on a logical operator, then on a comparison
* operator. What remains is handled as an existence test ("@", "$", "$...", "@.…", "@[…]")
* or as a function call; anything else evaluates to false.
*
* @param mixed $context the candidate node that "@" refers to
*
* @throws JsonCrawlerException When the expression calls an unknown function
*/
private function evaluateFilterExpression(string $expr, mixed $context): bool
{
$expr = JsonPathUtils::normalizeWhitespace($expr);
if (null !== $unwrapped = self::unwrapParentheses($expr)) {
$expr = $unwrapped;
}
// the negation applies to everything that follows the "!"
if (str_starts_with($expr, '!')) {
return !$this->evaluateFilterExpression(trim(substr($expr, 1)), $context);
}
// split on a logical operator located outside parentheses and evaluate both sides recursively
if ($logicalOp = $this->findRightmostLogicalOperator($expr)) {
$left = trim(substr($expr, 0, $logicalOp['position']));
$right = trim(substr($expr, $logicalOp['position'] + \strlen($logicalOp['operator'])));
if ('||' === $logicalOp['operator']) {
return $this->evaluateFilterExpression($left, $context) || $this->evaluateFilterExpression($right, $context);
}
return $this->evaluateFilterExpression($left, $context) && $this->evaluateFilterExpression($right, $context);
}
// comparison: operators are tried in declaration order; one that only occurs inside
// brackets or parentheses is skipped
foreach (self::COMPARISON_OPERATORS as $op => $len) {
if (str_contains($expr, $op)) {
if (false === $opPos = $this->findOperatorPosition($expr, $op)) {
continue;
}
$leftValue = $this->evaluateScalar(trim(substr($expr, 0, $opPos)), $context);
$rightValue = $this->evaluateScalar(trim(substr($expr, $opPos + $len)), $context);
return $this->compare($leftValue, $rightValue, $op);
}
}
// a bare "@" or "$" is a test that always passes
if ('@' === $expr || '$' === $expr) {
return true;
}
// absolute query: passes when it selects at least one node, fails when the query is rejected
if (str_starts_with($expr, '$')) {
try {
return (bool) $this->evaluate(new JsonPath($expr));
} catch (JsonCrawlerException) {
return false;
}
}
// relative query in dot notation: passes when it selects at least one node of the candidate
if (str_starts_with($expr, '@.')) {
return $this->isArrayOrObject($context) && $this->evaluateTokensOnDecodedData(JsonPathTokenizer::tokenize(new JsonPath('$'.substr($expr, 1))), $context);
}
// relative query in bracket notation
if (str_starts_with($expr, '@[') && str_ends_with($expr, ']')) {
return $this->isArrayOrObject($context) && $this->evaluateBracket(substr($expr, 2, -1), $context);
}
// function calls
if (preg_match('/^(\w++)\s*+\((.*)\)$/', $expr, $matches)) {
$functionName = trim($matches[1]);
if (!isset(JsonPathTokenizer::RFC9535_FUNCTION_ARITY[$functionName]) && !$this->functionsProvider?->has($functionName)) {
throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName));
}
$this->validateFunctionTestReturnType($expr);
$functionResult = $this->evaluateFunction($functionName, $matches[2], $context);
// numeric results pass when strictly positive, any other result is cast to a boolean
return is_numeric($functionResult) ? $functionResult > 0 : (bool) $functionResult;
}
return false;
}
/**
 * Locates the logical operator on which a filter expression has to be split.
 *
 * Only operators found outside parentheses are considered. The rightmost "||" wins;
 * "&&" is only looked for when the expression has no such "||".
 *
 * @return array{operator: string, position: int}|null null when no operator is found
 */
private function findRightmostLogicalOperator(string $expr): ?array
{
    $length = \strlen($expr);

    foreach (['||', '&&'] as $operator) {
        $found = null;
        $nesting = 0;

        for ($offset = 0; $offset < $length; ++$offset) {
            switch ($expr[$offset]) {
                case '(':
                    ++$nesting;
                    break;
                case ')':
                    --$nesting;
                    break;
                default:
                    if (0 === $nesting && $operator === substr($expr, $offset, 2)) {
                        $found = $offset;
                        ++$offset; // step over the second character of the operator
                    }
            }
        }

        if (null !== $found) {
            return ['operator' => $operator, 'position' => $found];
        }
    }

    return null;
}
/**
* Resolves one operand of a comparison to a PHP value.
*
* Supported operands: JSON numbers, "true", "false", "null", quoted strings, absolute
* ("$…") and relative ("@…") queries, and function calls. Any other operand resolves to null.
*
* A relative query that selects nothing resolves to Nothing::Nothing, whereas an absolute
* query that selects nothing resolves to null.
*
* @param mixed $context the candidate node that "@" refers to
*
* @throws JsonCrawlerException On malformed numbers, non-singular absolute queries and unknown functions
*/
private function evaluateScalar(string $expr, mixed $context): mixed
{
$expr = JsonPathUtils::normalizeWhitespace($expr);
// numbers with a fraction or an exponent become floats, the others become integers
if (JsonPathUtils::isJsonNumber($expr)) {
return str_contains($expr, '.') || str_contains(strtolower($expr), 'e') ? (float) $expr : (int) $expr;
}
// only validate tokens that look like standalone numbers
if (preg_match('/^[\d+\-.eE]+$/', $expr) && preg_match('/\d/', $expr)) {
throw new JsonCrawlerException($expr, \sprintf('Invalid number format "%s"', $expr));
}
if ('@' === $expr) {
return $context;
}
if ('true' === $expr) {
return true;
}
if ('false' === $expr) {
return false;
}
if ('null' === $expr) {
return null;
}
// string literals
if (preg_match('/^([\'"])(.*)\1$/', $expr, $matches)) {
return JsonPathUtils::unescapeString($matches[2], $matches[1]);
}
// absolute path references
if (str_starts_with($expr, '$')) {
if ($this->isNonSingularQuery($expr)) {
throw new JsonCrawlerException($expr, 'non-singular query is not comparable');
}
// first selected node, or null when nothing is selected
return $this->evaluate(new JsonPath($expr))[0] ?? null;
}
// current node references
if (str_starts_with($expr, '@')) {
// a scalar candidate resolves to null here, not to Nothing::Nothing
if (!$this->isArrayOrObject($context)) {
return null;
}
$path = substr($expr, 1);
if (str_starts_with($path, '[') && str_ends_with($path, ']')) {
$bracketContent = substr($path, 1, -1);
$result = $this->evaluateBracket($bracketContent, $context);
return $result ? $result[0] : Nothing::Nothing;
}
// any other form is evaluated as a path rooted at the candidate node
$results = $this->evaluateTokensOnDecodedData(JsonPathTokenizer::tokenize(new JsonPath('$'.$path)), $context);
return $results ? $results[0] : Nothing::Nothing;
}
// function calls
if (preg_match('/^(\w++)\((.*)\)$/', $expr, $matches)) {
$functionName = trim($matches[1]);
if (!isset(JsonPathTokenizer::RFC9535_FUNCTION_ARITY[$functionName]) && !$this->functionsProvider?->has($functionName)) {
throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName));
}
return $this->evaluateFunction($functionName, $matches[2], $context);
}
return null;
}
/**
* Evaluates a function call found in a filter expression.
*
* Every argument is resolved to a value; alongside it, the number of nodes the argument
* selected is recorded. A function available in the functions provider takes precedence
* over the built-in ones ("length", "count", "match", "search" and "value").
*
* @param string $name    the function name
* @param string $args    the raw, comma-separated argument list, without the enclosing parentheses
* @param mixed  $context the candidate node that "@" refers to
*
* @throws JsonCrawlerException     When the number of arguments is wrong or a non-singular query is given where a single node is required
* @throws InvalidJsonPathException When a custom function throws an \Exception
*/
private function evaluateFunction(string $name, string $args, mixed $context): mixed
{
$argStrings = ($args = trim($args)) ? JsonPathUtils::parseCommaSeparatedValues($args) : [];
// the arity of built-in functions wins over the one declared in the metadata; null means "not checked"
$expectedArgCount = JsonPathTokenizer::RFC9535_FUNCTION_ARITY[$name] ?? $this->functionsMetadata[$name]['arity'] ?? null;
if (null !== $expectedArgCount && \count($argStrings) !== $expectedArgCount) {
throw new JsonCrawlerException($args, \sprintf('the JsonPath function "%s" requires exactly %d argument(s).', $name, $expectedArgCount));
}
// Parse and evaluate arguments
$argList = [];
$nodelistSizes = [];
foreach ($argStrings as $arg) {
$arg = trim($arg);
if (str_starts_with($arg, '$')) { // special handling for absolute paths
$results = $this->evaluate(new JsonPath($arg));
$argList[] = $results[0] ?? null;
$nodelistSizes[] = \count($results);
} elseif (!str_starts_with($arg, '@')) { // special handling for @ to track nodelist size
// literals and nested function calls count as a single node
$argList[] = $this->evaluateScalar($arg, $context);
$nodelistSizes[] = 1;
} elseif ('@' === $arg) {
$argList[] = $context;
$nodelistSizes[] = 1;
} elseif (!$this->isArrayOrObject($context)) {
// a relative query selects nothing from a scalar candidate
$argList[] = null;
$nodelistSizes[] = 0;
} elseif (str_starts_with($pathPart = substr($arg, 1), '[')) {
// handle bracket expressions like @['a','d']
// note: unlike the other branches, the whole result list is passed as the argument
$results = $this->evaluateBracket(substr($pathPart, 1, -1), $context);
$argList[] = $results;
$nodelistSizes[] = \count($results);
} else {
// handle dot notation like @.a
$results = $this->evaluateTokensOnDecodedData(JsonPathTokenizer::tokenize(new JsonPath('$'.$pathPart)), $context);
$argList[] = $results[0] ?? null;
$nodelistSizes[] = \count($results);
}
}
// the built-in functions below only look at the first argument and its node count
$value = $argList[0] ?? null;
$nodelistSize = $nodelistSizes[0] ?? 0;
if ($nodelistSize > 1 && \in_array($name, JsonPathTokenizer::SINGULAR_ARGUMENT_FUNCTIONS, true)) {
throw new JsonCrawlerException($args, \sprintf('non-singular query is not allowed as argument to "%s" function', $name));
}
if ($this->functionsProvider?->has($name)) {
try {
return $this->functionsProvider->get($name)(...$argList);
} catch (\Exception $e) {
throw new InvalidJsonPathException(\sprintf('An error occurred while executing the custom JsonPath function "%s": ', $name).$e->getMessage(), null, $e);
}
}
return match ($name) {
// number of characters, elements or properties; Nothing for any other type
'length' => match (true) {
\is_string($value) => mb_strlen($value),
\is_array($value) => \count($value),
$value instanceof \stdClass => \count(get_object_vars($value)),
default => Nothing::Nothing,
},
'count' => $nodelistSize,
// "match" anchors the pattern to the whole string, "search" looks for it anywhere
'match' => match (true) {
\is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match(\sprintf('/^%s$/u', $this->transformJsonPathRegex($argList[1])), $value),
default => false,
},
'search' => match (true) {
\is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match("/{$this->transformJsonPathRegex($argList[1])}/u", $value),
default => false,
},
// Nothing when several nodes are selected; a single node passed as a list is unwrapped
'value' => 1 < $nodelistSize ? Nothing::Nothing : (1 === $nodelistSize ? (\is_array($value) ? ($value[0] ?? null) : $value) : $value),
default => null,
};
}
/**
 * Collects a node followed by all of its array/object descendants, depth first.
 *
 * @return array empty when $value is a scalar
 */
private function evaluateRecursive(mixed $value): array
{
    if (!$this->isArrayOrObject($value)) {
        return [];
    }

    $collected = [$value];

    foreach ($value as $child) {
        // scalar children contribute an empty list
        $collected = array_merge($collected, $this->evaluateRecursive($child));
    }

    return $collected;
}
/**
 * Applies a comparison operator to two resolved operands.
 *
 * @return bool false for operators other than ==, !=, >, >=, < and <=
 */
private function compare(mixed $left, mixed $right, string $operator): bool
{
    if ('==' === $operator || '!=' === $operator) {
        $areEqual = $this->compareEquality($left, $right);

        return '==' === $operator ? $areEqual : !$areEqual;
    }

    if (\in_array($operator, ['>', '>=', '<', '<='], true)) {
        return $this->compareOrdering($left, $right, $operator);
    }

    return false;
}
/**
 * Tells whether two resolved operands are equal.
 *
 * Numbers are compared by value, strings and booleans strictly, arrays and objects
 * deeply. Nothing only equals Nothing or the integer 0; null only equals null or the
 * integer 0. Operands of different kinds are not equal.
 */
private function compareEquality(mixed $left, mixed $right): bool
{
    $leftMissing = Nothing::Nothing === $left;
    $rightMissing = Nothing::Nothing === $right;
    $isNumber = static fn (mixed $operand): bool => \is_int($operand) || \is_float($operand);

    // arms are evaluated top to bottom, the first one that holds decides
    return match (true) {
        $leftMissing && $rightMissing,
        $leftMissing && 0 === $right,
        $rightMissing && 0 === $left => true,
        $leftMissing || $rightMissing => false,
        $isNumber($left) && $isNumber($right) => $left == $right,
        \is_string($left) && \is_string($right),
        \is_bool($left) && \is_bool($right) => $left === $right,
        null === $left && null === $right => true,
        // arrays must have equal length and equal corresponding elements
        \is_array($left) && \is_array($right) => $this->compareArraysDeep($left, $right),
        // objects must have identical names and equal corresponding values
        $left instanceof \stdClass && $right instanceof \stdClass => $this->compareObjectsDeep($left, $right),
        // null (missing property) equals 0 when compared to function results
        null === $left && 0 === $right,
        0 === $left && null === $right => true,
        // different types are not equal
        default => false,
    };
}
/**
 * Tells whether two arrays are both lists (or both maps), have the same size and hold
 * equal values under the same keys.
 */
private function compareArraysDeep(array $left, array $right): bool
{
    if (array_is_list($left) !== array_is_list($right)) {
        return false;
    }

    if (\count($left) !== \count($right)) {
        return false;
    }

    foreach ($left as $key => $leftItem) {
        if (!\array_key_exists($key, $right)) {
            return false;
        }

        if (!$this->compareEquality($leftItem, $right[$key])) {
            return false;
        }
    }

    return true;
}
/**
 * Tells whether two objects have the same number of properties, the same property names
 * and equal values for each of them.
 */
private function compareObjectsDeep(\stdClass $left, \stdClass $right): bool
{
    $leftProperties = get_object_vars($left);
    $rightProperties = get_object_vars($right);

    if (\count($leftProperties) !== \count($rightProperties)) {
        return false;
    }

    foreach ($leftProperties as $name => $leftValue) {
        if (!property_exists($right, $name)) {
            return false;
        }

        if (!$this->compareEquality($leftValue, $rightProperties[$name])) {
            return false;
        }
    }

    return true;
}
/**
 * Evaluates an ordering comparison (">", ">=", "<" or "<=") between two resolved operands.
 *
 * Per RFC 9535, only two numbers or two strings can be ordered. For every other pair of
 * operands "<" and ">" never hold, while "<=" and ">=" hold only when the operands are
 * equal; this method recognizes that equality for two nulls and for two identical booleans.
 *
 * Numbers are compared with the spaceship operator rather than by subtraction, so that
 * two equal infinite values compare as equal (INF - INF is NAN).
 *
 * @return bool false for any operator other than the four listed above
 */
private function compareOrdering(mixed $left, mixed $right, string $operator): bool
{
    $bothNumbers = (\is_int($left) || \is_float($left)) && (\is_int($right) || \is_float($right));
    $bothStrings = \is_string($left) && \is_string($right);

    if (!$bothNumbers && !$bothStrings) {
        // not orderable: only the "or equal" operators can hold, and only through equality
        $identicalNullOrBool = (null === $left || \is_bool($left)) && $left === $right;

        return ('>=' === $operator || '<=' === $operator) && $identicalNullOrBool;
    }

    $comparison = $bothNumbers ? $left <=> $right : strcmp($left, $right);

    return match ($operator) {
        '>' => $comparison > 0,
        '>=' => $comparison >= 0,
        '<' => $comparison < 0,
        '<=' => $comparison <= 0,
        default => false,
    };
}
/**
 * Tells whether a query may select more than one node.
 *
 * That is the case as soon as it contains a recursive token, a wildcard, a slice or a
 * comma-separated bracket. Queries that cannot be tokenized are reported as non-singular.
 */
private function isNonSingularQuery(string $expr): bool
{
    try {
        $tokens = JsonPathTokenizer::tokenize(new JsonPath($expr));
    } catch (InvalidJsonPathException) {
        return true;
    }

    foreach ($tokens as $token) {
        $selectsSeveralNodes = match ($token->type) {
            TokenType::Recursive => true,
            TokenType::Name => '*' === $token->value,
            TokenType::Bracket => str_contains($token->value, ',')
                || '*' === ($content = trim($token->value))
                || 1 === preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d*+))?$/', $content),
            default => false,
        };

        if ($selectsSeveralNodes) {
            return true;
        }
    }

    return false;
}
/**
 * Finds the first occurrence of an operator located outside any brackets and parentheses.
 *
 * @return int|false the byte offset of the operator, false when there is no such occurrence
 */
private function findOperatorPosition(string $expr, string $op): int|false
{
    $opLength = \strlen($op);
    $lastCandidate = \strlen($expr) - $opLength;
    $squareDepth = 0;
    $roundDepth = 0;

    for ($offset = 0; $offset <= $lastCandidate; ++$offset) {
        switch ($expr[$offset]) {
            case '[':
                ++$squareDepth;
                break;
            case ']':
                --$squareDepth;
                break;
            case '(':
                ++$roundDepth;
                break;
            case ')':
                --$roundDepth;
                break;
            default:
                if (0 === $squareDepth && 0 === $roundDepth && $op === substr($expr, $offset, $opLength)) {
                    return $offset;
                }
        }
    }

    return false;
}
/**
 * Validates a filter expression before it gets evaluated.
 *
 * Parentheses, negations and logical operators are walked through recursively. In a
 * comparison, both operands must be singular queries and function calls must be usable as
 * comparison operands; an expression without comparison is validated as a test expression.
 *
 * @throws JsonCrawlerException When the expression is not valid
 */
private function validateFilterExpression(string $expr): void
{
    $expr = trim($expr);

    $inner = self::unwrapParentheses($expr);
    if (null !== $inner) {
        $this->validateFilterExpression($inner);

        return;
    }

    if (str_starts_with($expr, '!')) {
        $this->validateFilterExpression(trim(substr($expr, 1)));

        return;
    }

    $logicalOp = $this->findRightmostLogicalOperator($expr);
    if ($logicalOp) {
        $splitAt = $logicalOp['position'];
        $leftOperand = trim(substr($expr, 0, $splitAt));
        $rightOperand = trim(substr($expr, $splitAt + \strlen($logicalOp['operator'])));

        $this->validateFilterExpression($leftOperand);
        $this->validateFilterExpression($rightOperand);

        return;
    }

    foreach (self::COMPARISON_OPERATORS as $op => $len) {
        if (!str_contains($expr, $op)) {
            continue;
        }

        // the operator may only occur inside brackets or parentheses: try the next one
        $opPos = $this->findOperatorPosition($expr, $op);
        if (false === $opPos) {
            continue;
        }

        $left = trim(substr($expr, 0, $opPos));
        $right = trim(substr($expr, $opPos + $len));

        $hasNonSingularAbsoluteQuery = (str_starts_with($left, '$') && $this->isNonSingularQuery($left))
            || (str_starts_with($right, '$') && $this->isNonSingularQuery($right));
        if ($hasNonSingularAbsoluteQuery) {
            throw new JsonCrawlerException($left, 'non-singular query is not comparable');
        }

        $hasNonSingularRelativeQuery = (str_starts_with($left, '@') && JsonPathTokenizer::isNonSingularRelativeQuery($left))
            || (str_starts_with($right, '@') && JsonPathTokenizer::isNonSingularRelativeQuery($right));
        if ($hasNonSingularRelativeQuery) {
            throw new JsonCrawlerException($left, 'non-singular query is not comparable');
        }

        $this->validateFunctionArguments($left);
        $this->validateFunctionArguments($right);
        $this->validateFunctionReturnType($left);
        $this->validateFunctionReturnType($right);

        return;
    }

    $this->validateFunctionTestReturnType($expr);
}
/**
 * Rejects a non-singular relative query given to a function that requires a singular argument.
 *
 * Expressions that are not calls to such a function are ignored.
 *
 * @throws JsonCrawlerException When the argument is a non-singular relative query
 */
private function validateFunctionArguments(string $expr): void
{
    if (1 !== preg_match('/^(\w+)\((.*)\)$/', trim($expr), $matches)) {
        return;
    }

    [, $functionName, $rawArgument] = $matches;

    if (!\in_array($functionName, JsonPathTokenizer::SINGULAR_ARGUMENT_FUNCTIONS, true)) {
        return;
    }

    $arg = trim($rawArgument);

    if (!str_starts_with($arg, '@') || !JsonPathTokenizer::isNonSingularRelativeQuery($arg)) {
        return;
    }

    throw new JsonCrawlerException($arg, \sprintf('non-singular query is not allowed as argument to "%s" function', $functionName));
}
/**
 * Rejects, as a comparison operand, a call to a function whose declared return type is
 * not FunctionReturnType::Value.
 *
 * Functions without a declared return type in the metadata are accepted.
 *
 * @throws JsonCrawlerException When the function result cannot be compared
 */
private function validateFunctionReturnType(string $expr): void
{
    if (1 !== preg_match('/^(\w+)\s*\(/', trim($expr), $matches)) {
        return;
    }

    $functionName = $matches[1];
    $returnType = $this->functionsMetadata[$functionName]['return_type'] ?? null;
    $isComparable = null === $returnType || FunctionReturnType::Value === $returnType;

    if (!$isComparable) {
        throw new JsonCrawlerException($expr, \sprintf('the result of the custom JsonPath function "%s" (%s) cannot be used in comparisons', $functionName, $returnType->name.'Type'));
    }
}
/**
 * Rejects, as a test expression, a call to a function whose declared return type is
 * FunctionReturnType::Value.
 *
 * @throws JsonCrawlerException When the function result cannot be used as a test
 */
private function validateFunctionTestReturnType(string $expr): void
{
    if (1 !== preg_match('/^(\w+)\s*\(/', trim($expr), $matches)) {
        return;
    }

    $functionName = $matches[1];
    $returnType = $this->functionsMetadata[$functionName]['return_type'] ?? null;

    if (FunctionReturnType::Value === $returnType) {
        throw new JsonCrawlerException($expr, \sprintf('the result of the custom JsonPath function "%s" (%s) cannot be used in test expressions', $functionName, $returnType->name.'Type'));
    }
}
/**
 * Transforms JSONPath regex patterns to comply with RFC 9485.
 *
 * Outside character classes, "." is rewritten so that it matches neither "\r" nor "\n".
 * Unescaped "/" characters are escaped because the result is embedded between "/"
 * delimiters by the callers; left as is, they would end the pattern prematurely.
 * Escape sequences are copied untouched.
 *
 * @see https://www.rfc-editor.org/rfc/rfc9485.html#name-pcre-re2-and-ruby-regexps
 */
private function transformJsonPathRegex(string $pattern): string
{
    $result = '';
    $inCharClass = false;
    $length = \strlen($pattern);

    for ($i = 0; $i < $length; ++$i) {
        $char = $pattern[$i];

        switch ($char) {
            case '\\':
                // take the escaped character along so that it is not interpreted below
                $char .= $pattern[++$i] ?? '';
                break;
            case '[':
                $inCharClass = true;
                break;
            case ']':
                $inCharClass = false;
                break;
            case '.':
                if (!$inCharClass) {
                    $char = '[^\r\n]';
                }
                break;
            case '/':
                $char = '\\/';
                break;
        }

        $result .= $char;
    }

    return $result;
}
/**
 * Removes the parentheses enclosing an expression.
 *
 * @return string|null the trimmed inner expression, or null when the expression is not
 *                     enclosed in a single pair of parentheses spanning it entirely
 */
private static function unwrapParentheses(string $expr): ?string
{
    if (!str_starts_with($expr, '(') || !str_ends_with($expr, ')')) {
        return null;
    }

    $lastOffset = \strlen($expr) - 1;
    $depth = 0;

    for ($offset = 0; $offset <= $lastOffset; ++$offset) {
        if ('(' === $expr[$offset]) {
            ++$depth;

            continue;
        }

        if (')' !== $expr[$offset]) {
            continue;
        }

        --$depth;

        // the opening parenthesis is closed before the end: e.g. "(a) || (b)"
        if (0 === $depth && $offset < $lastOffset) {
            return null;
        }
    }

    return trim(substr($expr, 1, -1));
}
/**
 * Tells whether a decoded JSON value is a container: a PHP array or a \stdClass instance.
 */
private function isArrayOrObject(mixed $value): bool
{
    return $value instanceof \stdClass || \is_array($value);
}
/**
 * Converts \stdClass instances to arrays, recursively, keeping the keys.
 *
 * Empty arrays and scalars are returned as is.
 */
private function normalizeStorage(\stdClass|array $data): array
{
    $normalized = [];

    foreach ((array) $data as $key => $value) {
        $isNested = $value instanceof \stdClass || (\is_array($value) && [] !== $value);

        $normalized[$key] = $isNested ? $this->normalizeStorage($value) : $value;
    }

    return $normalized;
}
/**
 * Tells whether a bracket content is a list of at least two comma-separated selectors
 * that contains at least one filter and otherwise only wildcards, indices, slices or
 * quoted names.
 */
private function isValidMixedBracketExpression(string $expr): bool
{
    $parts = JsonPathUtils::parseCommaSeparatedValues($expr);
    $hasFilter = false;

    foreach ($parts as $part) {
        $part = trim($part);

        if (preg_match('/^\?/', $part)) {
            // complete filter expression and not part of a comparison?
            if (!preg_match('/^\?[^?]*$/', $part)) {
                return false;
            }

            $hasFilter = true;

            continue;
        }

        // is it a valid non-filter selector (index, wildcard, slice)?
        if (!preg_match('/^(\*|-?\d+|-?\d*:-?\d*(?::-?\d+)?|[\'"].*[\'"])$/', $part)) {
            return false;
        }
    }

    return $hasFilter && 1 < \count($parts);
}
/**
 * Returns the value stored under a key of an array or object, wrapped in a list.
 *
 * @return array a one-element list, or an empty one when $value is a scalar or lacks the key
 */
private function getValueIfKeyExists(mixed $value, string $key): array
{
    if (!$this->isArrayOrObject($value)) {
        return [];
    }

    $entries = (array) $value;

    return \array_key_exists($key, $entries) ? [$entries[$key]] : [];
}
}