Skip to content

Commit

Permalink
Fix JSON formatter crashes with invalid UTF in error messages
Browse files Browse the repository at this point in the history
  • Loading branch information
weirdan committed Sep 8, 2024
1 parent dd24f5b commit bb57f39
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 2 deletions.
60 changes: 58 additions & 2 deletions src/Psalm/Internal/Json/Json.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@

use RuntimeException;

use function array_walk_recursive;
use function bin2hex;
use function is_array;
use function is_string;
use function json_encode;
use function json_last_error_msg;
use function preg_replace_callback;

use const JSON_PRETTY_PRINT;
use const JSON_UNESCAPED_SLASHES;
Expand All @@ -19,28 +23,80 @@
final class Json
{
    public const PRETTY = JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;

    /**
     * Matches byte sequences that are not valid UTF-8.
     *
     * The pattern has no `u` modifier, so it is applied byte-wise; the `x`
     * modifier makes the whitespace and `#` comments inside it insignificant.
     *
     * From https://stackoverflow.com/a/11709412
     */
    private const INVALID_UTF_REGEXP = <<<'EOF'
    /(
        [\xC0-\xC1] # Invalid UTF-8 Bytes
        | [\xF5-\xFF] # Invalid UTF-8 Bytes
        | \xE0[\x80-\x9F] # Overlong encoding of prior code point
        | \xF0[\x80-\x8F] # Overlong encoding of prior code point
        | [\xC2-\xDF](?![\x80-\xBF]) # Invalid UTF-8 Sequence Start
        | [\xE0-\xEF](?![\x80-\xBF]{2}) # Invalid UTF-8 Sequence Start
        | [\xF0-\xF4](?![\x80-\xBF]{3}) # Invalid UTF-8 Sequence Start
        | (?<=[\x00-\x7F\xF5-\xFF])[\x80-\xBF] # Invalid UTF-8 Sequence Middle
        | (?<!
            [\xC2-\xDF]
            |[\xE0-\xEF]
            |[\xE0-\xEF][\x80-\xBF]
            |[\xF0-\xF4]
            |[\xF0-\xF4][\x80-\xBF]
            |[\xF0-\xF4][\x80-\xBF]{2}
        )[\x80-\xBF] # Overlong Sequence
        | (?<=[\xE0-\xEF])[\x80-\xBF](?![\x80-\xBF]) # Short 3 byte sequence
        | (?<=[\xF0-\xF4])[\x80-\xBF](?![\x80-\xBF]{2}) # Short 4 byte sequence
        | (?<=[\xF0-\xF4][\x80-\xBF])[\x80-\xBF](?![\x80-\xBF]) # Short 4 byte sequence (2)
    )/x
    EOF;

    /**
     * @var int
     */
    public const DEFAULT = 0;

    /**
     * Encodes $data as a JSON string.
     *
     * If the first json_encode() attempt fails, the data is scrubbed (invalid
     * UTF-8 byte sequences in string keys and values are replaced with
     * "<Invalid UTF-8: 0x..>" placeholders) and encoding is retried once.
     *
     * @param array<array-key, mixed> $data
     * @param int|null $options json_encode() flags; null selects self::DEFAULT
     * @throws RuntimeException when the data cannot be encoded even after scrubbing
     * @psalm-pure
     */
    public static function encode(array $data, ?int $options = null): string
    {
        $options ??= self::DEFAULT;

        $result = json_encode($data, $options);

        // Strict check: only an actual failure (false) should trigger the retry.
        if ($result === false) {
            $result = json_encode(self::scrub($data), $options);
        }

        if ($result === false) {
            /** @psalm-suppress ImpureFunctionCall */
            throw new RuntimeException('Cannot create JSON string: '.json_last_error_msg());
        }

        return $result;
    }

    /**
     * Returns a copy of $data in which every string key and string value, at
     * any nesting depth, has had its invalid UTF-8 bytes replaced with a
     * placeholder. Non-string, non-array values are copied unchanged.
     *
     * Keys are scrubbed as well as values, because json_encode() rejects
     * invalid UTF-8 in object keys just like it does in string values.
     *
     * @param array<array-key, mixed> $data
     * @return array<array-key, mixed>
     * @psalm-pure
     */
    private static function scrub(array $data): array
    {
        $scrubbed = [];

        /** @var mixed $value */
        foreach ($data as $key => $value) {
            if (is_string($key)) {
                $key = self::scrubString($key);
            }

            if (is_array($value)) {
                $value = self::scrub($value);
            } elseif (is_string($value)) {
                $value = self::scrubString($value);
            }

            /** @psalm-suppress MixedAssignment */
            $scrubbed[$key] = $value;
        }

        return $scrubbed;
    }

    /**
     * Replaces each invalid UTF-8 byte sequence in $value with
     * "<Invalid UTF-8: 0xHEX>", where HEX is the hex dump of the matched bytes.
     *
     * @psalm-pure
     */
    private static function scrubString(string $value): string
    {
        // preg_replace_callback() returns null on a PCRE error; keep the
        // original string in that case rather than silently turning it into null.
        return preg_replace_callback(
            self::INVALID_UTF_REGEXP,
            static fn(array $matches): string => '<Invalid UTF-8: 0x' . bin2hex($matches[0] ?? '') . '>',
            $value,
        ) ?? $value;
    }
}
15 changes: 15 additions & 0 deletions tests/Internal/JsonTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?php

namespace Psalm\Tests\Internal;

use Psalm\Internal\Json\Json;
use Psalm\Tests\TestCase;

final class JsonTest extends TestCase
{
    /**
     * A lone lead byte of a multi-byte sequence must be replaced by a readable
     * placeholder instead of making the encoder fail.
     */
    public function testConvertsInvalidUtf(): void
    {
        $invalidUtf = "\xd1"; // incomplete sequence like "ы"[0]
        $this->assertSame('{"data":"<Invalid UTF-8: 0xd1>"}', Json::encode(["data" => $invalidUtf]));
    }

    /**
     * Invalid strings nested inside inner arrays are replaced as well.
     */
    public function testConvertsInvalidUtfInNestedArrays(): void
    {
        $this->assertSame(
            '{"outer":{"inner":"<Invalid UTF-8: 0xd1>"}}',
            Json::encode(["outer" => ["inner" => "\xd1"]]),
        );
    }

    /**
     * Well-formed UTF-8 is encoded normally and never replaced by a placeholder.
     */
    public function testKeepsValidUtfUntouched(): void
    {
        // U+044B (CYRILLIC SMALL LETTER YERU) is escaped by json_encode() under the default flags.
        $this->assertSame('{"data":"\u044b"}', Json::encode(["data" => "\u{44b}"]));
    }
}

0 comments on commit bb57f39

Please sign in to comment.