UriString.php 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744
  1. <?php
  2. /**
  3. * League.Uri (https://uri.thephpleague.com)
  4. *
  5. * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. declare(strict_types=1);
  11. namespace League\Uri;
  12. use BackedEnum;
  13. use Deprecated;
  14. use League\Uri\Exceptions\SyntaxError;
  15. use League\Uri\Idna\Converter as IdnaConverter;
  16. use Stringable;
  17. use Throwable;
  18. use function array_map;
  19. use function array_merge;
  20. use function array_pop;
  21. use function array_reduce;
  22. use function defined;
  23. use function explode;
  24. use function filter_var;
  25. use function function_exists;
  26. use function implode;
  27. use function preg_match;
  28. use function sprintf;
  29. use function str_replace;
  30. use function strpos;
  31. use function strtolower;
  32. use function substr;
  33. use const FILTER_FLAG_IPV4;
  34. use const FILTER_VALIDATE_IP;
  35. /**
  36. * A class to parse a URI string according to RFC3986.
  37. *
  38. * @link https://tools.ietf.org/html/rfc3986
  39. * @package League\Uri
  40. * @author Ignace Nyamagana Butera <nyamsprod@gmail.com>
  41. * @since 6.0.0
  42. *
  43. * @phpstan-type AuthorityMap array{user: ?string, pass: ?string, host: ?string, port: ?int}
  44. * @phpstan-type ComponentMap array{scheme: ?string, user: ?string, pass: ?string, host: ?string, port: ?int, path: string, query: ?string, fragment: ?string}
  45. * @phpstan-type InputComponentMap array{scheme? : ?string, user? : ?string, pass? : ?string, host? : ?string, port? : ?int, path? : ?string, query? : ?string, fragment? : ?string}
  46. */
  47. final class UriString
  48. {
  49. /**
  50. * Default URI component values.
  51. *
  52. * @var ComponentMap
  53. */
  54. private const URI_COMPONENTS = [
  55. 'scheme' => null, 'user' => null, 'pass' => null, 'host' => null,
  56. 'port' => null, 'path' => '', 'query' => null, 'fragment' => null,
  57. ];
  58. /**
  59. * Simple URI which do not need any parsing.
  60. *
  61. * @var array<string, array<string>>
  62. */
  63. private const URI_SHORTCUTS = [
  64. '' => ['path' => ''],
  65. '#' => ['fragment' => ''],
  66. '?' => ['query' => ''],
  67. '?#' => ['query' => '', 'fragment' => ''],
  68. '/' => ['path' => '/'],
  69. '//' => ['host' => ''],
  70. '///' => ['host' => '', 'path' => '/'],
  71. ];
  72. /**
  73. * Range of invalid characters in URI 3986 string.
  74. *
  75. * @var string
  76. */
  77. private const REGEXP_VALID_URI_RFC3986_CHARS = '/^(?:[A-Za-z0-9\-._~:\/?#[\]@!$&\'()*+,;=%]|%[0-9A-Fa-f]{2})*$/';
  78. /**
  79. * Range of invalid characters in URI 3987 string.
  80. *
  81. * @var string
  82. */
  83. private const REGEXP_INVALID_URI_RFC3987_CHARS = '/[\x00-\x1f\x7f\s]/';
  84. /**
  85. * RFC3986 regular expression URI splitter.
  86. *
  87. * @link https://tools.ietf.org/html/rfc3986#appendix-B
  88. * @var string
  89. */
  90. private const REGEXP_URI_PARTS = ',^
  91. (?<scheme>(?<scontent>[^:/?\#]+):)? # URI scheme component
  92. (?<authority>//(?<acontent>[^/?\#]*))? # URI authority part
  93. (?<path>[^?\#]*) # URI path component
  94. (?<query>\?(?<qcontent>[^\#]*))? # URI query component
  95. (?<fragment>\#(?<fcontent>.*))? # URI fragment component
  96. ,x';
  97. /**
  98. * URI scheme regular expression.
  99. *
  100. * @link https://tools.ietf.org/html/rfc3986#section-3.1
  101. * @var string
  102. */
  103. private const REGEXP_URI_SCHEME = '/^([a-z][a-z\d+.-]*)?$/i';
  104. /**
  105. * Invalid path for URI without scheme and authority regular expression.
  106. *
  107. * @link https://tools.ietf.org/html/rfc3986#section-3.3
  108. * @var string
  109. */
  110. private const REGEXP_INVALID_PATH = ',^(([^/]*):)(.*)?/,';
  111. /**
  112. * Host and Port splitter regular expression.
  113. *
  114. * @var string
  115. */
  116. private const REGEXP_HOST_PORT = ',^(?<host>\[.*\]|[^:]*)(:(?<port>.*))?$,';
  117. /** @var array<string,int> */
  118. private const DOT_SEGMENTS = ['.' => 1, '..' => 1];
  119. /**
  120. * Generate an IRI string representation (RFC3987) from its parsed representation
  121. * returned by League\UriString::parse() or PHP's parse_url.
  122. *
  123. * If you supply your own array, you are responsible for providing
  124. * valid components without their URI delimiters.
  125. *
  126. * @link https://tools.ietf.org/html/rfc3986#section-5.3
  127. * @link https://tools.ietf.org/html/rfc3986#section-7.5
  128. */
  129. public static function toIriString(BackedEnum|Stringable|string $uri): string
  130. {
  131. $components = self::parse($uri);
  132. $port = null;
  133. if (isset($components['port'])) {
  134. $port = (int) $components['port'];
  135. unset($components['port']);
  136. }
  137. if (null !== $components['host']) {
  138. $components['host'] = IdnaConverter::toUnicode($components['host'])->domain();
  139. }
  140. $components['path'] = Encoder::decodePath($components['path']);
  141. $components['user'] = Encoder::decodeNecessary($components['user']);
  142. $components['pass'] = Encoder::decodeNecessary($components['pass']);
  143. $components['query'] = Encoder::decodeQuery($components['query']);
  144. $components['fragment'] = Encoder::decodeFragment($components['fragment']);
  145. return self::build([
  146. ...array_map(fn (?string $value) => match (true) {
  147. null === $value,
  148. !str_contains($value, '%20') => $value,
  149. default => str_replace('%20', ' ', $value),
  150. }, $components),
  151. ...['port' => $port],
  152. ]);
  153. }
  154. /**
  155. * Generate a URI string representation from its parsed representation
  156. * returned by League\UriString::parse() or PHP's parse_url.
  157. *
  158. * If you supply your own array, you are responsible for providing
  159. * valid components without their URI delimiters.
  160. *
  161. * @link https://tools.ietf.org/html/rfc3986#section-5.3
  162. * @link https://tools.ietf.org/html/rfc3986#section-7.5
  163. *
  164. * @param InputComponentMap $components
  165. */
  166. public static function build(array $components): string
  167. {
  168. return self::buildUri(
  169. $components['scheme'] ?? null,
  170. self::buildAuthority($components),
  171. $components['path'] ?? null,
  172. $components['query'] ?? null,
  173. $components['fragment'] ?? null,
  174. );
  175. }
  176. /**
  177. * Generates a URI string representation based on RFC3986 algorithm.
  178. *
  179. * Valid URI component MUST be provided without their URI delimiters
  180. * but properly encoded.
  181. *
  182. * @link https://tools.ietf.org/html/rfc3986#section-5.3
  183. * @link https://tools.ietf.org/html/rfc3986#section-7.5§
  184. */
  185. public static function buildUri(
  186. ?string $scheme = null,
  187. ?string $authority = null,
  188. ?string $path = null,
  189. ?string $query = null,
  190. ?string $fragment = null,
  191. ): string {
  192. self::validateComponents($scheme, $authority, $path);
  193. $uri = '';
  194. if (null !== $scheme) {
  195. $uri .= $scheme.':';
  196. }
  197. if (null !== $authority) {
  198. $uri .= '//'.$authority;
  199. }
  200. $uri .= $path;
  201. if (null !== $query) {
  202. $uri .= '?'.$query;
  203. }
  204. if (null !== $fragment) {
  205. $uri .= '#'.$fragment;
  206. }
  207. return $uri;
  208. }
  209. /**
  210. * Generate a URI authority representation from its parsed representation.
  211. *
  212. * @param InputComponentMap $components
  213. */
  214. public static function buildAuthority(array $components): ?string
  215. {
  216. if (!isset($components['host'])) {
  217. (!isset($components['user']) && !isset($components['pass'])) || throw new SyntaxError('The user info component must not be set if the host is not defined.');
  218. !isset($components['port']) || throw new SyntaxError('The port component must not be set if the host is not defined.');
  219. return null;
  220. }
  221. $userInfo = $components['user'] ?? null;
  222. if (isset($components['pass'])) {
  223. $userInfo .= ':'.$components['pass'];
  224. }
  225. $authority = '';
  226. if (isset($userInfo)) {
  227. $authority .= $userInfo.'@';
  228. }
  229. $authority .= $components['host'];
  230. if (isset($components['port'])) {
  231. $authority .= ':'.$components['port'];
  232. }
  233. return $authority;
  234. }
  235. /**
  236. * Parses and normalizes the URI following RFC3986 destructive and non-destructive constraints.
  237. *
  238. * @throws SyntaxError if the URI is not parsable
  239. *
  240. * @return ComponentMap
  241. */
  242. public static function parseNormalized(Stringable|string $uri): array
  243. {
  244. $components = self::parse($uri);
  245. if (null !== $components['scheme']) {
  246. $components['scheme'] = strtolower($components['scheme']);
  247. }
  248. $components['host'] = self::normalizeHost($components['host']);
  249. $path = $components['path'];
  250. $authority = self::buildAuthority($components);
  251. //dot segment only happens when:
  252. // - the path is absolute
  253. // - the scheme and/or the authority are defined
  254. if ('/' === ($path[0] ?? '') || '' !== $components['scheme'].$authority) {
  255. $path = self::removeDotSegments($path);
  256. }
  257. // if there is an authority, the path must be absolute
  258. if ('' !== $path && '/' !== $path[0]) {
  259. if (null !== $authority) {
  260. $path = '/'.$path;
  261. }
  262. }
  263. $components['path'] = (string) Encoder::normalizePath($path);
  264. $components['query'] = Encoder::normalizeQuery($components['query']);
  265. $components['fragment'] = Encoder::normalizeFragment($components['fragment']);
  266. $components['user'] = Encoder::normalizeUser($components['user']);
  267. $components['pass'] = Encoder::normalizePassword($components['pass']);
  268. return $components;
  269. }
  270. /**
  271. * Parses and normalizes the URI following RFC3986 destructive and non-destructive constraints.
  272. *
  273. * @throws SyntaxError if the URI is not parsable
  274. */
  275. public static function normalize(Stringable|string $uri): string
  276. {
  277. return self::build(self::parseNormalized($uri));
  278. }
  279. /**
  280. * Parses and normalizes the URI following RFC3986 destructive and non-destructive constraints.
  281. *
  282. * @throws SyntaxError if the URI is not parsable
  283. */
  284. public static function normalizeAuthority(Stringable|string|null $authority): ?string
  285. {
  286. if (null === $authority) {
  287. return null;
  288. }
  289. $components = self::parseAuthority($authority);
  290. $components['host'] = self::normalizeHost($components['host'] ?? null);
  291. $components['user'] = Encoder::normalizeUser($components['user']);
  292. $components['pass'] = Encoder::normalizePassword($components['pass']);
  293. return (string) self::buildAuthority($components);
  294. }
  295. /**
  296. * Resolves a URI against a base URI using RFC3986 rules.
  297. *
  298. * This method MUST retain the state of the submitted URI instance, and return
  299. * a URI instance of the same type that contains the applied modifications.
  300. *
  301. * This method MUST be transparent when dealing with error and exceptions.
  302. * It MUST not alter or silence them apart from validating its own parameters.
  303. *
  304. * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-5
  305. *
  306. * @throws SyntaxError if the BaseUri is not absolute or in absence of a BaseUri if the uri is not absolute
  307. */
  308. public static function resolve(BackedEnum|Stringable|string $uri, BackedEnum|Stringable|string|null $baseUri = null): string
  309. {
  310. if ($uri instanceof BackedEnum) {
  311. $uri = (string) $uri->value;
  312. }
  313. if ($baseUri instanceof BackedEnum) {
  314. $baseUri = (string) $baseUri->value;
  315. }
  316. $uri = (string) $uri;
  317. if ('' === $uri) {
  318. $uri = $baseUri ?? throw new SyntaxError("The uri can not be the empty string when there's no base URI.");
  319. }
  320. $uriComponents = self::parse($uri);
  321. $baseUriComponents = $uriComponents;
  322. if (null !== $baseUri && $uri !== (string) $baseUri) {
  323. $baseUriComponents = self::parse($baseUri);
  324. }
  325. null !== $baseUriComponents['scheme'] || throw new SyntaxError('The base URI must be an absolute URI or null; If the base URI is null the URI must be an absolute URI.');
  326. $authority = self::buildAuthority($uriComponents);
  327. $path = self::removeDotSegments($uriComponents['path']);
  328. if ('' !== $path && '/' !== $path[0] && (null !== $authority || $uriComponents['path'] !== $path)) {
  329. $path = '/'.$path;
  330. }
  331. if (null !== $uriComponents['scheme'] && '' !== $uriComponents['scheme']) {
  332. return self::buildUri($uriComponents['scheme'], $authority, $path, $uriComponents['query'], $uriComponents['fragment']);
  333. }
  334. if (null !== $authority) {
  335. return self::buildUri($baseUriComponents['scheme'], $authority, $path, $uriComponents['query'], $uriComponents['fragment']);
  336. }
  337. [$resolvedPath, $query] = self::resolvePathAndQuery($uriComponents, $baseUriComponents);
  338. $baseAuthority = self::buildAuthority($baseUriComponents);
  339. $path = self::removeDotSegments($resolvedPath);
  340. if ('' !== $path && '/' !== $path[0] && (null !== $baseAuthority || $resolvedPath !== $path)) {
  341. $path = '/'.$path;
  342. }
  343. return self::buildUri($baseUriComponents['scheme'], $baseAuthority, $path, $query, $uriComponents['fragment']);
  344. }
  345. /**
  346. * Filter Dot segment according to RFC3986.
  347. *
  348. * @see http://tools.ietf.org/html/rfc3986#section-5.2.4
  349. */
  350. public static function removeDotSegments(Stringable|string $path): string
  351. {
  352. $path = (string) $path;
  353. if (!str_contains($path, '.')) {
  354. return $path;
  355. }
  356. $reducer = function (array $carry, string $segment): array {
  357. if ('..' === $segment) {
  358. array_pop($carry);
  359. return $carry;
  360. }
  361. if (!isset(self::DOT_SEGMENTS[$segment])) {
  362. $carry[] = $segment;
  363. }
  364. return $carry;
  365. };
  366. $oldSegments = explode('/', $path);
  367. $newPath = implode('/', array_reduce($oldSegments, $reducer(...), []));
  368. if (isset(self::DOT_SEGMENTS[$oldSegments[array_key_last($oldSegments)]])) {
  369. $newPath .= '/';
  370. }
  371. return $newPath;
  372. }
  373. /**
  374. * Resolves an URI path and query component.
  375. *
  376. * @param ComponentMap $uri
  377. * @param ComponentMap $baseUri
  378. *
  379. * @return array{0:string, 1:string|null}
  380. */
  381. private static function resolvePathAndQuery(array $uri, array $baseUri): array
  382. {
  383. if (str_starts_with($uri['path'], '/')) {
  384. return [$uri['path'], $uri['query']];
  385. }
  386. if ('' === $uri['path']) {
  387. return [$baseUri['path'], $uri['query'] ?? $baseUri['query']];
  388. }
  389. $targetPath = $uri['path'];
  390. if (null !== self::buildAuthority($baseUri) && '' === $baseUri['path']) {
  391. $targetPath = '/'.$targetPath;
  392. }
  393. if ('' !== $baseUri['path']) {
  394. $segments = explode('/', $baseUri['path']);
  395. array_pop($segments);
  396. if ([] !== $segments) {
  397. $targetPath = implode('/', $segments).'/'.$targetPath;
  398. }
  399. }
  400. return [$targetPath, $uri['query']];
  401. }
  402. public static function containsRfc3986Chars(Stringable|string $uri): bool
  403. {
  404. return 1 === preg_match(self::REGEXP_VALID_URI_RFC3986_CHARS, (string) $uri);
  405. }
  406. public static function containsRfc3987Chars(Stringable|string $uri): bool
  407. {
  408. return 1 !== preg_match(self::REGEXP_INVALID_URI_RFC3987_CHARS, (string) $uri);
  409. }
  410. /**
  411. * Parse a URI string into its components.
  412. *
  413. * This method parses a URI and returns an associative array containing any
  414. * of the various components of the URI that are present.
  415. *
  416. * <code>
  417. * $components = UriString::parse('http://foo@test.example.com:42?query#');
  418. * var_export($components);
  419. * //will display
  420. * array(
  421. * 'scheme' => 'http', // the URI scheme component
  422. * 'user' => 'foo', // the URI user component
  423. * 'pass' => null, // the URI pass component
  424. * 'host' => 'test.example.com', // the URI host component
  425. * 'port' => 42, // the URI port component
  426. * 'path' => '', // the URI path component
  427. * 'query' => 'query', // the URI query component
  428. * 'fragment' => '', // the URI fragment component
  429. * );
  430. * </code>
  431. *
  432. * The returned array is similar to PHP's parse_url return value with the following
  433. * differences:
  434. *
  435. * <ul>
  436. * <li>All components are always present in the returned array</li>
  437. * <li>Empty and undefined component are treated differently. And empty component is
  438. * set to the empty string while an undefined component is set to the `null` value.</li>
  439. * <li>The path component is never undefined</li>
  440. * <li>The method parses the URI following the RFC3986 rules, but you are still
  441. * required to validate the returned components against its related scheme specific rules.</li>
  442. * </ul>
  443. *
  444. * @link https://tools.ietf.org/html/rfc3986
  445. *
  446. * @throws SyntaxError if the URI contains invalid characters
  447. * @throws SyntaxError if the URI contains an invalid scheme
  448. * @throws SyntaxError if the URI contains an invalid path
  449. *
  450. * @return ComponentMap
  451. */
  452. public static function parse(BackedEnum|Stringable|string|int $uri): array
  453. {
  454. if ($uri instanceof BackedEnum) {
  455. $uri = $uri->value;
  456. }
  457. $uri = (string) $uri;
  458. if (isset(self::URI_SHORTCUTS[$uri])) {
  459. /** @var ComponentMap $components */
  460. $components = [...self::URI_COMPONENTS, ...self::URI_SHORTCUTS[$uri]];
  461. return $components;
  462. }
  463. self::containsRfc3987Chars($uri) || throw new SyntaxError(sprintf('The uri `%s` contains invalid characters', $uri));
  464. //if the first character is a known URI delimiter, parsing can be simplified
  465. $first_char = $uri[0];
  466. //The URI is made of the fragment only
  467. if ('#' === $first_char) {
  468. [, $fragment] = explode('#', $uri, 2);
  469. $components = self::URI_COMPONENTS;
  470. $components['fragment'] = $fragment;
  471. return $components;
  472. }
  473. //The URI is made of the query and fragment
  474. if ('?' === $first_char) {
  475. [, $partial] = explode('?', $uri, 2);
  476. [$query, $fragment] = explode('#', $partial, 2) + [1 => null];
  477. $components = self::URI_COMPONENTS;
  478. $components['query'] = $query;
  479. $components['fragment'] = $fragment;
  480. return $components;
  481. }
  482. //use RFC3986 URI regexp to split the URI
  483. preg_match(self::REGEXP_URI_PARTS, $uri, $parts);
  484. $parts += ['query' => '', 'fragment' => ''];
  485. if (':' === ($parts['scheme'] ?? null) || 1 !== preg_match(self::REGEXP_URI_SCHEME, $parts['scontent'] ?? '')) {
  486. throw new SyntaxError(sprintf('The uri `%s` contains an invalid scheme', $uri));
  487. }
  488. if ('' === ($parts['scheme'] ?? '').($parts['authority'] ?? '') && 1 === preg_match(self::REGEXP_INVALID_PATH, $parts['path'] ?? '')) {
  489. throw new SyntaxError(sprintf('The uri `%s` contains an invalid path.', $uri));
  490. }
  491. /** @var ComponentMap $components */
  492. $components = array_merge(
  493. self::URI_COMPONENTS,
  494. '' === ($parts['authority'] ?? null) ? [] : self::parseAuthority($parts['acontent'] ?? null),
  495. [
  496. 'path' => $parts['path'] ?? '',
  497. 'scheme' => '' === ($parts['scheme'] ?? null) ? null : ($parts['scontent'] ?? null),
  498. 'query' => '' === $parts['query'] ? null : ($parts['qcontent'] ?? null),
  499. 'fragment' => '' === $parts['fragment'] ? null : ($parts['fcontent'] ?? null),
  500. ]
  501. );
  502. return $components;
  503. }
  504. /**
  505. * Assert the URI internal state is valid.
  506. *
  507. * @link https://tools.ietf.org/html/rfc3986#section-3
  508. * @link https://tools.ietf.org/html/rfc3986#section-3.3
  509. *
  510. * @throws SyntaxError
  511. */
  512. private static function validateComponents(?string $scheme, ?string $authority, ?string $path): void
  513. {
  514. if (null !== $authority) {
  515. if (null !== $path && '' !== $path && '/' !== $path[0]) {
  516. throw new SyntaxError('If an authority is present the path must be empty or start with a `/`.');
  517. }
  518. return;
  519. }
  520. if (null === $path || '' === $path) {
  521. return;
  522. }
  523. if (str_starts_with($path, '//')) {
  524. throw new SyntaxError('If there is no authority the path `'.$path.'` cannot start with a `//`.');
  525. }
  526. if (null !== $scheme || false === ($pos = strpos($path, ':'))) {
  527. return;
  528. }
  529. if (!str_contains(substr($path, 0, $pos), '/')) {
  530. throw new SyntaxError('In absence of a scheme and an authority the first path segment cannot contain a colon (":") character.');
  531. }
  532. }
  533. /**
  534. * Parses the URI authority part.
  535. *
  536. * @link https://tools.ietf.org/html/rfc3986#section-3.2
  537. *
  538. * @throws SyntaxError If the port component is invalid
  539. *
  540. * @return AuthorityMap
  541. */
  542. public static function parseAuthority(BackedEnum|Stringable|string|null $authority): array
  543. {
  544. $components = ['user' => null, 'pass' => null, 'host' => null, 'port' => null];
  545. if (null === $authority) {
  546. return $components;
  547. }
  548. if ($authority instanceof BackedEnum) {
  549. $authority = $authority->value;
  550. }
  551. $authority = (string) $authority;
  552. $components['host'] = '';
  553. if ('' === $authority) {
  554. return $components;
  555. }
  556. $parts = explode('@', $authority, 2);
  557. if (isset($parts[1])) {
  558. [$components['user'], $components['pass']] = explode(':', $parts[0], 2) + [1 => null];
  559. }
  560. preg_match(self::REGEXP_HOST_PORT, $parts[1] ?? $parts[0], $matches);
  561. $matches += ['port' => ''];
  562. $components['port'] = self::filterPort($matches['port']);
  563. $components['host'] = self::filterHost($matches['host'] ?? '');
  564. return $components;
  565. }
  566. /**
  567. * Filter and format the port component.
  568. *
  569. * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
  570. *
  571. * @throws SyntaxError if the registered name is invalid
  572. */
  573. private static function filterPort(string $port): ?int
  574. {
  575. return match (true) {
  576. '' === $port => null,
  577. 1 === preg_match('/^\d*$/', $port) => (int) $port,
  578. default => throw new SyntaxError(sprintf('The port `%s` is invalid', $port)),
  579. };
  580. }
  581. /**
  582. * Returns whether a hostname is valid.
  583. *
  584. * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
  585. *
  586. * @throws SyntaxError if the registered name is invalid
  587. */
  588. private static function filterHost(Stringable|string|null $host): ?string
  589. {
  590. try {
  591. return HostRecord::from($host)->value;
  592. } catch (Throwable) {
  593. throw new SyntaxError(sprintf('Host `%s` is invalid : the IP host is malformed', $host));
  594. }
  595. }
  596. /**
  597. * Tells whether the scheme component is valid.
  598. */
  599. public static function isValidScheme(BackedEnum|Stringable|string|null $scheme): bool
  600. {
  601. if ($scheme instanceof BackedEnum) {
  602. $scheme = $scheme->value;
  603. }
  604. return null === $scheme || 1 === preg_match('/^[A-Za-z]([-A-Za-z\d+.]+)?$/', (string) $scheme);
  605. }
  606. private static function normalizeHost(BackedEnum|Stringable|string|null $host): ?string
  607. {
  608. if ($host instanceof BackedEnum) {
  609. $host = $host->value;
  610. }
  611. if (null !== $host) {
  612. $host = (string) $host;
  613. }
  614. if (null === $host || false !== filter_var($host, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
  615. return $host;
  616. }
  617. $host = (string) Encoder::normalizeHost($host);
  618. static $isSupported = null;
  619. $isSupported ??= (function_exists('\idn_to_ascii') && defined('\INTL_IDNA_VARIANT_UTS46'));
  620. if (! $isSupported) {
  621. return $host;
  622. }
  623. $idnaHost = IdnaConverter::toAscii($host);
  624. if (!$idnaHost->hasErrors()) {
  625. return $idnaHost->domain();
  626. }
  627. return $host;
  628. }
  629. /**
  630. * DEPRECATION WARNING! This method will be removed in the next major point release.
  631. *
  632. * @deprecated Since version 7.6.0
  633. * @codeCoverageIgnore
  634. * @see HostRecoord::validate()
  635. *
  636. * Create a new instance from the environment.
  637. */
  638. #[Deprecated(message:'use League\Uri\HostRecord::validate() instead', since:'league/uri:7.6.0')]
  639. public static function isValidHost(Stringable|string|null $host): bool
  640. {
  641. return HostRecord::isValid($host);
  642. }
  643. }