No Description

Unescaper.php 3.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Yaml;
  /**
   * Unescaper encapsulates unescaping rules for single and double-quoted
   * YAML strings.
   *
   * Escaped code points are emitted as UTF-8 byte sequences.
   *
   * @author Matthew Lewinski <matthew@lewinski.org>
   */
  class Unescaper
  {
      // Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
      // must be converted to that encoding.
      // @deprecated since 2.5, to be removed in 3.0
      const ENCODING = 'UTF-8';

      // Regex fragment that matches an escaped character in a double quoted
      // string: a backslash followed by a single-character escape, or by
      // x/u/U and 2/4/8 hexadecimal digits respectively.
      const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})";

      /**
       * Unescapes a single quoted string.
       *
       * The only escape in a single quoted string is a doubled single quote,
       * which stands for one literal single quote.
       *
       * @param string $value A single quoted string.
       *
       * @return string The unescaped string.
       */
      public function unescapeSingleQuotedString($value)
      {
          return str_replace('\'\'', '\'', $value);
      }

      /**
       * Unescapes a double quoted string.
       *
       * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
       * result of unescapeCharacter(); all other text is left untouched.
       *
       * @param string $value A double quoted string.
       *
       * @return string The unescaped string.
       */
      public function unescapeDoubleQuotedString($value)
      {
          // $this is copied into a local so the closure can capture it with
          // use(); this keeps the code working on PHP versions where closures
          // are not automatically bound to the enclosing object.
          $self = $this;
          $callback = function ($match) use ($self) {
              // $match[0] is the full escape sequence, backslash included.
              return $self->unescapeCharacter($match[0]);
          };

          // evaluate the string
          return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
      }

      /**
       * Unescapes a character that was found in a double-quoted string.
       *
       * Public only so that the callback built in unescapeDoubleQuotedString()
       * can call it through the captured instance.
       *
       * @param string $value An escaped character, including the leading backslash
       *
       * @return string|null The unescaped character, or null when the character
       *                     after the backslash is not a recognised escape
       */
      public function unescapeCharacter($value)
      {
          // Square-bracket offset access: the curly-brace form ($value{1}) is
          // deprecated since PHP 7.4 and no longer parses on PHP 8.
          switch ($value[1]) {
              case '0':
                  return "\x00";
              case 'a':
                  return "\x07";
              case 'b':
                  return "\x08";
              case 't':
                  return "\t";
              case "\t":
                  // A backslash followed by a literal tab also yields a tab.
                  return "\t";
              case 'n':
                  return "\n";
              case 'v':
                  return "\x0B";
              case 'f':
                  return "\x0C";
              case 'r':
                  return "\r";
              case 'e':
                  return "\x1B";
              case ' ':
                  return ' ';
              case '"':
                  return '"';
              case '/':
                  return '/';
              case '\\':
                  return '\\';
              case 'N':
                  // U+0085 NEXT LINE
                  return "\xC2\x85";
              case '_':
                  // U+00A0 NO-BREAK SPACE
                  return "\xC2\xA0";
              case 'L':
                  // U+2028 LINE SEPARATOR
                  return "\xE2\x80\xA8";
              case 'P':
                  // U+2029 PARAGRAPH SEPARATOR
                  return "\xE2\x80\xA9";
              case 'x':
                  // \xXX: two hex digits after the marker
                  return self::utf8chr(hexdec(substr($value, 2, 2)));
              case 'u':
                  // \uXXXX: four hex digits after the marker
                  return self::utf8chr(hexdec(substr($value, 2, 4)));
              case 'U':
                  // \UXXXXXXXX: eight hex digits after the marker
                  return self::utf8chr(hexdec(substr($value, 2, 8)));
          }
      }

      /**
       * Get the UTF-8 character for the given code point.
       *
       * The code point is first reduced modulo 0x200000 so that it always fits
       * in the 21 payload bits of a four-byte sequence.
       *
       * @param int $c The unicode code point
       *
       * @return string The corresponding UTF-8 character
       */
      private static function utf8chr($c)
      {
          $c %= 0x200000;

          // One byte: 0xxxxxxx
          if ($c < 0x80) {
              return chr($c);
          }

          // Two bytes: 110xxxxx 10xxxxxx
          if ($c < 0x800) {
              return chr(0xC0 | ($c >> 6)).chr(0x80 | ($c & 0x3F));
          }

          // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
          if ($c < 0x10000) {
              return chr(0xE0 | ($c >> 12)).chr(0x80 | (($c >> 6) & 0x3F)).chr(0x80 | ($c & 0x3F));
          }

          // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
          return chr(0xF0 | ($c >> 18)).chr(0x80 | (($c >> 12) & 0x3F)).chr(0x80 | (($c >> 6) & 0x3F)).chr(0x80 | ($c & 0x3F));
      }
  }