This function scans a string according to Sect. 7 of RFC 7159. While scanning, bytes are unescaped and copied into buffer token_buffer. When the function returns successfully, token_buffer is not null-terminated (as it may contain \0 bytes), and token_buffer.size() is the number of bytes in the string.
2690 case std::char_traits<char>::eof():
2692 error_message =
"invalid string: missing closing quote";
2693 return token_type::parse_error;
2699 return token_type::value_string;
2743 const int codepoint1 = get_codepoint();
2744 int codepoint = codepoint1;
2748 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
2749 return token_type::parse_error;
2753 if (0xD800 <= codepoint1 and codepoint1 <= 0xDBFF)
2758 const int codepoint2 = get_codepoint();
2762 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
2763 return token_type::parse_error;
2767 if (
JSON_LIKELY(0xDC00 <= codepoint2 and codepoint2 <= 0xDFFF))
2782 error_message =
"invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF";
2783 return token_type::parse_error;
2788 error_message =
"invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF";
2789 return token_type::parse_error;
2794 if (
JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF))
2796 error_message =
"invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
2797 return token_type::parse_error;
2802 assert(0x00 <= codepoint and codepoint <= 0x10FFFF);
2805 if (codepoint < 0x80)
2810 else if (codepoint <= 0x7FF)
2813 add(0xC0 | (codepoint >> 6));
2814 add(0x80 | (codepoint & 0x3F));
2816 else if (codepoint <= 0xFFFF)
2819 add(0xE0 | (codepoint >> 12));
2820 add(0x80 | ((codepoint >> 6) & 0x3F));
2821 add(0x80 | (codepoint & 0x3F));
2826 add(0xF0 | (codepoint >> 18));
2827 add(0x80 | ((codepoint >> 12) & 0x3F));
2828 add(0x80 | ((codepoint >> 6) & 0x3F));
2829 add(0x80 | (codepoint & 0x3F));
2837 error_message =
"invalid string: forbidden character after backslash";
2838 return token_type::parse_error;
2847 error_message =
"invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
2848 return token_type::parse_error;
2853 error_message =
"invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
2854 return token_type::parse_error;
2859 error_message =
"invalid string: control character U+0002 (STX) must be escaped to \\u0002";
2860 return token_type::parse_error;
2865 error_message =
"invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
2866 return token_type::parse_error;
2871 error_message =
"invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
2872 return token_type::parse_error;
2877 error_message =
"invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
2878 return token_type::parse_error;
2883 error_message =
"invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
2884 return token_type::parse_error;
2889 error_message =
"invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
2890 return token_type::parse_error;
2895 error_message =
"invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
2896 return token_type::parse_error;
2901 error_message =
"invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
2902 return token_type::parse_error;
2907 error_message =
"invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
2908 return token_type::parse_error;
2913 error_message =
"invalid string: control character U+000B (VT) must be escaped to \\u000B";
2914 return token_type::parse_error;
2919 error_message =
"invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
2920 return token_type::parse_error;
2925 error_message =
"invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
2926 return token_type::parse_error;
2931 error_message =
"invalid string: control character U+000E (SO) must be escaped to \\u000E";
2932 return token_type::parse_error;
2937 error_message =
"invalid string: control character U+000F (SI) must be escaped to \\u000F";
2938 return token_type::parse_error;
2943 error_message =
"invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
2944 return token_type::parse_error;
2949 error_message =
"invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
2950 return token_type::parse_error;
2955 error_message =
"invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
2956 return token_type::parse_error;
2961 error_message =
"invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
2962 return token_type::parse_error;
2967 error_message =
"invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
2968 return token_type::parse_error;
2973 error_message =
"invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
2974 return token_type::parse_error;
2979 error_message =
"invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
2980 return token_type::parse_error;
2985 error_message =
"invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
2986 return token_type::parse_error;
2991 error_message =
"invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
2992 return token_type::parse_error;
2997 error_message =
"invalid string: control character U+0019 (EM) must be escaped to \\u0019";
2998 return token_type::parse_error;
3003 error_message =
"invalid string: control character U+001A (SUB) must be escaped to \\u001A";
3004 return token_type::parse_error;
3009 error_message =
"invalid string: control character U+001B (ESC) must be escaped to \\u001B";
3010 return token_type::parse_error;
3015 error_message =
"invalid string: control character U+001C (FS) must be escaped to \\u001C";
3016 return token_type::parse_error;
3021 error_message =
"invalid string: control character U+001D (GS) must be escaped to \\u001D";
3022 return token_type::parse_error;
3027 error_message =
"invalid string: control character U+001E (RS) must be escaped to \\u001E";
3028 return token_type::parse_error;
3033 error_message =
"invalid string: control character U+001F (US) must be escaped to \\u001F";
3034 return token_type::parse_error;
3171 return token_type::parse_error;
3179 if (
JSON_UNLIKELY(not (next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
3181 return token_type::parse_error;
3203 if (
JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
3205 return token_type::parse_error;
3213 if (
JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
3215 return token_type::parse_error;
3223 if (
JSON_UNLIKELY(not (next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
3225 return token_type::parse_error;
3235 if (
JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
3237 return token_type::parse_error;
3245 if (
JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
3247 return token_type::parse_error;
3255 error_message =
"invalid string: ill-formed UTF-8 byte";
3256 return token_type::parse_error;
void reset() noexcept
reset token_buffer; current character is beginning of token
std::char_traits< char >::int_type current
the current character