Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Comment: | {2734} ensure GetJSONField() handles UTF-16 surrogate pairs arriving as \u####\u#### escapes - very unlikely, but may otherwise result in CESU-8 decoded content - included a basic regression test from https://en.wikipedia.org/wiki/CESU-8 |
---|---|
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
f7705237a4d9e406eabd3974c49350d4 |
User & Date: | ab 2016-06-11 08:54:30 |
2016-06-11
| ||
09:44 | add missing *.res files for all Samples to allow compiling from the command line right after opening a repository check-in: fa49a0a359 user: pavel.mash tags: trunk | |
08:54 | {2734} ensure GetJSONField() handles UTF-16 surrogate pairs arriving as \u####\u#### escapes - very unlikely, but may otherwise result in CESU-8 decoded content - included a basic regression test from https://en.wikipedia.org/wiki/CESU-8 check-in: f7705237a4 user: ab tags: trunk | |
2016-06-10
| ||
13:37 | {2733} refactored ObjectToJSONFile() to be a function returning the file writing result - as proposed by oz check-in: 47a6abc1e2 user: ab tags: trunk | |
Changes to SynCommons.pas.
45940
45941
45942
45943
45944
45945
45946
45947
45948
45949
45950
45951
45952
45953
45954
.....
46016
46017
46018
46019
46020
46021
46022
46023
46024
46025
46026
46027
46028
46029
46030
46031
46032
46033
46034
46035
46036
46037
46038
46039
46040
46041
46042
46043
46044
46045
46046
46047
46048
46049
46050
46051
46052
46053
46054
46055
46056
46057
46058
46059
|
end; /// decode a JSON field into an UTF-8 encoded buffer, stored inplace of JSON data function GetJSONField(P: PUTF8Char; out PDest: PUTF8Char; wasString: PBoolean=nil; EndOfObject: PUTF8Char=nil): PUTF8Char; // this code is very fast var D: PUTF8Char; b,c4: integer; label slash,num; begin if wasString<>nil then wasString^ := false; // default is 'no string' PDest := nil; // PDest=nil indicates error or unexpected end (#0) result := nil; if P=nil then exit; ................................................................................ case P^ of // unescape JSON string #0: exit; // to avoid potential buffer overflow issue for \#0 'b': D^ := #08; 't': D^ := #09; 'n': D^ := #$0a; 'f': D^ := #$0c; 'r': D^ := #$0d; 'u': begin // inlined decoding of '\0123' UTF-16 codepoint into UTF-8 c4 := ConvertHexToBin[ord(P[1])]; if c4<=15 then begin b := ConvertHexToBin[ord(P[2])]; if b<=15 then begin c4 := c4 shl 4+b; b := ConvertHexToBin[ord(P[3])]; if b<=15 then begin c4 := c4 shl 4+b; b := ConvertHexToBin[ord(P[4])]; if b<=15 then begin c4 := c4 shl 4+b; if c4<>0 then begin if c4<=$7F then begin D^ := AnsiChar(c4); inc(D); end else if c4>$7ff then begin D^ := AnsiChar($E0 or (c4 shr 12)); D[1] := AnsiChar($80 or ((c4 shr 6) and $3F)); D[2] := AnsiChar($80 or (c4 and $3F)); inc(D,3); end else begin D^ := AnsiChar($C0 or (c4 shr 6)); D[1] := AnsiChar($80 or (c4 and $3F)); inc(D,2); end; inc(P,5); continue; end; end; end; end; end; D^ := '?'; // bad formated hexa number -> '?0123' end; else D^ := P^; // litterals: '\"' -> '"' |
|
|
|
|
|
|
|
|
|
>
>
>
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
|
|
|
>
|
|
<
|
45940
45941
45942
45943
45944
45945
45946
45947
45948
45949
45950
45951
45952
45953
45954
.....
46016
46017
46018
46019
46020
46021
46022
46023
46024
46025
46026
46027
46028
46029
46030
46031
46032
46033
46034
46035
46036
46037
46038
46039
46040
46041
46042
46043
46044
46045
46046
46047
46048
46049
46050
46051
46052
46053
46054
46055
46056
46057
46058
46059
46060
46061
46062
46063
46064
46065
46066
46067
46068
46069
46070
46071
46072
46073
46074
46075
46076
46077
46078
46079
46080
46081
46082
46083
46084
46085
46086
46087
46088
46089
46090
46091
46092
46093
46094
|
end; /// decode a JSON field into an UTF-8 encoded buffer, stored inplace of JSON data function GetJSONField(P: PUTF8Char; out PDest: PUTF8Char; wasString: PBoolean=nil; EndOfObject: PUTF8Char=nil): PUTF8Char; // this code is very fast var D: PUTF8Char; b,c4,surrogate,j: integer; label slash,num; begin if wasString<>nil then wasString^ := false; // default is 'no string' PDest := nil; // PDest=nil indicates error or unexpected end (#0) result := nil; if P=nil then exit; ................................................................................ case P^ of // unescape JSON string #0: exit; // to avoid potential buffer overflow issue for \#0 'b': D^ := #08; 't': D^ := #09; 'n': D^ := #$0a; 'f': D^ := #$0c; 'r': D^ := #$0d; 'u': begin // inlined decoding of '\u0123' UTF-16 codepoint into UTF-8 c4 := ConvertHexToBin[ord(P[1])]; if c4<=15 then begin b := ConvertHexToBin[ord(P[2])]; if b<=15 then begin c4 := c4 shl 4+b; b := ConvertHexToBin[ord(P[3])]; if b<=15 then begin c4 := c4 shl 4+b; b := ConvertHexToBin[ord(P[4])]; if b<=15 then begin c4 := c4 shl 4+b; case c4 of 0: begin D^ := '?'; // \u0000 is an invalid value inc(D); end; 1..$7f: begin D^ := AnsiChar(c4); inc(D); end; $80..$7ff: begin D[0] := AnsiChar($C0 or (c4 shr 6)); D[1] := AnsiChar($80 or (c4 and $3F)); inc(D,2); end; UTF16_HISURROGATE_MIN..UTF16_LOSURROGATE_MAX: if PWord(P+5)^=ord('\')+ord('u') shl 8 then begin inc(P,6); surrogate := (ConvertHexToBin[ord(P[1])] shl 12)+ (ConvertHexToBin[ord(P[2])] shl 8)+ (ConvertHexToBin[ord(P[3])] shl 4)+ ConvertHexToBin[ord(P[4])]; // optimistic approach case c4 of // inlined UTF16CharToUtf8() UTF16_HISURROGATE_MIN..UTF16_HISURROGATE_MAX: c4 := ((c4-$D7C0)shl 10)+(surrogate xor UTF16_LOSURROGATE_MIN); UTF16_LOSURROGATE_MIN..UTF16_LOSURROGATE_MAX: c4 := ((surrogate-$D7C0)shl 10)+(c4 xor UTF16_LOSURROGATE_MIN); end; case c4 of 0..$7ff: b := 2; $800..$ffff: b := 3; $10000..$1FFFFF: b := 4; $200000..$3FFFFFF: b := 5; else b := 6; end; for j := b-1 downto 1 do begin 
D[j] := AnsiChar((c4 and $3f)+$80); c4 := c4 shr 6; end; D^ := AnsiChar(Byte(c4) or UTF8_FIRSTBYTE[b]); inc(D,b); end else begin D^ := '?'; // unexpected surrogate without its pair inc(D); end; else begin D[0] := AnsiChar($E0 or (c4 shr 12)); D[1] := AnsiChar($80 or ((c4 shr 6) and $3F)); D[2] := AnsiChar($80 or (c4 and $3F)); inc(D,3); end; end; inc(P,5); continue; end; end; end; end; D^ := '?'; // bad formated hexa number -> '?0123' end; else D^ := P^; // litterals: '\"' -> '"' |
Changes to SynSelfTests.pas.
5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 |
Check(J='{"name":"john","year":1982,"pi":3.14159}'); JSONDecode(J,['year','pi','john','name'],V); Check(length(V)=4); Check(V[0]='1982'); Check(V[1]='3.14159'); Check(V[2]=nil); Check(V[3]='john'); J := JSONEncode(['name','john','ab','[','a','b',']']); Check(J='{"name":"john","ab":["a","b"]}'); J := JSONEncode(['name','john','ab','[','a','b']); Check(J='{"name":"john","ab":["a","b"]}'); J := JSONEncode(['name','john','ab','[']); Check(J='{"name":"john","ab":[]}'); J := JSONEncode(['name','john','ab','{']); |
> > > > > > > > |
5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 |
Check(J='{"name":"john","year":1982,"pi":3.14159}'); JSONDecode(J,['year','pi','john','name'],V); Check(length(V)=4); Check(V[0]='1982'); Check(V[1]='3.14159'); Check(V[2]=nil); Check(V[3]='john'); J := '{surrogate:"\uD801\uDC00"}'; // see https://en.wikipedia.org/wiki/CESU-8 JSONDecode(J,['surrogate'],V); Check(length(V)=1); Check(StrLen(V[0])=4); Check(V[0][0]=#$F0); Check(V[0][1]=#$90); Check(V[0][2]=#$90); Check(V[0][3]=#$80); J := JSONEncode(['name','john','ab','[','a','b',']']); Check(J='{"name":"john","ab":["a","b"]}'); J := JSONEncode(['name','john','ab','[','a','b']); Check(J='{"name":"john","ab":["a","b"]}'); J := JSONEncode(['name','john','ab','[']); Check(J='{"name":"john","ab":[]}'); J := JSONEncode(['name','john','ab','{']); |
Changes to SynopseCommit.inc.
1 |
'1.18.2733'
|
| |
1 |
'1.18.2734'
|