mORMot and Open Source friends
Check-in [f7705237a4]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: {2734} ensure GetJSONField() handles UTF-16 surrogate pairs incoming as \u####\u#### escapes - very unlikely, but may otherwise result in CESU-8 decoded content - included a basic regression test from https://en.wikipedia.org/wiki/CESU-8
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: f7705237a4d9e406eabd3974c49350d4e1c5d893
User & Date: ab 2016-06-11 08:54:30
Context
2016-06-11
09:44
add missing *.res files for all Samples, to allow compiling from the command line right after opening the repository check-in: fa49a0a359 user: pavel.mash tags: trunk
08:54
{2734} ensure GetJSONField() handles UTF-16 surrogate pairs incoming as \u####\u#### escapes - very unlikely, but may otherwise result in CESU-8 decoded content - included a basic regression test from https://en.wikipedia.org/wiki/CESU-8 check-in: f7705237a4 user: ab tags: trunk
2016-06-10
13:37
{2733} refactored ObjectToJSONFile() to be a function returning the file writing result - as proposed by oz check-in: 47a6abc1e2 user: ab tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to SynCommons.pas.

45940
45941
45942
45943
45944
45945
45946
45947
45948
45949
45950
45951
45952
45953
45954
.....
46016
46017
46018
46019
46020
46021
46022
46023
46024
46025
46026
46027
46028
46029
46030
46031
46032
46033
46034
46035
46036
46037
46038
46039
46040
46041



46042
46043
46044



























46045




46046

46047
46048
46049

46050
46051
46052
46053
46054
46055
46056
46057
46058
46059
end;

/// decode a JSON field into an UTF-8 encoded buffer, stored inplace of JSON data
function GetJSONField(P: PUTF8Char; out PDest: PUTF8Char;
  wasString: PBoolean=nil; EndOfObject: PUTF8Char=nil): PUTF8Char;
// this code is very fast
var D: PUTF8Char;
    b,c4: integer;
label slash,num;
begin
  if wasString<>nil then
    wasString^ := false; // default is 'no string'
  PDest := nil; // PDest=nil indicates error or unexpected end (#0)
  result := nil;
  if P=nil then exit;
................................................................................
      case P^ of // unescape JSON string
        #0: exit; // to avoid potential buffer overflow issue for \#0
        'b': D^ := #08;
        't': D^ := #09;
        'n': D^ := #$0a;
        'f': D^ := #$0c;
        'r': D^ := #$0d;
        'u': begin // inlined decoding of '\0123' UTF-16 codepoint into UTF-8
          c4 := ConvertHexToBin[ord(P[1])];
          if c4<=15 then begin
            b := ConvertHexToBin[ord(P[2])];
            if b<=15 then begin
              c4 := c4 shl 4+b;
              b := ConvertHexToBin[ord(P[3])];
              if b<=15 then begin
                c4 := c4 shl 4+b;
                b := ConvertHexToBin[ord(P[4])];
                if b<=15 then begin
                  c4 := c4 shl 4+b;
                  if c4<>0 then begin
                    if c4<=$7F then begin
                      D^ := AnsiChar(c4);
                      inc(D);
                    end else
                    if c4>$7ff then begin
                      D^ := AnsiChar($E0 or (c4 shr 12));



                      D[1] := AnsiChar($80 or ((c4 shr 6) and $3F));
                      D[2] := AnsiChar($80 or (c4 and $3F));
                      inc(D,3);



























                    end else begin




                      D^ := AnsiChar($C0 or (c4 shr 6));

                      D[1] := AnsiChar($80 or (c4 and $3F));
                      inc(D,2);
                    end;

                    inc(P,5);
                    continue;
                  end;
                end;
              end;
            end;
          end;
          D^ := '?'; // bad formated hexa number -> '?0123'
        end;
        else D^ := P^; // litterals: '\"' -> '"'






|







 







|











|
|
|
|
|
|
|
>
>
>
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
|
>
|
|
|
>
|
|
<







45940
45941
45942
45943
45944
45945
45946
45947
45948
45949
45950
45951
45952
45953
45954
.....
46016
46017
46018
46019
46020
46021
46022
46023
46024
46025
46026
46027
46028
46029
46030
46031
46032
46033
46034
46035
46036
46037
46038
46039
46040
46041
46042
46043
46044
46045
46046
46047
46048
46049
46050
46051
46052
46053
46054
46055
46056
46057
46058
46059
46060
46061
46062
46063
46064
46065
46066
46067
46068
46069
46070
46071
46072
46073
46074
46075
46076
46077
46078
46079
46080
46081
46082
46083
46084
46085
46086
46087

46088
46089
46090
46091
46092
46093
46094
end;

/// decode a JSON field into an UTF-8 encoded buffer, stored inplace of JSON data
function GetJSONField(P: PUTF8Char; out PDest: PUTF8Char;
  wasString: PBoolean=nil; EndOfObject: PUTF8Char=nil): PUTF8Char;
// this code is very fast
var D: PUTF8Char;
    b,c4,surrogate,j: integer;
label slash,num;
begin
  if wasString<>nil then
    wasString^ := false; // default is 'no string'
  PDest := nil; // PDest=nil indicates error or unexpected end (#0)
  result := nil;
  if P=nil then exit;
................................................................................
      case P^ of // unescape JSON string
        #0: exit; // to avoid potential buffer overflow issue for \#0
        'b': D^ := #08;
        't': D^ := #09;
        'n': D^ := #$0a;
        'f': D^ := #$0c;
        'r': D^ := #$0d;
        'u': begin // inlined decoding of '\u0123' UTF-16 codepoint into UTF-8
          c4 := ConvertHexToBin[ord(P[1])];
          if c4<=15 then begin
            b := ConvertHexToBin[ord(P[2])];
            if b<=15 then begin
              c4 := c4 shl 4+b;
              b := ConvertHexToBin[ord(P[3])];
              if b<=15 then begin
                c4 := c4 shl 4+b;
                b := ConvertHexToBin[ord(P[4])];
                if b<=15 then begin
                  c4 := c4 shl 4+b;
                  case c4 of
                  0: begin
                    D^ := '?'; // \u0000 is an invalid value
                    inc(D);
                  end;
                  1..$7f: begin
                    D^ := AnsiChar(c4);
                    inc(D);
                  end;
                  $80..$7ff: begin
                    D[0] := AnsiChar($C0 or (c4 shr 6));
                    D[1] := AnsiChar($80 or (c4 and $3F));
                    inc(D,2);
                  end;
                  UTF16_HISURROGATE_MIN..UTF16_LOSURROGATE_MAX:
                    if PWord(P+5)^=ord('\')+ord('u') shl 8 then begin
                      inc(P,6);
                      surrogate := (ConvertHexToBin[ord(P[1])] shl 12)+
                                   (ConvertHexToBin[ord(P[2])] shl 8)+
                                   (ConvertHexToBin[ord(P[3])] shl 4)+
                                    ConvertHexToBin[ord(P[4])]; // optimistic approach
                      case c4 of // inlined UTF16CharToUtf8()
                      UTF16_HISURROGATE_MIN..UTF16_HISURROGATE_MAX:
                        c4 := ((c4-$D7C0)shl 10)+(surrogate xor UTF16_LOSURROGATE_MIN);
                      UTF16_LOSURROGATE_MIN..UTF16_LOSURROGATE_MAX:
                        c4 := ((surrogate-$D7C0)shl 10)+(c4 xor UTF16_LOSURROGATE_MIN);
                      end;
                      case c4 of
                      0..$7ff: b := 2;
                      $800..$ffff: b := 3;
                      $10000..$1FFFFF: b := 4;
                      $200000..$3FFFFFF: b := 5;
                      else b := 6;
                      end;
                      for j := b-1 downto 1 do begin
                        D[j] := AnsiChar((c4 and $3f)+$80);
                        c4 := c4 shr 6;
                      end;
                      D^ := AnsiChar(Byte(c4) or UTF8_FIRSTBYTE[b]);
                      inc(D,b);
                    end else begin
                      D^ := '?'; // unexpected surrogate without its pair
                      inc(D);
                    end;
                  else begin
                    D[0] := AnsiChar($E0 or (c4 shr 12));
                    D[1] := AnsiChar($80 or ((c4 shr 6) and $3F));
                    D[2] := AnsiChar($80 or (c4 and $3F));
                    inc(D,3);
                  end;
                  end;
                  inc(P,5);
                  continue;

                end;
              end;
            end;
          end;
          D^ := '?'; // bad formated hexa number -> '?0123'
        end;
        else D^ := P^; // litterals: '\"' -> '"'

Changes to SynSelfTests.pas.

5650
5651
5652
5653
5654
5655
5656








5657
5658
5659
5660
5661
5662
5663
  Check(J='{"name":"john","year":1982,"pi":3.14159}');
  JSONDecode(J,['year','pi','john','name'],V);
  Check(length(V)=4);
  Check(V[0]='1982');
  Check(V[1]='3.14159');
  Check(V[2]=nil);
  Check(V[3]='john');








  J := JSONEncode(['name','john','ab','[','a','b',']']);
  Check(J='{"name":"john","ab":["a","b"]}');
  J := JSONEncode(['name','john','ab','[','a','b']);
  Check(J='{"name":"john","ab":["a","b"]}');
  J := JSONEncode(['name','john','ab','[']);
  Check(J='{"name":"john","ab":[]}');
  J := JSONEncode(['name','john','ab','{']);






>
>
>
>
>
>
>
>







5650
5651
5652
5653
5654
5655
5656
5657
5658
5659
5660
5661
5662
5663
5664
5665
5666
5667
5668
5669
5670
5671
  Check(J='{"name":"john","year":1982,"pi":3.14159}');
  JSONDecode(J,['year','pi','john','name'],V);
  Check(length(V)=4);
  Check(V[0]='1982');
  Check(V[1]='3.14159');
  Check(V[2]=nil);
  Check(V[3]='john');
  J := '{surrogate:"\uD801\uDC00"}'; // see https://en.wikipedia.org/wiki/CESU-8
  JSONDecode(J,['surrogate'],V);
  Check(length(V)=1);
  Check(StrLen(V[0])=4);
  Check(V[0][0]=#$F0);
  Check(V[0][1]=#$90);
  Check(V[0][2]=#$90);
  Check(V[0][3]=#$80);
  J := JSONEncode(['name','john','ab','[','a','b',']']);
  Check(J='{"name":"john","ab":["a","b"]}');
  J := JSONEncode(['name','john','ab','[','a','b']);
  Check(J='{"name":"john","ab":["a","b"]}');
  J := JSONEncode(['name','john','ab','[']);
  Check(J='{"name":"john","ab":[]}');
  J := JSONEncode(['name','john','ab','{']);

Changes to SynopseCommit.inc.

1
'1.18.2733'
|
1
'1.18.2734'