You are not logged in.
Pages: 1
Hi, Arnaud.
Here is my module for fast conversion
//Sha 2012
unit SynConversionTables;
interface
uses
Windows, // GetACP
SysUtils; // PWordArray
type
// Pointer to a conversion table record
PConversionTable= ^TConversionTable;
// One single-byte code page: reverse lookup (wide -> ansi) plus direct lookup (ansi -> wide)
TConversionTable= packed record
// wide chars above 255 taken from pDataExt, stored from highest to lowest value
WideToAnsiW: packed array[0..127] of word;
// ansi byte for the wide char at the same index of WideToAnsiW
WideToAnsiA: packed array[0..127] of byte;
// number of used entries in WideToAnsiW / WideToAnsiA
WideToAnsiCount: integer;
CodePage: integer; //table code page
// source data the table was built from: wide chars of ansi bytes #128, #129, ...
pDataExt: pWordArray;
// number of entries available in pDataExt
DataCount: integer;
// wide char of every ansi byte; entries not covered by pDataExt map to themselves
AnsiToWide: packed array[0..255] of word;
end;
var //READ ONLY VARS
pSynTableDefault: PConversionTable= nil; //table for default code page
pSynTable1252: PConversionTable= nil; //win1252 Latin table
pSynTable1251: PConversionTable= nil; //win1251 Cyrillic table
type
PtrInt= {$ifdef UNICODE} NativeInt {$else} integer {$endif};
//extended version of WinAnsiTableSortedFind
function FindAnsiChar(wc: cardinal; pTable: PConversionTable): PtrInt;
//get table by index
function GetSynTable(i: integer): PConversionTable;
//test of conversion tables
function TestSynTables: boolean;
implementation
const
Win1252Ext: packed array[0..31] of word = (
8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 381, 143,
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 382, 376);
Win1251Ext: packed array[0..127] of word = (
1026, 1027, 8218, 1107, 8222, 8230, 8224, 8225, 8364, 8240, 1033, 8249, 1034, 1036, 1035, 1039,
1106, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 1113, 8250, 1114, 1116, 1115, 1119,
160, 1038, 1118, 1032, 164, 1168, 166, 167, 1025, 169, 1028, 171, 172, 173, 174, 1031,
176, 177, 1030, 1110, 1169, 181, 182, 183, 1105, 8470, 1108, 187, 1112, 1029, 1109, 1111,
1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087,
1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103);
var
SynDefaultExt: packed array[0..127] of word;
SynDefaultCP: integer= 0; //default code page
SynTables: array of array of word;
// Reverse lookup: returns the ansi byte stored in pTable for wide char wc,
// or -1 when wc is not listed in the table's sorted wide-char array.
function FindAnsiChar(wc: cardinal; pTable: PConversionTable): PtrInt;
var
  Lo, Hi, Mid: PtrInt;
begin
  // shortcut for the win1251 table: 32 consecutive code points starting at 1040
  // sit at a constant distance (848) from their ansi byte
  if (pTable=pSynTable1251) and (cardinal(wc-1040)<32) then
    Result:=wc-848
  else begin
    // bisection over WideToAnsiW: Hi converges to the first entry that is
    // smaller than wc, so a match, if any, is the entry just before it
    Lo:=0;
    Hi:=pTable.WideToAnsiCount;
    while Lo<Hi do begin
      Mid:=(Lo + Hi) shr 1;
      if pTable.WideToAnsiW[Mid]<wc then
        Hi:=Mid
      else
        Lo:=Mid + 1;
    end;
    Mid:=Hi-1;
    if (Mid>=0) and (pTable.WideToAnsiW[Mid]=wc) then
      Result:=pTable.WideToAnsiA[Mid]
    else
      Result:=-1;
  end;
end;
// Returns the i-th registered conversion table, or nil when i is out of range.
function GetSynTable(i: integer): PConversionTable;
begin
  Result:=nil;
  if (i<0) or (i>=Length(SynTables)) then exit;
  // each table lives in the word buffer of its own SynTables row
  Result:=@SynTables[i,0];
end;
// Checks that every source wide char above 255 is found by FindAnsiChar at its
// own ansi position (index+128).
// Returns the number of chars checked on success, or minus the number of chars
// checked so far (including the failing one) on the first mismatch.
function TestFindChars(pTable: PConversionTable): integer;
var
  idx, checked: integer;
begin
  checked:=0;
  idx:=0;
  while idx<pTable.DataCount do begin
    if pTable.pDataExt[idx]>255 then begin
      inc(checked);
      if FindAnsiChar(pTable.pDataExt[idx],pTable)<>idx+128 then begin
        Result:=-checked;
        exit;
      end;
    end;
    inc(idx);
  end;
  Result:=checked;
end;
// Counts how many wide chars in $100..$FFFF are resolved by FindAnsiChar for this table.
function TestCountChars(pTable: PConversionTable): integer;
var
  code, hits: integer;
begin
  hits:=0;
  code:=$100;
  repeat
    if FindAnsiChar(code,pTable)>=0 then inc(hits);
    inc(code);
  until code>$FFFF;
  Result:=hits;
end;
// Self-test of all registered tables.
// A table passes when TestFindChars reports a positive count and TestCountChars
// reports exactly the same count. Returns true when every table passes
// (also true when no table is registered).
function TestSynTables: boolean;
var
  n, Expected, Reachable: integer;
  Tbl: PConversionTable;
begin
  Result:=true;
  n:=Length(SynTables);
  while n>0 do begin
    dec(n);
    Tbl:=@SynTables[n,0];
    Expected:=TestFindChars(Tbl);
    Reachable:=TestCountChars(Tbl);
    if (Expected<=0) or (Reachable<>Expected) then Result:=false;
  end;
end;
// Builds a conversion table for one code page and appends it to SynTables.
// CodePage  - code page identifier; must be positive, an already registered page is ignored
// pDataExt  - wide chars of the ansi bytes #128, #129, ... (DataCount entries)
// DataCount - number of entries in pDataExt, must be in 1..128
// On success the matching global pointers (pSynTable1252, pSynTable1251,
// pSynTableDefault) are set to the new table.
procedure AddConversionTable(CodePage: integer; pDataExt: PWordArray; DataCount: integer);
var
save: array[0..127] of cardinal;
tmp: cardinal;
i, len, max: integer;
pTable: PConversionTable;
begin;
// reject invalid arguments
if (CodePage<=0) or (DataCount<=0) or (DataCount>128) then exit;
// do nothing when a table for this code page already exists
len:=Length(SynTables);
for i:=0 to len-1 do begin;
pTable:=@SynTables[i,0];
if pTable.CodePage=CodePage then exit;
end;
// the record is stored inside a new row of words appended to SynTables
SetLength(SynTables,len+1);
SetLength(SynTables[len], SizeOf(TConversionTable) div SizeOf(word));
pTable:=@SynTables[len,0];
pTable.CodePage:=CodePage;
pTable.pDataExt:=pDataExt;
pTable.DataCount:=DataCount;
// start from the identity mapping, overridden below for the supplied bytes
for i:=0 to 255 do pTable.AnsiToWide[i]:=i;
// count the wide chars above 255: only those go to the reverse lookup arrays
len:=0;
for i:=0 to DataCount-1 do if pDataExt[i]>255 then inc(len);
pTable.WideToAnsiCount:=len;
len:=0;
max:=0;
for i:=DataCount-1 downto 0 do begin;
pTable.AnsiToWide[i+128]:=pDataExt[i];
if pDataExt[i]>255 then begin;
// pack as (wide char shl 8) or ansi byte, so sorting the packed values sorts by wide char
save[len]:=integer(pDataExt[i]) shl 8 or (i+128);
// remember the index of the largest packed value seen so far
if save[max]<save[len] then max:=len;
inc(len);
end;
end;
dec(len); // last index
// insertion sort of save[0..len]
// the largest value is moved to save[0] first and then stops the inner loop;
// the result is ordered from highest to lowest
tmp:=save[0]; save[0]:=save[max]; save[max]:=tmp;
i:=1;
while i<len do begin;
inc(i);
tmp:=save[i];
if tmp>save[i-1] then begin;
max:=i;
repeat;
save[max]:=save[max-1];
dec(max);
until tmp<=save[max-1];
save[max]:=tmp;
end;
end;
// unpack the sorted values into the two parallel lookup arrays
for i:=0 to len do begin;
pTable.WideToAnsiW[i]:=save[i] shr 8;
pTable.WideToAnsiA[i]:=byte(save[i]);
end;
// publish the table through the well-known pointers
if CodePage=1252 then pSynTable1252:=pTable;
if CodePage=1251 then pSynTable1251:=pTable;
if CodePage=SynDefaultCP then pSynTableDefault:=pTable;
end;
// Registers the built-in 1252 and 1251 tables and, when the system default code
// page (GetACP) is neither of them, a table generated with MultiByteToWideChar.
// Returns true when a default table exists and TestSynTables passes.
// NOTE(review): the result of MultiByteToWideChar is not checked here.
function InitConversionTables: boolean;
var
  HighBytes: array[0..127] of byte;
  k: integer;
begin
  SynDefaultCP:=GetACP;
  AddConversionTable(1252, @Win1252Ext[0], Length(Win1252Ext)); //Latin
  AddConversionTable(1251, @Win1251Ext[0], Length(Win1251Ext)); //Cyrillic
  if pSynTableDefault=nil then begin
    // ask the OS for the wide chars of ansi bytes #128..#255
    k:=High(HighBytes);
    while k>=Low(HighBytes) do begin
      HighBytes[k]:=k+128;
      dec(k);
    end;
    MultiByteToWideChar(SynDefaultCP,0,@HighBytes[0],128,@SynDefaultExt[0],128);
    AddConversionTable(SynDefaultCP, @SynDefaultExt[0], 128);
  end;
  Result:=false;
  if pSynTableDefault<>nil then Result:=TestSynTables;
end;
// Drops every table pointer, releases the table storage and resets the default code page.
procedure FinalConversionTables;
begin
  pSynTable1251:=nil;
  pSynTable1252:=nil;
  pSynTableDefault:=nil;
  SynTables:=nil;
  SynDefaultCP:=0;
end;
initialization
InitConversionTables;
finalization
FinalConversionTables;
end.
I suggest to replace WinAnsiTableSortedFind(wc) with FindAnsiChar(wc,pSynTable1252),
for example:
//Sha: new version
// Converts one wide char to its win1252 ansi char; returns a space when the
// char has no representation in the table.
function WideCharToWinAnsiChar(wc: cardinal): AnsiChar;
var
  found: integer;
begin
  result := ' '; // default answer for any invalid char
  if wc<256 then begin
    // valid only when the table maps this byte below 256 (was: WinAnsiTable[wc]<256)
    if pSynTable1252.AnsiToWide[wc]<256 then
      result := AnsiChar(wc);
  end else begin
    // wc>255: reverse search (was: WinAnsiTableSortedFind)
    found := FindAnsiChar(wc, pSynTable1252);
    if found>=0 then
      result := AnsiChar(byte(found));
  end;
end;
//Sha: new version
// Converts one wide char to its win1252 ansi code; returns -1 when the char
// has no representation in the table.
function WideCharToWinAnsi(wc: cardinal): integer;
begin
  if wc>=256 then
    // reverse search (was: WinAnsiTableSortedFind)
    result := FindAnsiChar(wc, pSynTable1252)
  else if pSynTable1252.AnsiToWide[wc]<256 then
    // was: WinAnsiTable[wc]<256
    result := wc
  else
    result := -1; // invalid ansi char for this code page (e.g. #128)
end;
//Sha: new version
// Returns true when every char decoded from the #0-terminated UTF-8 text can be
// represented with the pSynTable1252 table, false at the first char that cannot.
// A nil pointer and a text cut in the middle of a multi-byte sequence both give true.
function IsWinAnsiU(UTF8Text: PUTF8Char): boolean;
var c: Cardinal;
begin
result := false;
if UTF8Text<>nil then
repeat
c := byte(UTF8Text^); inc(UTF8Text);
if c=0 then break else
// 7 bit char: always accepted
if c and $80=0 then
continue else begin
// multi-byte sequence: stop when the text ends right after the lead byte
if UTF8Text^=#0 then break;
if c and $20=0 then begin
// two byte sequence; $3080 = ($C0 shl 6) + $80 removes the marker bits of both bytes
c := c shl 6+byte(UTF8Text^)-$00003080; inc(UTF8Text);
end else begin
// otherwise decoded as a three byte sequence; $E2080 removes the marker bits of the three bytes
c := c shl 6+byte(UTF8Text^); inc(UTF8Text);
if UTF8Text^=#0 then break;
c := c shl 6+byte(UTF8Text^)-$000E2080; inc(UTF8Text);
end;
if c>255 then begin
//if WinAnsiTableSortedFind(c)<0 then
if FindAnsiChar(c, pSynTable1252)<0 then //Sha: use new version of search
exit; // invalid char in the WinAnsi code page
end else
//if WinAnsiTable[c]>255 then
if pSynTable1252.AnsiToWide[c]>255 then //Sha: use new table
exit; // invalid char in the WinAnsi code page
end;
until false;
result := true;
end;
//Sha: new version
// Converts count bytes of UTF-8 text into win1252 ansi chars written to dest.
// dest   - output buffer, written without any terminator
// source - UTF-8 input; nil gives 0
// count  - number of input bytes; zero or negative gives 0 and touches neither buffer
// Returns the number of ansi chars written.
// Chars in U+0080..U+00FF that the table cannot represent are written as a space;
// chars above U+00FF that are not found are dropped.
function UTF8ToWinPChar(dest: PAnsiChar; source: PUTF8Char; count: integer): integer;
var c: cardinal;
    begd: PAnsiChar;
    endSource: PUTF8Char;
begin
  result := 0;
  // the loop below always consumes one byte before testing the end of input,
  // so an empty input must be rejected here
  if (source=nil) or (count<=0) then exit;
  begd := dest;
  endSource := source+count;
  repeat
    c := byte(source^); inc(source);
    if byte(c) and $80=0 then begin
      // 7 bit char: direct copy
      dest^ := AnsiChar(byte(c)); inc(dest);
      if source<endsource then continue else break;
    end else begin
      if source>=endsource then break; // input ends inside a sequence
      if c and $20=0 then begin
        // two byte sequence
        c := c shl 6+byte(source^)-$00003080; inc(source);
        if c and $ffffff00=0 then begin
          //if WinAnsiTable[c]>255 then
          if pSynTable1252.AnsiToWide[c]>255 then //Sha: use new table
            dest^ := ' ' else // invalid char in the WinAnsi code page
            dest^ := AnsiChar(c);
          inc(dest); // #128..#255 -> direct copy
          if source<endsource then continue else break;
        end;
      end else begin
        // otherwise decoded as a three byte sequence
        c := c shl 6+byte(source^); inc(source);
        if source>=endsource then break;
        c := c shl 6+byte(source^)-$000E2080; inc(source);
      end;
      // #256.. -> slower but accurate conversion
      //c := WinAnsiTableSortedFind(c);
      c := FindAnsiChar(c, pSynTable1252); //Sha: use new version of search
      if integer(c)>=0 then begin
        dest^ := AnsiChar(Byte(c)); // don't add invalid wide char
        inc(dest);
      end;
      if source>=endsource then break;
    end;
  until false;
  result := dest-begd;
end;
//Sha: new version
// Converts a #0-terminated UTF-8 text into a win1252 shortstring.
// dest   - receives at most 255 ansi chars
// source - UTF-8 input; nil gives an empty string
// Chars that the pSynTable1252 table cannot represent are stored as '?'.
// Conversion stops at #0, after 255 chars, or when the text ends inside a
// multi-byte sequence.
procedure UTF8ToShortString(var dest: shortstring; source: PUTF8Char);
var c: cardinal;
    len: integer;
begin
  len := 0;
  if source<>nil then
  repeat
    c := byte(source^); inc(source);
    if c=0 then break else
    if c and $80=0 then begin
      // 7 bit char: direct copy
      inc(len); dest[len] := AnsiChar(c);
      if len<255 then continue else break;
    end else begin
      if source^=#0 then break;
      if c and $20=0 then begin
        // two byte sequence
        c := c shl 6+byte(source^)-$00003080; inc(source);
      end else begin
        // otherwise decoded as a three byte sequence
        c := c shl 6+byte(source^); inc(source);
        if source^=#0 then break;
        c := c shl 6+byte(source^)-$000E2080; inc(source);
      end;
      inc(len);
      if c<256 then begin
        // #128..#255 -> direct copy when valid in this code page.
        // FindAnsiChar only knows chars above 255, so it must not be asked
        // about this range: it would report every such char as missing.
        if pSynTable1252.AnsiToWide[c]>255 then
          c := ord('?');
      end else begin
        // #256.. -> slower but accurate conversion
        //c := WinAnsiTableSortedFind(c);
        c := FindAnsiChar(c, pSynTable1252); //Sha: use new version of search
        if integer(c)<0 then
          c := ord('?');
      end;
      dest[len] := AnsiChar(byte(c));
      if len<255 then continue else break;
    end;
  until false;
  dest[0] := AnsiChar(len);
end;
//Sha: new version
// Converts WideCharCount wide chars from source into win1252 ansi chars in dest,
// one output char per input char; chars without representation become a space.
procedure RawUnicodeToWinPChar(dest: PAnsiChar; source: PWideChar; WideCharCount: Integer);
var
  k, code: integer;
  outc: AnsiChar;
begin
  k := 0;
  while k<WideCharCount do begin
    code := integer(source[k]);
    outc := ' '; // space for invalid wide char
    if code<256 then begin
      // was: WinAnsiTable[wc]<256
      if pSynTable1252.AnsiToWide[code]<256 then
        outc := AnsiChar(code);
    end else begin
      // was: WinAnsiTableSortedFind(wc)
      code := FindAnsiChar(code, pSynTable1252);
      if code>=0 then
        outc := AnsiChar(byte(code));
    end;
    dest[k] := outc;
    inc(k);
  end;
end;
It is easy to create new fast common functions w/o Windows API
by adding parameter, for example
//Sha: new function
// Converts one wide char to the ansi char of the code page described by pSynTable.
// wc        - wide char code
// pSynTable - conversion table of the target code page
// Returns a space when the char has no representation in that code page.
function WideCharToSynAnsiChar(wc: cardinal; pSynTable: PConversionTable): AnsiChar;
var
  b: integer;
begin
  result := ' '; // answer for any char missing from the code page
  if wc<256 then begin
    // fast path: the ansi byte with the same code stands for this very char
    if pSynTable.AnsiToWide[wc]=wc then begin
      result := AnsiChar(wc);
      exit;
    end;
    // Some code pages store a char of the U+0080..U+00FF range at another byte
    // position. Testing AnsiToWide[wc]<256 would then return the wrong char,
    // and FindAnsiChar cannot help because it only lists chars above 255,
    // so scan the upper half of the direct table instead.
    for b := 128 to 255 do
      if pSynTable.AnsiToWide[b]=wc then begin
        result := AnsiChar(b);
        exit;
      end;
  end else begin
    // wc>255: reverse search
    wc := FindAnsiChar(wc, pSynTable);
    if integer(wc)>=0 then
      result := AnsiChar(byte(wc));
  end;
end;
Unit has internal full self-test:
//how to validate all tables in use
// Runs the table self-test and logs the outcome to Memo1.
procedure TForm1.bValidateClick(Sender: TObject);
var
  verdict: string;
begin
  if TestSynTables then
    verdict:='passed'
  else
    verdict:='failed';
  Memo1.Lines.Add('Test of conversion tables ' + verdict);
end;
It is easy to add support for other code pages.
Just copy/paste data from TMemo to the unit.
//how to fill your default table
// Dumps to Memo1 the wide chars that the default code page (GetACP) assigns to
// ansi bytes #128..#255, 16 values per line, ready to be pasted as a table.
procedure TForm1.bShowWideClick(Sender: TObject);
var
  src: array[0..127] of byte;
  wide: array[0..127] of word;
  k, base: integer;
begin
  for k:=Low(src) to High(src) do src[k]:=k+128;
  MultiByteToWideChar(GetACP,0,@src[0],128,@wide[0],128);
  // 8 rows of 16 values; the leading {n:} is the ansi code of the first value
  for k:=0 to 7 do begin
    base:=k*16;
    Memo1.Lines.Add(Format('{%d:} %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, ',
      [base+128,
       wide[base+0], wide[base+1], wide[base+2], wide[base+3],
       wide[base+4], wide[base+5], wide[base+6], wide[base+7],
       wide[base+8], wide[base+9], wide[base+10],wide[base+11],
       wide[base+12],wide[base+13],wide[base+14],wide[base+15]]));
  end;
end;
Offline
Thanks.
I'll see how to add this without breaking the existing features.
But IMHO the Windows APIs are not so slow, when it deals with CP 1251 and such.
All framework core is already optimized for UTF-8 encoding, so those conversions will take place only before calling the UI part.
So I suspect the current implementation is not slow.
Offline
I have tested some new functions. They are faster than framework's ones even on WinAnsiStrings.
I hope I will write full set of Ansi/Unicode/UTF8 conversions in a week.
Last edited by Sha (2012-01-23 18:03:16)
Offline
Hi
New unicode functions are here
Some comments (in Russian) are here
Timings at E6850, table headers:
Charset 1251 - russian text
Charset 1252 - english (ASCII) text
Charset 1252* - english (ASCII) text, but first char is russian
Sha - functions from ShaUnicode.pas
Syn - functions from SynCommons.pas
WinAnsi - special functions from SynCommons.pas for WinAnsiString
AnsiToUnicode time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 563 562 547
WinAnsi 766
Syn 734 734 735
UnicodeToAnsi time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 500 531 515
WinAnsi 938
Syn 828 1703 813
UnicodeToUtf8 time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 563 422 547
Syn 734 703 719
AnsiToUtf8 time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 594 219 594
WinAnsi 593
Syn 1469 1438 1453
Utf8ToUnicode time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 547 594 563
Syn 671 641 672
Utf8ToAnsi time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 687 219 703
WinAnsi 594
Syn 1047 1015 1032
Results at i5-2300
AnsiToUnicode time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 468 452 468
WinAnsi 0 718 0
Syn 686 702 702
UnicodeToAnsi time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 390 375 390
WinAnsi 0 936 0
Syn 982 1685 983
UnicodeToUtf8 time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 530 406 530
Syn 702 702 702
AnsiToUtf8 time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 562 171 562
WinAnsi 0 546 0
Syn 1404 1388 1404
Utf8ToUnicode time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 499 530 499
Syn 609 608 609
Utf8ToAnsi time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 655 218 640
WinAnsi 0 562 0
Syn 1201 1185 1186
Some bugs fixed in ShaUnicode.pas 2012-02-05
Last edited by Sha (2012-02-06 10:38:17)
Offline
Thanks a lot for sharing your code!
Some remarks:
- ShaAnsiToUnicode() will be correct only for 7-bit ASCII - so I guess this is about Ansi7ToString(), which is not often called, so I did not modify it;
- I've updated RawUnicodeToUtf8(), WinAnsiBufferToUtf8(), UTF8ToWideChar(), UTF8ToWinPChar() to handle any trailing 7 bit ASCII AnsiChars, by pairs - this is a very nice trick in practice;
- I did not introduce CP 1252 specific optimization yet, since I'd like to implement a clean class-driven approach here - I've added it to the mORMot roadmap.
Offline
Thanks a lot for sharing your code!
Some remarks:
- ShaAnsiToUnicode() will be correct only for 7-bit ASCII - so I guess this is about Ansi7ToString(), which is not often called, so I did not modify it;
- I've updated RawUnicodeToUtf8(), WinAnsiBufferToUtf8(), UTF8ToWideChar(), UTF8ToWinPChar() to handle any trailing 7 bit ASCII AnsiChars, by pairs - this is a very nice trick in practice;
- I did not introduce CP 1252 specific optimization yet, since I'd like to implement a clean class-driven approach here - I've added it to the mORMot roadmap.
- ShaAnsiToUnicode() uses a pointer to the translation table, as do all Ansi functions from ShaUnicode.pas. So it transforms any Ansi characters correctly. You just need to call the function with pConvLatin or pConvDefault as the second parameter.
- I think it is simple to use new unicode functions from ShaUnicode.pas changing *all* calls in SynCommons.pas:
WinAnsiToRawUnicode/StringToRawUnicode/... --> ShaAnsiToUnicode
RawUnicodeToWinAnsi/RawUnicodeToString/... --> ShaUnicodeToAnsi
WinAnsiToUTF8/AnsiCharToUTF8/... --> ShaAnsiToUTF8
RawUnicodeToUTF8/... --> ShaUnicodeToUTF8
UTF8DecodeToRawUnicode/... --> ShaUTF8ToUnicode
UTF8ToString/... --> ShaUTF8ToAnsi
- Main optimization for win1252 and user default code page is done in ShaUnicode.pas. Of course, we always may do more.
Offline
- ShaAnsiToUnicode() uses a pointer to the translation table, as do all Ansi functions from ShaUnicode.pas. So it transforms any Ansi characters correctly. You just need to call the function with pConvLatin or pConvDefault as the second parameter.
Indeed, I overlooked the code and did not see the 64 KB of lookup table defined.
The upcoming TSynAnsiConvert class will use similar tables.
Offline
That's it.
I've done a huge code refactoring to include a generic optimized way of converting Ansi content using several charsets/codepages.
I tried to add all your speed ups (if not too specific) into the framework.
I've created two new TSynAnsiConvert and TSynAnsiFixedWidth classes, able to process Unicode to/from Ansi conversion in all possible code pages, with generic access methods and optimized handling of fixed width encodings.
See http://synopse.info/fossil/info/fef5fd8640
Code page 1251 will now be as fast as possible.
Due to this code refactoring, e.g. in SQLite3i18n, some methods have disappeared.
Offline
It is just an implementation pattern.
GetMem() + FreeMem() with no try..finally blocks "could" be a little less expensive than a local string allocation, which always generates an implicit try...finally block by the compiler.
So if the stack buffer is used, there won't be any try...finally block generated.
Such a trick should be used only if you are sure that no exception will be raised within the process - or you may leak memory (FreeMem is not called if an exception is raised).
Nothing definitive. Just something I wanted to code like that.
Offline
SetString() will create a new allocation buffer, whereas SetLength() will reuse an existing one.
It means that SetString() will never move data, whereas SetLength() could, if the string was already containing something.
Most of the time, we will overwrite the string content, so move() the data is just a time waste.
In case of a function returning a string, the "result" variable is in fact passed by reference: that is, the previous content is available. Therefore, SetLength() of a huge result could unnecessarily move a lot of data.
That's why I try:
- either set the string to '' before SetLength()
- either use SetString() with an existing buffer or with nil, which will in fact set to '' then call SetLength()
I've posted this on the blog - see http://blog.synopse.info/post/2012/02/0 … -SetString
Offline
Yes, I see :-)
But the question is
why sometimes the framework calculates data in the buffer and then copies data to result string,
and sometimes it calculates data in the result string and then reallocates it?
Offline
Due to FastMM4 (the memory manager) implementation, reallocation is sometimes a no cost operation (it is a so-called in-place reallocation).
The safer is IMHO to use SetString() or set to '' then call SetLength().
Calculating data in the result string is not so bad, since it won't create any temporary try..finally block, whereas using a stack-based buffer will.
Like always, profiling on real application will make the difference, here...
But I suspect that most process will use the fixed stack-based temporary memory (for a text with up to 255 WideChars), and won't require any heap allocation.
About the new TSynAnsiConvert classes, what is nice with it is that it centralizes all OS-specific API calls within the class.
This will make conversion to Mac OS X (or Linux) easier.
Offline
Speed tests (2 times each test)
of new ShaUnicode functions (unbuffered version 2012-02-09)
and new framework functions at E6850
AnsiToUnicode time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 562 563 562
WinAnsi 735
Syn 734 735 734
Sha 562 563 547
WinAnsi 734
Syn 750 735 734
UnicodeToAnsi time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 500 531 531
WinAnsi 813
Syn 1109 1109 1110
Sha 500 531 531
WinAnsi 813
Syn 1109 1109 1110
UnicodeToUtf8 time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 2266 359 485
Syn 2734 360 718
Sha 2266 359 469
Syn 2734 375 704
AnsiToUtf8 time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 2547 140 485
WinAnsi 218
Syn 3422 203 329
Sha 2546 141 484
WinAnsi 219
Syn 3422 203 328
Utf8ToUnicode time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 2109 453 406
Syn 2500 516 687
Sha 2110 453 406
Syn 2500 516 687
Utf8ToAnsi time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 2297 141 609
WinAnsi 219
Syn 3047 218 329
Sha 2250 156 609
WinAnsi 203
Syn 3063 219 328
Code page 1252 is very fast, but 1251 is not.
Offline
The code for CP 1251 and 1252 is exactly the same: both use the TSynAnsiFixedWidth class, and the same algorithms.
I guess something is wrong... somewhere.
Could you provide some sample code?
Here are my tests results:
CodePage 1250 UTF8:95.83ms Unicode:44.02ms
CodePage 1251 UTF8:95.97ms Unicode:44.15ms
CodePage 1252 UTF8:94.41ms Unicode:43.99ms
CodePage 1253 UTF8:94.16ms Unicode:43.78ms
CodePage 1254 UTF8:94.15ms Unicode:44.53ms
CodePage 1255 UTF8:94.13ms Unicode:44.13ms
CodePage 1256 UTF8:94.27ms Unicode:44.02ms
CodePage 1257 UTF8:94.57ms Unicode:43.78ms
CodePage 1250 UTF8:94.39ms Unicode:44.08ms
CodePage 1251 UTF8:94.31ms Unicode:44.14ms
CodePage 1252 UTF8:94.49ms Unicode:44.00ms
CodePage 1253 UTF8:94.13ms Unicode:43.92ms
CodePage 1254 UTF8:94.22ms Unicode:43.95ms
CodePage 1255 UTF8:94.31ms Unicode:43.78ms
CodePage 1256 UTF8:94.94ms Unicode:44.09ms
CodePage 1257 UTF8:94.42ms Unicode:43.81ms
With the following code, which will convert, for all those code pages, some random string to/from UTF8/Unicode.
Each time, 10001 strings of 0..1250 characters long are converted.
// Benchmark: for code pages 1250..1257, times the Ansi->UTF8->Ansi and the
// Ansi->RawUnicode->Ansi round trips over 10001 random strings and prints both timings.
procedure TestCP;
var
  Timer: TPrecisionTimer;
  Conv: TSynAnsiConvert;
  Page, n: Integer;
  Samples: array[0..10000] of RawByteString;
begin
  // string n is built with RandomString(n shr 3)
  for n := low(Samples) to high(Samples) do
    Samples[n] := TSynTestCase.RandomString(n shr 3);
  Page := 1250;
  while Page<=1257 do begin
    write('CodePage ',Page,' UTF8:');
    Conv := TSynAnsiConvert.Engine(Page);
    Timer.Start;
    for n := low(Samples) to high(Samples) do
      Assert(Conv.UTF8ToAnsi(Conv.AnsiToUTF8(Samples[n]))=Samples[n]);
    Write(Timer.Stop,' Unicode:');
    Timer.Start;
    for n := low(Samples) to high(Samples) do
      Assert(Conv.RawUnicodeToAnsi(Conv.AnsiToRawUnicode(Samples[n]))=Samples[n]);
    Writeln(Timer.Stop);
    inc(Page);
  end;
end;
You can see that the conversion speed is very consistent.
I guess there is something wrong in your test code.
Offline
I have used russian and english texts. Very simple tests. Some of them are here.
var
//137 chars
s1251: RawByteString='Лишь годные дятлы собираются в стаи, юникодом пугая мозги января. Их песни не стихнут, они не устанут. А елка как кактус беспокоит меня. ';
s1252: RawByteString='Only woodpeckers gather in flights, they frighten with unicode the brains of January. They were not tired, their songs will not abate. ';
a1251, a1252, a255: RawByteString;
u1251, u1252, u255: RawUnicode;
t1251, t1252, t255: RawUTF8;
Ticks: array[0..18] of cardinal;
TicksLast: integer;
Iterations: integer;
// Advances to the next timing slot and stores the current tick count in it
// (slots beyond the end of Ticks are skipped). Returns the iteration count
// to use for the next measured loop.
function NextTimer: integer;
begin
  Result:=Iterations;
  inc(TicksLast);
  if TicksLast>High(Ticks) then exit;
  Ticks[TicksLast]:=GetTickCount;
end;
// Restarts the timing sequence: the following NextTimer call fills slot 0.
// Returns the iteration count reported by NextTimer.
function FirstTimer: integer;
begin
  TicksLast:=Low(Ticks)-1;
  Result:=NextTimer;
end;
//--------------------------------------------------------------------------------------------------
// Prepares the benchmark data: long ansi texts, their unicode and UTF-8 forms,
// and a sanity check of the Sha conversion functions on the 1251 text.
procedure TForm1.FormCreate(Sender: TObject);
var
  pass: integer;
  roundTripOk: boolean;
begin
  Iterations:=10000;
  // double each sample 8 times, then append one more copy: 257 copies, 35209 chars
  a1251:=s1251;
  a1252:=s1252;
  pass:=8;
  repeat
    a1251:=a1251 + a1251;
    a1252:=a1252 + a1252;
    dec(pass);
  until pass=0;
  a1251:=a1251 + s1251;
  a1252:=a1252 + s1252;
  // same as the 1252 text, except that the first char is #255
  a255:=a1252;
  a255[1]:=#255;
  // reference conversions, one data set at a time
  u1251:=ShaAnsiToUnicode(a1251,pConvDefault);
  t1251:=ShaUnicodeToUTF8(u1251);
  u1252:=ShaAnsiToUnicode(a1252,pConvDefault);
  t1252:=ShaUnicodeToUTF8(u1252);
  u255 :=ShaAnsiToUnicode(a255, pConvDefault);
  t255 :=ShaUnicodeToUTF8(u255);
  // every conversion path must give back the matching form of the 1251 text
  roundTripOk:=(ShaUnicodeToAnsi(u1251,pConvDefault)=a1251)
           and (ShaUTF8ToUnicode(t1251)=u1251)
           and (ShaAnsiToUTF8(a1251,pConvDefault)=t1251)
           and (ShaUTF8ToAnsi(t1251,pConvDefault)=a1251);
  if not roundTripOk then ShowMessage('Error in FormCreate');
end;
//--------------------------------------------------------------------------------------------------
// Prints the timing table for one benchmark: a header followed by six rows
// (Sha, WinAnsi, Syn, twice); each row shows three consecutive tick differences.
procedure TForm1.ShowResult(const Title: string);
const
  RowFmt: array[0..2] of string = (
    'Sha %5d %5d %5d',
    'WinAnsi %5d %5d %5d',
    'Syn %5d %5d %5d');
var
  row, b: integer;
begin
  Memo1.Lines.Add ('');
  Memo1.Lines.Add (' ' + Title + ' time, ms');
  Memo1.Lines.Add ('============================');
  Memo1.Lines.Add (' Charset');
  Memo1.Lines.Add ('Functions 1251 1252 1252*');
  Memo1.Lines.Add ('----------------------------');
  // row r uses tick slots 3r .. 3r+3
  for row:=0 to 5 do begin
    b:=row*3;
    Memo1.Lines.Add(Format(RowFmt[row mod 3],[Ticks[b+1]-Ticks[b],
                                              Ticks[b+2]-Ticks[b+1],
                                              Ticks[b+3]-Ticks[b+2]]));
  end;
end;
//--------------------------------------------------------------------------------------------------
// Benchmark of Ansi -> UTF-8 conversions.
// Every inner loop runs the measured call twice per iteration and ends with
// NextTimer, which stores the tick count of the next slot. The nine loops fill
// slots 1..9 on the first pass and 10..18 on the second one, in the order
// expected by ShowResult: Sha (3 texts), WinAnsi (3 slots), Syn (3 texts).
procedure TForm1.bAnsiToUtf8Click(Sender: TObject);
var
t: RawUTF8;
i: integer;
begin;
i:=FirstTimer;
repeat;
// Sha functions: 1251 text, 1252 text, 1252 text starting with #255
repeat;
t:=ShaAnsiToUtf8(a1251,pConvDefault);
t:=ShaAnsiToUtf8(a1251,pConvDefault);
dec(i); until i=0; i:=NextTimer;
repeat;
t:=ShaAnsiToUtf8(a1252,pConvDefault);
t:=ShaAnsiToUtf8(a1252,pConvDefault);
dec(i); until i=0; i:=NextTimer;
repeat;
t:=ShaAnsiToUtf8(a255,pConvDefault);
t:=ShaAnsiToUtf8(a255,pConvDefault);
dec(i); until i=0; i:=NextTimer;
// WinAnsi functions: only the 1252 text is measured, the two other slots time an empty loop
repeat;
dec(i); until i=0; i:=NextTimer;
repeat;
t:=WinAnsiToUtf8(a1252);
t:=WinAnsiToUtf8(a1252);
dec(i); until i=0; i:=NextTimer;
repeat;
dec(i); until i=0; i:=NextTimer;
// Syn functions: code page 1251 is passed for all three texts
repeat;
AnsiCharToUTF8(pointer(a1251),length(a1251),t,1251);
AnsiCharToUTF8(pointer(a1251),length(a1251),t,1251);
dec(i); until i=0; i:=NextTimer;
repeat;
AnsiCharToUTF8(pointer(a1252),length(a1252),t,1251);
AnsiCharToUTF8(pointer(a1252),length(a1252),t,1251);
dec(i); until i=0; i:=NextTimer;
repeat;
AnsiCharToUTF8(pointer(a255),length(a255),t,1251);
AnsiCharToUTF8(pointer(a255),length(a255),t,1251);
dec(i); until i=0; i:=NextTimer;
// stop after the second pass (slot 18 filled)
until TicksLast>9;
ShowResult('AnsiToUtf8');
end;
//--------------------------------------------------------------------------------------------------
// Benchmark of UTF-8 -> Ansi conversions.
// Every inner loop runs the measured call twice per iteration and ends with
// NextTimer, which stores the tick count of the next slot. The nine loops fill
// slots 1..9 on the first pass and 10..18 on the second one, in the order
// expected by ShowResult: Sha (3 texts), WinAnsi (3 slots), Syn (3 texts).
procedure TForm1.bUTF8ToAnsiClick(Sender: TObject);
var
t: AnsiString;
i: integer;
begin;
i:=FirstTimer;
repeat;
// Sha functions: 1251 text, 1252 text, 1252 text starting with #255
repeat;
t:=ShaUTF8ToAnsi(t1251,pConvDefault);
t:=ShaUTF8ToAnsi(t1251,pConvDefault);
dec(i); until i=0; i:=NextTimer;
repeat;
t:=ShaUTF8ToAnsi(t1252,pConvDefault);
t:=ShaUTF8ToAnsi(t1252,pConvDefault);
dec(i); until i=0; i:=NextTimer;
repeat;
t:=ShaUTF8ToAnsi(t255,pConvDefault);
t:=ShaUTF8ToAnsi(t255,pConvDefault);
dec(i); until i=0; i:=NextTimer;
// WinAnsi functions: only the 1252 text is measured, the two other slots time an empty loop
repeat;
dec(i); until i=0; i:=NextTimer;
repeat;
t:=Utf8ToWinAnsi(t1252);
t:=Utf8ToWinAnsi(t1252);
dec(i); until i=0; i:=NextTimer;
repeat;
dec(i); until i=0; i:=NextTimer;
// Syn functions on the three texts
repeat;
t:=Utf8ToString(t1251);
t:=Utf8ToString(t1251);
dec(i); until i=0; i:=NextTimer;
repeat;
t:=Utf8ToString(t1252);
t:=Utf8ToString(t1252);
dec(i); until i=0; i:=NextTimer;
repeat;
t:=Utf8ToString(t255);
t:=Utf8ToString(t255);
dec(i); until i=0; i:=NextTimer;
// stop after the second pass (slot 18 filled)
until TicksLast>9;
ShowResult('Utf8ToAnsi');
end;
Offline
Your code is a bit difficult to follow.
And I do not understand lines like this:
AnsiCharToUTF8(pointer(a1252),length(a1252),t,1251);
... 1252 or 1251?
In all cases, GetTickCount is not a very good idea for benchmarking.
You should better use a high resolution timer (like our TPrecisionTimer object).
You should better not use AnsiCharToUTF8() but e.g. directly CurrentAnsiConvert.AnsiToUTF8(a1251) if the current ansi code page is 1251.
Better speed will be achieved with AnsiToUTF8/UTF8ToAnsi and RawUnicodeToAnsi/AnsiToRawUnicode methods.
Those are the direct entry points of the framework.
Offline
My default code page is 1251. It can be used for test with all russian and english (ASCII) texts.
I think 15 ms resolution of GetTickCount is sufficient in our case.
Changing to CurrentAnsiConvert.AnsiToUTF8()/UTF8ToAnsi() shows the same speed.
AnsiToUtf8 time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 2547 140 594
WinAnsi 0 203 0
Syn 3422 203 313
Sha 2547 140 594
WinAnsi 0 203 0
Syn 3422 203 313
Utf8ToAnsi time, ms
============================
Charset
Functions 1251 1252 1252*
----------------------------
Sha 2297 141 609
WinAnsi 0 219 0
Syn 3047 203 344
Sha 2234 156 610
WinAnsi 0 203 0
Syn 3062 203 329
Last edited by Sha (2012-02-09 21:15:33)
Offline
You are not measuring the same content.
So your benchmark is a bit difficult to read.
If you use plain random text, there is no diff between the code pages in our implementation.
It is very stable, and can handle any kind of content - it is always better to benchmark with random data instead of fixed data.
Of course, best pattern would be indeed real data (e.g. from a big text, like a whole Bible) in the real language.
The sample test I provide above (easy to follow) proves it.
Offline
I use text of same length in both languages (Russian and English). Measure differs in 15 times.
So in real applications for long texts it is better to use win1251 coding but not utf8. It makes difficult using of JSON in my case.
Offline
With the following code:
// Benchmark comparing three converters for code pages 1250..1257:
//  - "WinAPI": a TSynAnsiConvert instance created for the code page
//  - "Sha":    the ShaUnicode functions with the table from GetShaConvTable
//  - "Syn":    the engine returned by TSynAnsiConvert.Engine
// For each one, the Ansi->UTF8->Ansi and Ansi->RawUnicode->Ansi round trips are
// timed over 10001 random strings, and every round trip is asserted to be lossless.
procedure TestCP;
var T: TPrecisionTimer;
    C: TSynAnsiConvert;
    CP,i: Integer;
    ST: array[0..10000] of RawByteString;
    Sha: PConvTable;
begin
  for i := 0 to high(ST) do
    ST[i] := TSynTestCase.RandomString(i shr 3);
  for CP := 1250 to 1257 do begin
    writeln('CodePage ',CP);
    Write(' WinAPI UTF8:');
    C := TSynAnsiConvert.Create(CP);
    try // this instance is owned here: release it even if an Assert fails
      T.Start;
      for i := 0 to high(ST) do
        Assert(C.UTF8ToAnsi(C.AnsiToUTF8(ST[i]))=ST[i]);
      Write(T.Stop,' Unicode:');
      T.Start;
      for i := 0 to high(ST) do
        Assert(C.RawUnicodeToAnsi(C.AnsiToRawUnicode(ST[i]))=ST[i]);
      Writeln(T.Stop);
    finally
      C.Free;
    end;
    Sha := GetShaConvTable(CP);
    write(' Sha UTF8:');
    T.Start;
    for i := 0 to high(ST) do
      Assert(ShaUTF8ToAnsi(ShaAnsiToUTF8(ST[i],Sha),Sha)=ST[i]);
    Write(T.Stop,' Unicode:');
    T.Start;
    for i := 0 to high(ST) do
      Assert(ShaUnicodeToAnsi(ShaAnsiToUnicode(ST[i],Sha),Sha)=ST[i]);
    writeln(T.Stop);
    write(' Syn UTF8:');
    // the instance returned by Engine() is not freed in this routine
    C := TSynAnsiConvert.Engine(CP);
    T.Start;
    for i := 0 to high(ST) do
      Assert(C.UTF8ToAnsi(C.AnsiToUTF8(ST[i]))=ST[i]);
    Write(T.Stop,' Unicode:');
    T.Start;
    for i := 0 to high(ST) do
      Assert(C.RawUnicodeToAnsi(C.AnsiToRawUnicode(ST[i]))=ST[i]);
    Writeln(T.Stop);
  end;
end;
I noticed that your routines are a bit faster than mine.
But also that it works only with (CP=1250) or (CP=1251) or (CP=1254) or (CP=1256).
I've got access violation otherwise. I suspect there are some issues in your UTF8 conversion code.
Here are the scores with random text (i.e. a more aggressive test than with true text):
CodePage 1250
WinAPI UTF8:117.78ms Unicode:44.48ms
Sha UTF8:79.80ms Unicode:18.98ms
Syn UTF8:89.67ms Unicode:25.75ms
CodePage 1251
WinAPI UTF8:116.97ms Unicode:44.54ms
Sha UTF8:80.15ms Unicode:18.73ms
Syn UTF8:90.04ms Unicode:25.75ms
CodePage 1252
WinAPI UTF8:116.15ms Unicode:43.64ms
Syn UTF8:89.62ms Unicode:25.63ms
CodePage 1253
WinAPI UTF8:116.59ms Unicode:44.40ms
Syn UTF8:89.23ms Unicode:25.98ms
CodePage 1254
WinAPI UTF8:117.03ms Unicode:44.83ms
Sha UTF8:79.82ms Unicode:18.66ms
Syn UTF8:89.49ms Unicode:25.88ms
CodePage 1255
WinAPI UTF8:117.06ms Unicode:44.42ms
Syn UTF8:89.54ms Unicode:25.64ms
CodePage 1256
WinAPI UTF8:116.54ms Unicode:44.47ms
Sha UTF8:80.01ms Unicode:18.83ms
Syn UTF8:88.97ms Unicode:25.94ms
CodePage 1257
WinAPI UTF8:116.63ms Unicode:44.64ms
Syn UTF8:89.59ms Unicode:26.04ms
For UTF8, Windows APIs are not so bad, after all... What was slow is Unicode/UTF8 encoding in this case - and this part is sharing the same encoding routine.
For Unicode, we are about 2 times faster than Windows API.
In all cases, UTF-8 is acceptable for French or such (with an accent every one and then, but mostly latin chars).
Of course, UTF-8 is more verbose with Code Page 1251, when most chars are not ASCII 7.
It still makes sense if your UTF-8 has some field names or spaces, like when an object is serialized.
In all cases, the speed bottleneck of our framework is clearly not in the UTF-8 encoding any more.
Offline
I noticed that your routines are a bit faster than mine.
But also that it works only with (CP=1250) or (CP=1251) or (CP=1254) or (CP=1256).
I've got access violation otherwise. I suspect there are some issues in your UTF8 conversion code.
I have validated all my functions for CP 1251 using code
{ Returns a random byte string of 1 + Random(MaxCharCount) characters.
  Every character code lies in #32..#255 (Random(224) + 32).
  Characters are generated from the last position down to the first. }
function RandomString(MaxCharCount: Integer): RawByteString;
const
  FirstChar = 32;        // lowest character code produced
  CharRange = 96 + 128;  // ASCII + #128..255 (96 + 32 would give ASCII + #128..159)
var
  Len, Idx: Integer;
  Dest: PAnsiChar;
begin
  Len := 1 + Random(MaxCharCount);
  SetString(Result, nil, Len);
  Dest := Pointer(Result);
  // Walk backwards so the Random sequence lands on the same positions
  // as a "dec counter, store at counter" loop would produce.
  for Idx := Len - 1 downto 0 do
    Dest[Idx] := AnsiChar(Random(CharRange) + FirstChar);
end;
{ Cross-checks the Sha* conversion routines against reference results
  computed with the Windows API for one input string.

  s      - ANSI input text; converted with GetACP as the reference code page.
  Status - bit mask of checks to skip: a check whose bit is already set is
           not executed (pass the result of a previous call to re-run only
           the checks that failed).

  Result - Status with one bit set per passed check (bit 0 = first check,
           bit 8 = ninth check) and every bit above the last check forced
           to 1, so that "all checks passed" yields -1. }
function Validate(const s: RawByteString; Status: integer=0): integer;
var
  s2: RawByteString;
  u, u2: RawUnicode;
  t, t2: RawUTF8;
  Len, Len2, Len8, TestNo: integer;
begin
  // --- reference data: s -> UTF-16 (u) -> UTF-8 (t) via WinAPI ---
  u := '';
  t := '';
  Len8 := 0;
  Len := Length(s);
  if Len > 0 then begin
    SetLength(u, Len*2); // byte length of Len UTF-16 code units
    // NOTE(review): intent of this store is unclear - for Len >= 2 the byte
    // is expected to be overwritten by MultiByteToWideChar below, and for
    // Len = 1 index 3 is past the 2-byte length set above. Confirm/remove.
    u[Len+2] := #0;
    MultiByteToWideChar(GetACP, 0, pointer(s), Len, pointer(u), Len);
    // first call only measures the required UTF-8 size
    Len8 := WideCharToMultiByte(CP_UTF8, 0, pointer(u), Len, nil, 0, nil, nil);
    SetLength(t, Len8);
    WideCharToMultiByte(CP_UTF8, 0, pointer(u), Len, pointer(t), Len8, nil, nil);
  end;

  // --- string-returning conversions, compared with the reference data ---
  TestNo := 1;
  if TestNo and Status = 0 then begin
    u2 := ShaAnsiToUnicode(s, pConvDefault);
    if u2 = u then Status := Status or TestNo;
  end;

  TestNo := TestNo*2;
  if TestNo and Status = 0 then begin
    t2 := ShaUnicodeToUTF8(u);
    if t2 = t then Status := Status or TestNo;
  end;

  TestNo := TestNo*2;
  if TestNo and Status = 0 then begin
    s2 := ShaUnicodeToAnsi(u, pConvDefault);
    if s2 = s then Status := Status or TestNo;
  end;

  TestNo := TestNo*2;
  if TestNo and Status = 0 then begin
    u2 := ShaUTF8ToUnicode(t);
    if u2 = u then Status := Status or TestNo;
  end;

  TestNo := TestNo*2;
  if TestNo and Status = 0 then begin
    t2 := ShaAnsiToUTF8(s, pConvDefault);
    if t2 = t then Status := Status or TestNo;
  end;

  TestNo := TestNo*2;
  if TestNo and Status = 0 then begin
    s2 := ShaUTF8ToAnsi(t, pConvDefault);
    if s2 = s then Status := Status or TestNo;
  end;

  // --- pointer/length overloads: only the returned length is compared ---
  TestNo := TestNo*2;
  if TestNo and Status = 0 then begin
    Len2 := ShaUnicodeToUTF8(pointer(u), Len);
    if Len2 = Len8 then Status := Status or TestNo;
  end;

  TestNo := TestNo*2;
  if TestNo and Status = 0 then begin
    Len2 := ShaAnsiToUTF8(pointer(s), Len, pConvDefault);
    if Len2 = Len8 then Status := Status or TestNo;
  end;

  TestNo := TestNo*2;
  if TestNo and Status = 0 then begin
    Len2 := ShaUTF8ToAnsi(pointer(t), Len8);
    if Len2 = Len then Status := Status or TestNo;
  end;

  // Set every bit above the last check in one step. This yields the same
  // mask as doubling TestNo until it wraps to zero, but does not rely on
  // signed overflow (which raises EIntOverflow when compiled with $Q+).
  Status := Status or not (TestNo*2 - 1);
  Result := Status;
end;
{ Button handler: runs Validate over 150001 random strings and writes the
  number of failing strings to Memo1.
  The first string is generated with a maximum length of 0, the following
  ones with 32, and the last part of the run with 8*1024. }
procedure TForm1.bValidateClick(Sender: TObject);
const
  LastIteration = 150000;  // iterations run from 0 to this value inclusive
  ShortPhaseEnd = 100000;  // after this iteration the long maximum is used
var
  Sample: RawByteString;
  Iter, MaxLen, Failures, Outcome: integer;
begin
  Failures := 0;
  MaxLen := 0;
  for Iter := 0 to LastIteration do begin
    Sample := RandomString(MaxLen);
    Outcome := Validate(Sample);
    if Outcome <> -1 then begin
      Inc(Failures);
      // Second pass with the returned mask: only the checks that failed are
      // executed again (presumably a convenient place for a breakpoint).
      Validate(Sample, Outcome);
    end;
    // Maximum length for the NEXT iteration.
    if Iter > ShortPhaseEnd then
      MaxLen := 8*1024
    else
      MaxLen := 32;
  end;
  Memo1.Lines.Add(Format('Validation done, %d errors', [Failures]));
end;
Also I have tested them for CP 1251 and 1252 as you do
{ Returns a random byte string of exactly CharCount characters.
  While more than 5 characters remain, one Random(maxInt) value is split
  into five characters: four in #32..#159 (7 bits each, offset 32) and a
  fifth built from the remaining high bits with offset 65.
  The last (at most 5) characters are drawn one by one from #32..#255. }
function TSynTestCaseRandomString(CharCount: Integer): RawByteString;
var
  Bits: cardinal;
  Dest: PAnsiChar;
  k: Integer;
begin
  SetString(Result, nil, CharCount);
  Dest := Pointer(Result);
  // Bulk phase - fast: one random compute per 5 chars.
  while CharCount > 5 do begin
    Bits := Random(maxInt);
    for k := 0 to 3 do begin
      Dest[k] := AnsiChar(32 + (Bits and 127));
      Bits := Bits shr 7;
    end;
    Dest[4] := AnsiChar(65 + Bits);
    Inc(Dest, 5);
    Dec(CharCount, 5);
  end;
  // Tail phase: one random compute per char.
  while CharCount > 0 do begin
    Dest^ := AnsiChar(32 + Random(224));
    Inc(Dest);
    Dec(CharCount);
  end;
end;
{ Round-trip test for two conversion tables: pConvDefault (used for the
  1251 pass) and pConvLatin (used for the 1252 pass).
  For each table, 10001 random strings are converted Ansi -> UTF-8 -> Ansi
  and Ansi -> Unicode -> Ansi; the first string that does not survive a
  round trip is written to Form1.Memo1 and that pass stops. }
procedure TestCP;
var
  CodePage, Idx: Integer;
  Samples: array[0..10000] of RawByteString;
  Table: PConvTable;

  // Logs the failing test name followed by the offending string.
  procedure Report(const Caption: string; const Sample: RawByteString);
  begin
    Form1.Memo1.Lines.Add(Caption);
    Form1.Memo1.Lines.Add(Sample);
  end;

begin
  // String lengths grow slowly with the index: 0 .. 1250 characters.
  for Idx := 0 to High(Samples) do
    Samples[Idx] := TSynTestCaseRandomString(Idx shr 3);

  for CodePage := 1251 to 1252 do begin
    if CodePage = 1252 then
      Table := pConvLatin
    else
      Table := pConvDefault;

    for Idx := 0 to High(Samples) do
      if ShaUTF8ToAnsi(ShaAnsiToUTF8(Samples[Idx], Table), Table) <> Samples[Idx] then begin
        Report('ShaAnsiToUTF8' + IntToStr(CodePage), Samples[Idx]);
        Break;
      end;

    for Idx := 0 to High(Samples) do
      if ShaUnicodeToAnsi(ShaAnsiToUnicode(Samples[Idx], Table), Table) <> Samples[Idx] then begin
        Report('ShaAnsiToUnicode' + IntToStr(CodePage), Samples[Idx]);
        Break;
      end;
  end;
end;
{ Button handler: runs TestCP, then appends a completion line to the memo
  of the global Form1 instance. }
procedure TForm1.Button1Click(Sender: TObject);
const
  DoneMessage = 'TestCP done';
begin
  TestCP;
  Form1.Memo1.Lines.Add(DoneMessage);
end;
No errors found in all tests.
Could you please give more details about the AV for CP 1252?
Offline
Just use my above benchmark code, and you'll find out that it works only with (CP=1250) or (CP=1251) or (CP=1254) or (CP=1256).
So maybe the problem is in your above benchmark code? ;-)
Or did you test my old functions?
I don't have such a function
Sha := GetShaConvTable(CP);
as well as conversion tables for all these CPs.
Could you provide minimal code that I could reproduce AV?
Offline
I do not have the GetShaConvTable() code here.
It is just something which will create a PConvTable instance corresponding to the CP.
The error is raised for some random text lengths.
I tested ShaUnicode_0.pas as far as I remember.
Offline
Ok. Then let's try my code.
I have tested the functions from the current version of ShaUnicode
using 1000 random CP and 10000 random strings.
No error. You can repeat this. All code you need is below. You need also Button and Memo on the Form.
{ Fills pTable^ with a randomly generated single-byte "code page":
    - codes 0..127 map to themselves in both directions;
    - every wide code >= 128 first maps back to DefaultConvChar;
    - ANSI codes 128..191 each get a distinct random wide code in
      $0100..$07FF, ANSI codes 192..255 a distinct one in $0800..$FFFF;
    - AnsiToUTF8Len[i] is 0, 1 or 2 depending on whether AnsiToWide[i] is
      <= $7F, <= $7FF or larger (presumably the number of extra UTF-8
      bytes - confirm against the conversion routines).
  CodePage is set to 0.

  UTF-16 surrogate code units ($D800..$DFFF) are never chosen: they are not
  characters on their own and have no well-formed UTF-8 encoding, so a table
  containing them would not describe a valid code page. }
procedure FillRandomConvTable(pTable: PConvTable);
const
  DefaultConvChar = 32;
  FirstSurrogate = $D800;
  LastSurrogate = $DFFF;
var
  i, n: NInt;
begin
  pTable.CodePage := 0;

  // Identity mapping for the ASCII half.
  for i := 0 to 127 do begin
    pTable.WideToAnsi[i] := i;
    pTable.AnsiToWide[i] := i;
  end;

  // Everything else is "unmapped" until assigned below.
  for i := 128 to $FFFF do
    pTable.WideToAnsi[i] := DefaultConvChar;

  for i := 128 to 255 do begin
    repeat
      if i < 128 + 64 then
        n := Random($07FF - 255) + 256        // $0100..$07FF
      else
        n := Random($FFFF - $07FF) + $0800;   // $0800..$FFFF
      // Retry when the wide code is already taken or is a surrogate.
    until (pTable.WideToAnsi[n] = DefaultConvChar)
      and ((n < FirstSurrogate) or (n > LastSurrogate));
    pTable.WideToAnsi[n] := i;
    pTable.AnsiToWide[i] := n;
  end;

  // Classify every ANSI code by the size class of its wide code.
  for i := 0 to 255 do begin
    n := pTable.AnsiToWide[i];
    if n <= 127 then
      n := 0
    else if n <= $7FF then
      n := 1
    else
      n := 2;
    pTable.AnsiToUTF8Len[i] := n;
  end;
end;
{ Returns a random byte string of exactly CharCount characters.
  While more than 5 characters remain, one Random(maxInt) value is split
  into five characters: four in #32..#159 (7 bits each, offset 32) and a
  fifth built from the remaining high bits with offset 65.
  The last (at most 5) characters are drawn one by one from #32..#255. }
function TSynTestCaseRandomString(CharCount: Integer): RawByteString;
var
  Bits: cardinal;
  Dest: PAnsiChar;
  k: Integer;
begin
  SetString(Result, nil, CharCount);
  Dest := Pointer(Result);
  // Bulk phase - fast: one random compute per 5 chars.
  while CharCount > 5 do begin
    Bits := Random(maxInt);
    for k := 0 to 3 do begin
      Dest[k] := AnsiChar(32 + (Bits and 127));
      Bits := Bits shr 7;
    end;
    Dest[4] := AnsiChar(65 + Bits);
    Inc(Dest, 5);
    Dec(CharCount, 5);
  end;
  // Tail phase: one random compute per char.
  while CharCount > 0 do begin
    Dest^ := AnsiChar(32 + Random(224));
    Inc(Dest);
    Dec(CharCount);
  end;
end;
{ Round-trip test over 1000 randomly generated conversion tables.
  For each table, 10001 random strings are converted Ansi -> UTF-8 -> Ansi
  and Ansi -> Unicode -> Ansi; the first string that does not survive a
  round trip is written to Form1.Memo1 together with the table number, and
  that pass stops. }
procedure TestCP;
var
  Samples: array[0..10000] of RawByteString;
  Round, Idx: integer;
  Table: TConvTable;

  // Logs the failing test name followed by the offending string.
  procedure Report(const Caption: string; const Sample: RawByteString);
  begin
    Form1.Memo1.Lines.Add(Caption);
    Form1.Memo1.Lines.Add(Sample);
  end;

begin
  // String lengths grow slowly with the index: 0 .. 1250 characters.
  for Idx := 0 to High(Samples) do
    Samples[Idx] := TSynTestCaseRandomString(Idx shr 3);

  for Round := 0 to 999 do begin
    // A fresh random table for every round.
    FillRandomConvTable(@Table);

    for Idx := 0 to High(Samples) do
      if ShaUTF8ToAnsi(ShaAnsiToUTF8(Samples[Idx], @Table), @Table) <> Samples[Idx] then begin
        Report('ShaAnsiToUTF8 ' + IntToStr(Round), Samples[Idx]);
        Break;
      end;

    for Idx := 0 to High(Samples) do
      if ShaUnicodeToAnsi(ShaAnsiToUnicode(Samples[Idx], @Table), @Table) <> Samples[Idx] then begin
        Report('ShaAnsiToUnicode ' + IntToStr(Round), Samples[Idx]);
        Break;
      end;
  end;
end;
{ Button handler: runs TestCP, then appends a completion line to the memo
  of the global Form1 instance. }
procedure TForm1.Button1Click(Sender: TObject);
const
  DoneMessage = 'TestCP done';
begin
  TestCP;
  Form1.Memo1.Lines.Add(DoneMessage);
end;
Offline
Pages: 1