You are not logged in.
Pages: 1
Hi ab, I am missing a routine for parsing a string containing CSV data.
I may well meet with disapproval, since the SynCommons unit already provides a GetNextItem routine that returns the next CSV value from a string. This works well on a single line of CSV, and the same could be done with CSVToRawUTF8DynArray or the DelimitedText property of TStrings.
But you will struggle with parsing a file that has more than one record.
JSON content is not a problem, because line feeds and carriage returns inside a JSON string are escaped. In that case you will be fine with the GetNextItemTrimedCRLF routine in SynCommons, which returns the next CSV record. With other content, however, it can fail, because a quoted value in CSV may contain a CR or LF (see RFC 4180, text/csv).
I guess it wouldn't be a good idea to first search for the end of a record (CRLF) in a CSV file and only then parse the record – that would scan the record twice. But if a GetNextItemCRLF routine returned the next value together with an end-of-record flag, you could parse a file like this:
// Usage sketch: walk a tab-separated buffer value by value, counting records.
// The declarations of s, p, Value, eol and RecNo are not shown in this fragment.
// Sample data: two CRLF-terminated rows of two tab-separated (#9) columns each;
// the first value is double-quoted and contains an embedded line feed (#10).
s := '"first'#10'row"'#9'2nd col'#13#10'2nd row'#9'end'#13#10;
p := PUTF8Char(s);
RecNo := 1;
repeat
// fetch the next value; eol reports whether this value closed its record
GetNextItemCRLF(p, #9, '"', Value, @eol);
{do something with Value}
// a value that ended its record means the following value starts a new one
if eol then Inc(RecNo);
// stop on a nil pointer, on the #0 terminator, or on #26
// NOTE(review): #26 (Ctrl-Z) is presumably meant as a legacy end-of-file marker - confirm
until (p=nil)or(p^=#0)or(p^=#26);
Besides, you could safely parse a range from a spreadsheet obtained via Clipboard.AsText after it was copied to the clipboard from LibreOffice Calc or MS Excel. It would be a small piece of code for parsing CSV, if you imagine how CSV parsers out there take a sledgehammer to crack a nut.
A GetNextItemCRLF routine derived from GetNextItem and GetNextItemStringCRLF could look as follows:
/// extract the next unquoted value from a separator/line-feed delimited buffer
// - P: current read position; on return it points just after the delimiter
// that ended the value, or is nil when the #0 terminator was reached
// - Sep: the value separator character
// - result: receives the value text; one #13 directly before the stop
// position is not included
// - eol: optional flag, set to true when the value was ended by #10 or by the
// end of the buffer (also when P is nil on entry), false when ended by Sep
procedure GetNextItemCRLF(var P: PUTF8Char; Sep: AnsiChar; var result: RawUTF8;
  eol: PBoolean = nil); overload;
var
  Stop, Tail: PUTF8Char;
begin
  if P = nil then
  begin
    // nothing left to read: empty value, flagged as end of record
    if eol <> nil then
      eol^ := true;
    result := '';
    exit;
  end;
  // advance to the first terminator: end of buffer, separator or line feed
  Stop := P;
  while not ((Stop^ = #0) or (Stop^ = Sep) or (Stop^ = #10)) do
    inc(Stop);
  // only a separator keeps the record open
  if eol <> nil then
    eol^ := (Stop^ = #0) or (Stop^ = #10);
  // leave out a carriage return sitting right before the stop position
  Tail := Stop;
  if (Tail > P) and ((Tail - 1)^ = #13) then
    dec(Tail);
  FastSetString(result, P, Tail - P);
  // step over the delimiter, or signal exhaustion with nil
  if Stop^ = #0 then
    P := nil
  else
    P := Stop + 1;
end;
/// extract the next value from a delimited buffer, with support for quoting
// - P: current read position, advanced past the value and its delimiter
// - Sep: the value separator character
// - Quote: when the value starts with this character, it is decoded with
// UnQuoteSQLStringVar(); otherwise the unquoted overload does the work
// - result: receives the value; set to '' when P is nil or when
// UnQuoteSQLStringVar() returns nil
// - eol: optional flag, false when the value was followed by Sep, true otherwise
// - NOTE(review): UnQuoteSQLStringVar() presumably returns the position after
// the closing quote, or nil when the quoted text is not terminated - confirm
// - NOTE(review): any single character other than Sep following the closing
// quote (and an optional #13) is consumed as the delimiter
procedure GetNextItemCRLF(var P: PUTF8Char; Sep, Quote: AnsiChar;
  var result: RawUTF8; eol: PBoolean = nil); overload;
begin
  if eol<>nil then
    eol^ := true;
  if P=nil then
    result := ''
  else if P^=Quote then begin
    P := UnQuoteSQLStringVar(P,result);
    if P=nil then
      result := ''
    else begin
      // skip the CR of a CRLF pair (or a lone CR) after the closing quote
      if P^=#13 then
        inc(P);
      // re-test for the terminator here: a lone CR may be the very last
      // character, and stepping over #0 would leave P beyond the buffer
      if P^<>#0 then begin
        if (P^=Sep) and (eol<>nil) then
          eol^ := false;
        inc(P); // consume the delimiter (Sep or LF)
      end;
    end;
  end else
    GetNextItemCRLF(P,Sep,result,eol);
end;
/// return the next unquoted value from a separator/line-feed delimited buffer
// - P: current read position; on return it points just after the delimiter
// that ended the value, or is nil when the #0 terminator was reached
// - Sep: the value separator character
// - eol: optional flag, set to true when the value was ended by #10 or by the
// end of the buffer (also when P is nil on entry), false when ended by Sep
// - returns the value text; one #13 directly before the stop position is
// not included
function GetNextItemStringCRLF(var P: PChar; Sep: Char;
  eol: PBoolean = nil): String; overload;
var
  Stop, Tail: PChar;
begin
  // assume end of record until a separator proves otherwise
  if eol <> nil then
    eol^ := true;
  if P = nil then
  begin
    result := '';
    exit;
  end;
  // advance to the first terminator: end of buffer, separator or line feed
  Stop := P;
  while not ((Stop^ = #0) or (Stop^ = Sep) or (Stop^ = #10)) do
    inc(Stop);
  // leave out a carriage return sitting right before the stop position
  Tail := Stop;
  if (Tail > P) and ((Tail - 1)^ = #13) then
    dec(Tail);
  SetString(result, P, Tail - P);
  if Stop^ = #0 then
    P := nil // buffer exhausted
  else
  begin
    if (eol <> nil) and (Stop^ <> #10) then
      eol^ := false; // stopped on the separator: record continues
    P := Stop + 1;
  end;
end;
/// return the next value from a delimited buffer, with support for quoting
// - P: current read position, advanced past the value and its delimiter
// - Sep: the value separator character
// - Quote: when the value starts with this character, it is decoded with
// SysUtils.AnsiExtractQuotedStr(); otherwise the unquoted overload is used
// - eol: optional flag, false when the value was followed by Sep, true otherwise
// - returns the value; '' when P is nil
// - NOTE(review): any single character other than Sep following the closing
// quote (and an optional #13) is consumed as the delimiter
function GetNextItemStringCRLF(var P: PChar; Sep, Quote: Char;
  eol: PBoolean = nil): String; overload;
begin
  if eol<>nil then
    eol^ := true;
  if P=nil then
    result := ''
  else if P^=Quote then begin
    // AnsiExtractQuotedStr() advances P itself (var parameter)
    result := {SysUtils.}AnsiExtractQuotedStr(P,Quote);
    if P=nil then
      result := ''
    else begin
      // skip the CR of a CRLF pair (or a lone CR) after the closing quote
      if P^=#13 then
        inc(P);
      // re-test for the terminator here: a lone CR may be the very last
      // character, and stepping over #0 would leave P beyond the buffer
      if P^<>#0 then begin
        if (P^=Sep) and (eol<>nil) then
          eol^ := false;
        inc(P); // consume the delimiter (Sep or LF)
      end;
    end;
  end else
    result := GetNextItemStringCRLF(P,Sep,eol);
end;
AnsiExtractQuotedStr was used for the String version since there was no matching version of UnQuoteSQLStringVar.
I would appreciate it if such a routine could be included in the SynCommons unit and in mormot.core.text, respectively.
Offline
Pages: 1