unit superxmlparser; {$IFDEF FPC} {$MODE OBJFPC}{$H+} {$ENDIF} interface uses superobject, classes; type TOnProcessingInstruction = procedure(const PI, PIParent: ISuperObject); function XMLParseString(const data: SOString; pack: Boolean = false; onpi: TOnProcessingInstruction = nil): ISuperObject; function XMLParseStream(stream: TStream; pack: Boolean = false; onpi: TOnProcessingInstruction = nil): ISuperObject; function XMLParseFile(const FileName: string; pack: Boolean = false; onpi: TOnProcessingInstruction = nil): ISuperObject; const xmlname = '#name'; xmlattributes = '#attributes'; xmlchildren = '#children'; dtdname = '#name'; dtdPubidLiteral = '#pubidliteral'; dtdSystemLiteral = '#systemliteral'; implementation uses sysutils {$IFNDEF UNIX}, windows{$ENDIF}; const XML_SPACE : PSOChar = #32; // XML_ARL: PSOChar = '['; XML_ARR: PSOChar = ']'; XML_BIG: PSOChar = '>'; XML_LOW: PSOChar = '<'; XML_AMP: PSOChar = '&'; XML_SQU: PSOChar = ''''; XML_DQU: PSOChar = '"'; type TSuperXMLState = ( xsStart, // | xsEatSpaces, // xsElement, // <| xsElementName, // <[a..z]| xsAttributes, // ..<| xsCloseElementName, // ..| xsElementString, // |azer xsElementComment, // xsElementPI, // xsElementCDATA, // xsEscape, // &| xsEscape_lt, // &l|t; xsEscape_gt, // &g|t; xsEscape_amp, // &a|mp; xsEscape_apos, // &a|pos; xsEscape_quot, // &q|uot; xsEscape_char, // &#|; xsEscape_char_num, // |123456; xsEscape_char_hex, // &#x|000FFff; xsEnd); TSuperXMLError = (xeSuccess, xeContinue, xeProcessInst, xeError); TSuperXMLElementClass = (xcNone, xcElement, xcComment, xcString, xcCdata, xcDocType, xcProcessInst); TSuperXMLEncoding = ({$IFNDEF UNIX}xnANSI,{$ENDIF} xnUTF8, xnUnicode); PSuperXMLStack = ^TSuperXMLStack; TSuperXMLStack = record state: TSuperXMLState; savedstate: TSuperXMLState; prev: PSuperXMLStack; next: PSuperXMLStack; clazz: TSuperXMLElementClass; obj: ISuperObject; end; TSuperXMLParser = class private FStack: PSuperXMLStack; FDocType: ISuperObject; FError: TSuperXMLError; FStr: TSuperWriterString; FValue: TSuperWriterString; FPosition: Integer; FAChar: SOChar; FPack: Boolean; procedure StackUp; procedure StackDown; procedure Reset; function ParseBuffer(data: PSOChar; var PI, PIParent: ISuperObject; len: Integer = -1): Integer; public constructor Create(pack: Boolean); destructor Destroy; override; end; { TXMLContext } constructor TSuperXMLParser.Create(pack: Boolean); begin FDocType := nil; FStr := TSuperWriterString.Create; FValue := TSuperWriterString.Create; StackUp; FError := xeSuccess; FPack := pack; end; destructor TSuperXMLParser.Destroy; begin while FStack <> nil do StackDown; FStr.Free; FValue.Free; end; procedure TSuperXMLParser.Reset; begin while FStack <> nil do StackDown; StackUp; FError := xeSuccess; end; function TSuperXMLParser.ParseBuffer(data: PSOChar; var PI, PIParent: ISuperObject; len: integer): Integer; const spaces = [#32,#9,#10,#13]; alphas = ['a'..'z', 'A'..'Z', '_', ':', #161..#255]; nums = ['0'..'9', '.', '-']; hex = nums + ['a'..'f','A'..'F']; alphanums = alphas + nums; publitteral = [#32, #13, #10, 'a'..'z', 'A'..'Z', '0'..'9', '-', '''', '"', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*', '#', '@', '$', '_', '%']; function hexdigit(const x: SOChar): byte; begin if x <= '9' then Result := byte(x) - byte('0') else Result := (byte(x) and 7) + 9; end; procedure putchildrenstr; var anobject: ISuperObject; begin anobject := FStack^.obj.AsObject[xmlchildren]; if anobject = nil then begin anobject := TSuperObject.Create(stArray); FStack^.obj.AsObject[xmlchildren] := anobject; end; anobject.AsArray.Add(TSuperObject.Create(FValue.Data)); end; procedure AddProperty(const parent, value: ISuperObject; const name: SOString); var anobject: ISuperObject; arr: ISuperObject; begin anobject := parent.AsObject[name]; if anobject = nil then parent.AsObject[name] := value else begin if (anobject.DataType = stArray) then anobject.AsArray.Add(value) else begin arr := TSuperObject.Create(stArray); arr.AsArray.Add(anobject); arr.AsArray.Add(value); parent.AsObject[name] := arr; end; end; end; procedure packend; var anobject, anobject2: ISuperObject; n: Integer; begin anobject := FStack^.obj.AsObject[xmlchildren]; if (anobject <> nil) and (anobject.AsArray.Length = 1) and (anobject.AsArray[0].DataType = stString) then begin if FStack^.obj.AsObject.count = 2 then // name + children begin if FStack^.prev <> nil then AddProperty(FStack^.prev^.obj, anobject.AsArray[0], FStack^.obj.AsObject.S[xmlname]) else begin AddProperty(FStack^.obj, anobject.AsArray[0], '#text'); FStack^.obj.AsObject.Delete(xmlchildren); end; end else begin AddProperty(FStack^.obj, anobject.AsArray[0], FStack^.obj.AsObject.S[xmlname]); FStack^.obj.AsObject.Delete(xmlchildren); if FStack^.prev <> nil then AddProperty(FStack^.prev^.obj, FStack^.obj, FStack^.obj.AsObject.S[xmlname]) else FStack^.obj.AsObject.Delete(xmlchildren); FStack^.obj.AsObject.Delete(xmlname); end; end else begin if (anobject <> nil) then begin for n := 0 to anobject.AsArray.Length - 1 do begin anobject2 := anobject.AsArray[n]; if ObjectIsType(anobject2, stObject) then begin AddProperty(FStack^.obj, anobject2, anobject2.AsObject.S[xmlname]); anobject2.AsObject.Delete(xmlname); end else AddProperty(FStack^.obj, anobject2, '#text'); end; FStack^.obj.Delete(xmlchildren); end; if FStack^.prev <> nil then AddProperty(FStack^.prev^.obj, FStack^.obj, FStack^.obj.AsObject.S[xmlname]); FStack^.obj.Delete(xmlname); end; end; var c: SOChar; read: Integer; p: PSOChar; anobject: ISuperObject; label redo, err; begin p := data; read := 0; //Result := 0; repeat if (read = len) then begin if (FStack^.prev = nil) and ((FStack^.state = xsEnd) or ((FStack^.state = xsEatSpaces) and (FStack^.savedstate = xsEnd))) then begin if FPack then packend; FError := xeSuccess; end else FError := xeContinue; Result := read; exit; end; c := p^; redo: case FStack^.state of xsEatSpaces: if {$IFDEF UNICODE}(c < #256) and {$ENDIF} (AnsiChar(c) in spaces) then {nop} else begin FStack^.state := FStack^.savedstate; goto redo; end; xsStart: case c of '<': FStack^.state := xsElement; else goto err; end; xsElement: begin case c of '?': begin FStack^.savedstate := xsStart; FStack^.state := xsEatSpaces; StackUp; FStr.Reset; FStack^.state := xsElementPI; FStack^.clazz := xcProcessInst; end; '!': begin FPosition := 0; FStack^.state := xsElementComment; FStack^.clazz := xcComment; end; else if ((c < #256) and (AnsiChar(c) in alphas)) or (c >= #256) then begin FStr.Reset; FStack^.state := xsElementName; FStack^.clazz := xcElement; goto redo; end else goto err; end; end; xsElementPI: begin if ((c < #256) and (AnsiChar(c) in alphanums)) or (c >= #256) then FStr.Append(@c, 1) else begin FStack^.obj := TSuperObject.Create(stObject); FStack^.obj.AsObject.S[xmlname] := FStr.Data; FStack^.state := xsEatSpaces; if FStr.Data = 'xml' then FStack^.savedstate := xsAttributes else begin FValue.Reset; FStack^.savedstate := xsElementDataPI; end; goto redo; end; end; xsElementDataPI: begin case c of '?': begin FStack^.obj.AsObject.S['data'] := FValue.Data; FStack^.state := xsCloseElementPI; end; else FValue.Append(@c, 1); end; end; xsCloseElementPI: begin if (c <> '>') then goto err; PI := FStack^.obj; StackDown; PIParent := FStack^.obj; FError := xeProcessInst; Result := read + 1; Exit; end; xsElementName: begin if ((c < #256) and (AnsiChar(c) in alphanums)) or (c >= #256) then FStr.Append(@c, 1) else begin FStack^.obj := TSuperObject.Create(stObject); FStack^.obj.AsObject.S[xmlname] := FStr.Data; FStack^.state := xsEatSpaces; FStack^.savedstate := xsAttributes; goto redo; end; end; xsChildren: begin case c of '<': FStack^.state := xsTryCloseElement; else FValue.Reset; FStack^.state := xsElementString; FStack^.clazz := xcString; goto redo; end; end; xsCloseEmptyElement: begin case c of '>': begin FStack^.state := xsEatSpaces; FStack^.savedstate := xsEnd; end else goto err; end; end; xsTryCloseElement: begin case c of '/': begin FStack^.state := xsCloseElementName; FPosition := 0; FStr.Reset; FStr.Append(PSoChar(FStack^.obj.AsObject.S[xmlname])); end; '!': begin FPosition := 0; FStack^.state := xsElementComment; FStack^.clazz := xcComment; end; '?': begin FStack^.savedstate := xsChildren; FStack^.state := xsEatSpaces; StackUp; FStr.Reset; FStack^.state := xsElementPI; FStack^.clazz := xcProcessInst; end else FStack^.state := xsChildren; StackUp; if ((c < #256) and (AnsiChar(c) in alphas)) or (c >= #256) then begin FStr.Reset; FStack^.state := xsElementName; FStack^.clazz := xcElement; goto redo; end else goto err; end; end; xsCloseElementName: begin if FStr.Position = FPosition then begin FStack^.savedstate := xsCloseEmptyElement; FStack^.state := xsEatSpaces; goto redo; end else begin if (c <> FStr.Data[FPosition]) then goto err; inc(FPosition); end; end; xsAttributes: begin case c of '?': begin if FStack^.clazz <> xcProcessInst then goto err; FStack^.state := xsCloseElementPI; end; '/': begin FStack^.state := xsCloseEmptyElement; end; '>': begin FStack^.state := xsEatSpaces; FStack^.savedstate := xsChildren; end else if ((c < #256) and (AnsiChar(c) in alphas)) or (c >= #256) then begin FStr.Reset; FStr.Append(@c, 1); FStack^.state := xsAttributeName; end else goto err; end; end; xsAttributeName: begin if ((c < #256) and (AnsiChar(c) in alphanums)) or (c >= #256) then FStr.Append(@c, 1) else begin // no duplicate attribute if FPack then begin if FStack^.obj.AsObject[FStr.Data] <> nil then goto err; end else begin anobject := FStack^.obj.AsObject[xmlattributes]; if (anobject <> nil) and (anobject.AsObject[FStr.Data] <> nil) then goto err; end; FStack^.state := xsEatSpaces; FStack^.savedstate := xsEqual; goto redo; end; end; xsEqual: begin if c <> '=' then goto err; FStack^.state := xsEatSpaces; FStack^.savedstate := xsAttributeValue; FValue.Reset; FPosition := 0; FAChar := #0; end; xsAttributeValue: begin if FAChar <> #0 then begin if (c = FAChar) then begin if FPack then begin FStack^.obj.AsObject[FStr.Data] := TSuperObject.Create(Fvalue.Data); end else begin anobject := FStack^.obj.AsObject[xmlattributes]; if anobject = nil then begin anobject := TSuperObject.Create(stObject); FStack^.obj.AsObject[xmlattributes] := anobject; end; anobject.AsObject[FStr.Data] := TSuperObject.Create(Fvalue.Data); end; FStack^.savedstate := xsAttributes; FStack^.state := xsEatSpaces; end else case c of '&': begin FStack^.state := xsEscape; FStack^.savedstate := xsAttributeValue; end; #13, #10: begin FValue.TrimRight; FValue.Append(XML_SPACE, 1); FStack^.state := xsEatSpaces; FStack^.savedstate := xsAttributeValue; end; else FValue.Append(@c, 1); end; end else begin if (c < #256) and (AnsiChar(c) in ['"', '''']) then begin FAChar := c; inc(FPosition); end else goto err; end; end; xsElementString: begin case c of '<': begin FValue.TrimRight; putchildrenstr; FStack^.state := xsTryCloseElement; end; #13, #10: begin FValue.TrimRight; FValue.Append(XML_SPACE, 1); FStack^.state := xsEatSpaces; FStack^.savedstate := xsElementString; end; '&': begin FStack^.state := xsEscape; FStack^.savedstate := xsElementString; end else FValue.Append(@c, 1); end; end; xsElementComment: begin case FPosition of 0: begin case c of '-': Inc(FPosition); '[': begin FValue.Reset; FPosition := 0; FStack^.state := xsElementCDATA; FStack^.clazz := xcCdata; end; 'D': begin if (FStack^.prev = nil) and (FDocType = nil) then begin FStack^.state := xsElementDocType; FPosition := 0; FStack^.clazz := xcDocType; end else goto err; end; else goto err; end; end; 1: begin if c <> '-' then goto err; Inc(FPosition); end; else if c = '-' then begin FPosition := 0; FStack^.state := xsCloseElementComment; end; end; end; xsCloseElementComment: begin case FPosition of 0: begin if c <> '-' then begin FPosition := 2; FStack^.state := xsElementComment; end else Inc(FPosition); end; 1: begin if c <> '>' then goto err; FStack^.state := xsEatSpaces; if FStack^.obj <> nil then FStack^.savedstate := xsChildren else FStack^.savedstate := xsStart; end; end; end; xsElementCDATA: begin case FPosition of 0: if (c = 'C') then inc(FPosition) else goto err; 1: if (c = 'D') then inc(FPosition) else goto err; 2: if (c = 'A') then inc(FPosition) else goto err; 3: if (c = 'T') then inc(FPosition) else goto err; 4: if (c = 'A') then inc(FPosition) else goto err; 5: if (c = '[') then inc(FPosition) else goto err; else case c of ']': begin FPosition := 0; FStack^.state := xsClodeElementCDATA; end; else FValue.Append(@c, 1); end; end; end; xsClodeElementCDATA: begin case FPosition of 0: if (c = ']') then inc(FPosition) else begin FValue.Append(XML_ARR, 1); FValue.Append(@c, 1); FPosition := 6; FStack^.state := xsElementCDATA; end; 1: case c of '>': begin putchildrenstr; FStack^.state := xsEatSpaces; FStack^.savedstate := xsChildren; end; ']': begin FValue.Append(@c, 1); end; else FValue.Append(@c, 1); FStack^.state := xsElementCDATA; end; end; end; xsElementDocType: begin case FPosition of 0: if (c = 'O') then inc(FPosition) else goto err; 1: if (c = 'C') then inc(FPosition) else goto err; 2: if (c = 'T') then inc(FPosition) else goto err; 3: if (c = 'Y') then inc(FPosition) else goto err; 4: if (c = 'P') then inc(FPosition) else goto err; 5: if (c = 'E') then inc(FPosition) else goto err; else if (c < #256) and (AnsiChar(c) in spaces) then begin FStack^.state := xsEatSpaces; FStack^.savedstate := xsElementDocTypeName; FStr.Reset; end else goto err; end; end; xsElementDocTypeName: begin case FStr.Position of 0: begin case c of '>': begin FStack^.state := xsEatSpaces; FStack^.state := xsStart; FStack^.clazz := xcNone; end else if ((c < #256) and (AnsiChar(c) in alphas)) or (c > #256) then FStr.Append(@c, 1) else goto err; end; end; else if ((c < #256) and (AnsiChar(c) in alphanums)) or (c > #256) then FStr.Append(@c, 1) else if (c < #256) and (AnsiChar(c) in spaces) then begin FDocType := TSuperObject.Create(stObject); FDocType.AsObject.S[xmlname] := FStr.Data; FStack^.state := xsEatSpaces; FStack^.savedstate := xsElementDocTypeExternId; end else goto err; end; end; xsElementDocTypeExternId: begin case c of 'P': begin FPosition := 0; FStack^.state := xsElementDocTypeExternIdPublic; end; 'S': begin FPosition := 0; FStack^.state := xsElementDocTypeExternIdSystem; end; '[': begin FStack^.savedstate := xsElementDocTypeIntSubset; FStack^.state := xsEatSpaces; end; '>': begin FStack^.savedstate := xsStart; FStack^.state := xsEatSpaces end else goto err; end; end; xsElementDocTypeExternIdPublic: begin case FPosition of 0: if (c = 'U') then inc(FPosition) else goto err; 1: if (c = 'B') then inc(FPosition) else goto err; 2: if (c = 'L') then inc(FPosition) else goto err; 3: if (c = 'I') then inc(FPosition) else goto err; 4: if (c = 'C') then inc(FPosition) else goto err; else if (c < #256) and (AnsiChar(c) in spaces) then begin FStr.Reset; FPosition := 0; FStack^.savedstate := xsElementDocTypePubIdLiteral; FStack^.state := xsEatSpaces; end else goto err; end; end; xsElementDocTypeExternIdSystem: begin case FPosition of 0: if (c = 'Y') then inc(FPosition) else goto err; 1: if (c = 'S') then inc(FPosition) else goto err; 2: if (c = 'T') then inc(FPosition) else goto err; 3: if (c = 'E') then inc(FPosition) else goto err; 4: if (c = 'M') then inc(FPosition) else goto err; else if (c < #256) and (AnsiChar(c) in spaces) then begin FStr.Reset; FPosition := 0; FStack^.savedstate := xsElementDocTypeSystemLiteral; FStack^.state := xsEatSpaces; end else goto err; end; end; xsElementDocTypePubIdLiteral: begin if FPosition = 0 then case c of '"', '''': begin FAChar := c; FPosition := 1; end else goto err; end else if c = FAChar then begin FDocType.AsObject.S[dtdPubidLiteral] := FStr.Data; FStr.Reset; FPosition := 0; FStack^.state := xsEatSpaces; FStack^.savedstate := xsElementDocTypeSystemLiteral; end else if (c < #256) and (AnsiChar(c) in publitteral) then FStr.Append(@c, 1); end; xsElementDocTypeSystemLiteral: begin if FPosition = 0 then case c of '"', '''': begin FAChar := c; FPosition := 1; end else goto err; end else if c = FAChar then begin FDocType.AsObject.S[dtdSystemLiteral] := FStr.Data; FStack^.state := xsEatSpaces; FStack^.savedstate := xsElementDocTypeTryIntSubset; end else FStr.Append(@c, 1); end; xsElementDocTypeTryIntSubset: begin case c of '>': begin FStack^.state := xsEatSpaces; FStack^.savedstate := xsStart; FStack^.clazz := xcNone; end; '[': begin FStack^.state := xsEatSpaces; FStack^.savedstate := xsElementDocTypeIntSubset; end; end; end; xsElementDocTypeIntSubset: begin case c of ']': begin FStack^.state := xsEatSpaces; FStack^.savedstate := xsElementDocTypeTryClose; end; end; end; xsElementDocTypeTryClose: begin if c = '>' then begin FStack^.state := xsEatSpaces; FStack^.savedstate := xsStart; FStack^.clazz := xcNone; end else goto err; end; xsEscape: begin FPosition := 0; case c of 'l': FStack^.state := xsEscape_lt; 'g': FStack^.state := xsEscape_gt; 'a': FStack^.state := xsEscape_amp; 'q': FStack^.state := xsEscape_quot; '#': FStack^.state := xsEscape_char; else goto err; end; end; xsEscape_lt: begin case FPosition of 0: begin if c <> 't' then goto err; Inc(FPosition); end; 1: begin if c <> ';' then goto err; FValue.Append(XML_LOW, 1); FStack^.state := FStack^.savedstate; end; end; end; xsEscape_gt: begin case FPosition of 0: begin if c <> 't' then goto err; Inc(FPosition); end; 1: begin if c <> ';' then goto err; FValue.Append(XML_BIG, 1); FStack^.state := FStack^.savedstate; end; end; end; xsEscape_amp: begin case FPosition of 0: begin case c of 'm': Inc(FPosition); 'p': begin FStack^.state := xsEscape_apos; Inc(FPosition); end; else goto err; end; end; 1: begin if c <> 'p' then goto err; Inc(FPosition); end; 2: begin if c <> ';' then goto err; FValue.Append(XML_AMP, 1); FStack^.state := FStack^.savedstate; end; end; end; xsEscape_apos: begin case FPosition of 0: begin case c of 'p': Inc(FPosition); 'm': begin FStack^.state := xsEscape_amp; Inc(FPosition); end; else goto err; end; end; 1: begin if c <> 'o' then goto err; Inc(FPosition); end; 2: begin if c <> 's' then goto err; Inc(FPosition); end; 3: begin if c <> ';' then goto err; FValue.Append(XML_SQU, 1); FStack^.state := FStack^.savedstate; end; end; end; xsEscape_quot: begin case FPosition of 0: begin if c <> 'u' then goto err; Inc(FPosition); end; 1: begin if c <> 'o' then goto err; Inc(FPosition); end; 2: begin if c <> 't' then goto err; Inc(FPosition); end; 3: begin if c <> ';' then goto err; FValue.Append(XML_DQU, 1); FStack^.state := FStack^.savedstate; end; end; end; xsEscape_char: begin if (SOIChar(c) >= 256) then goto err; case AnsiChar(c) of '0'..'9': begin FPosition := SOIChar(c) - 48; FStack^.state := xsEscape_char_num; end; 'x': begin FStack^.state := xsEscape_char_hex; end else goto err; end; end; xsEscape_char_num: begin if (SOIChar(c) >= 256) then goto err; case AnsiChar(c) of '0'..'9':FPosition := (FPosition * 10) + (SOIChar(c) - 48); ';': begin FValue.Append(@FPosition, 1); FStack^.state := FStack^.savedstate; end; else goto err; end; end; xsEscape_char_hex: begin if (c >= #256) then goto err; if (AnsiChar(c) in hex) then begin FPosition := (FPosition * 16) + SOIChar(hexdigit(c)); end else if c = ';' then begin FValue.Append(@FPosition, 1); FStack^.state := FStack^.savedstate; end else goto err; end; xsEnd: begin if(FStack^.prev = nil) then Break; if FStack^.obj <> nil then begin if FPack then packend else begin anobject := FStack^.prev^.obj.AsObject[xmlchildren]; if anobject = nil then begin anobject := TSuperObject.Create(stArray); FStack^.prev^.obj.AsObject[xmlchildren] := anobject; end; anobject.AsArray.Add(FStack^.obj); end; end; StackDown; goto redo; end; end; inc(p); inc(read); until (c = #0); if FStack^.state = xsEnd then begin if FPack then packend; FError := xeSuccess; end else FError := xeError; Result := read; exit; err: FError := xeError; Result := read; end; function XMLParseFile(const FileName: string; pack: Boolean; onpi: TOnProcessingInstruction): ISuperObject; var stream: TFileStream; begin stream := TFileStream.Create(FileName, fmOpenRead, fmShareDenyWrite); try Result := XMLParseStream(stream, pack, onpi); finally stream.Free; end; end; procedure TSuperXMLParser.StackDown; var prev: PSuperXMLStack; begin if FStack <> nil then begin prev := FStack^.prev; FStack^.obj := nil; FreeMem(FStack); FStack := prev; if FStack <> nil then FStack^.next := nil; end; end; procedure TSuperXMLParser.StackUp; var st: PSuperXMLStack; begin {$IFDEF FPC} st := nil; {$ENDIF} GetMem(st, SizeOf(st^)); FillChar(st^, SizeOf(st^), 0); st^.state := xsEatSpaces; st^.savedstate := xsStart; st^.prev := FStack; if st^.prev <> nil then st^.prev^.next := st; st^.next := nil; st^.obj := nil; FStack := st; end; function utf8toucs2(src: PAnsiChar; srclen: Integer; dst: PWideChar; unused: PInteger): Integer; var ch: Byte; ret: Word; min: Cardinal; rem, com: integer; label redo; begin Result := 0; ret := 0; rem := 0; min := 0; if unused <> nil then unused^ := 0; if(src = nil) or (srclen = 0) then begin dst^ := #0; Exit; end; while srclen > 0 do begin ch := Byte(src^); inc(src); dec(srclen); redo: if (ch and $80) = 0 then begin dst^ := WideChar(ch); inc(Result); end else begin if((ch and $E0) = $C0) then begin min := $80; rem := 1; ret := ch and $1F; end else if((ch and $F0) = $E0) then begin min := $800; rem := 2; ret := ch and $0F; end else // too large utf8 bloc // ignore and continue continue; com := rem; while(rem <> 0) do begin dec(rem); if(srclen = 0) then begin if unused <> nil then unused^ := com; Exit; end; ch := Byte(src^); inc(src); dec(srclen); if((ch and $C0) = $80) then begin ret := ret shl 6; ret := ret or (ch and $3F); end else begin // unterminated utf8 bloc :/ // try next one goto redo; end; end; if (ch >= min) then begin dst^ := WideChar(ret); inc(Result); end else begin // too small utf8 bloc // ignore and continue Continue; end; end; inc(dst); end; end; function XMLParseStream(stream: TStream; pack: Boolean; onpi: TOnProcessingInstruction): ISuperObject; const CP_UTF8 = 65001; var wbuffer: array[0..1023] of SOChar; abuffer: array[0..1023] of AnsiChar; len, read, cursor: Integer; PI, PIParent: ISuperObject; bom: array[0..2] of byte; encoding: TSuperXMLEncoding; encodingstr: string; cp: Integer; ecp: ISuperObject; function getbuffer: Integer; var size, unusued: Integer; begin case encoding of {$IFNDEF UNIX} xnANSI: begin size := stream.Read(abuffer, sizeof(abuffer)); result := MultiByteToWideChar(cp, 0, @abuffer, size, @wbuffer, sizeof(wbuffer)); end; {$ENDIF} xnUTF8: begin size := stream.Read(abuffer, sizeof(abuffer)); result := utf8toucs2(@abuffer, size, @wbuffer, @unusued); if unusued > 0 then stream.Seek(-unusued, soFromCurrent); end; xnUnicode: Result := stream.Read(wbuffer, sizeof(wbuffer)) div sizeof(SOChar); else Result := 0; end; end; label redo, retry; begin // init knowned code pages ecp := so('{iso-8859-1: 28591,'+ 'iso-8859-2: 28592,'+ 'iso-8859-3: 28593,'+ 'iso-8859-4: 28594,'+ 'iso-8859-5: 28595,'+ 'iso-8859-6: 28596,'+ 'iso-8859-7: 28597,'+ 'iso-8859-8: 28598,'+ 'iso-8859-9: 28599,'+ 'iso 8859-15: 28605,'+ 'iso-2022-jp: 50220,'+ 'shift_jis: 932,'+ 'euc-jp: 20932,'+ 'ascii: 20127,'+ 'windows-1251: 1251,'+ 'windows-1252: 1252}'); // detect bom stream.Seek(0, soFromBeginning); len := stream.Read(bom, sizeof(bom)); if (len >= 2) and (bom[0] = $FF) and (bom[1] = $FE) then begin encoding := xnUnicode; stream.Seek(2, soFromBeginning); end else if (len = 3) and (bom[0] = $EF) and (bom[1] = $BB) and (bom[2] = $BF) then begin encoding := xnUTF8; cp := CP_UTF8; end else begin encoding := xnUTF8; cp := 0; stream.Seek(0, soFromBeginning); end; with TSuperXMLParser.Create(pack) do try len := getbuffer; while len > 0 do begin retry: read := ParseBuffer(@wbuffer, PI, PIParent, len); cursor := 0; redo: case FError of xeContinue: len := getbuffer; xeSuccess, xeError: Break; xeProcessInst: begin if (PIParent = nil) and (PI.AsObject.S[xmlname] = 'xml') then begin if pack then encodingstr := LowerCase(trim(PI.S['encoding'])) else encodingstr := LowerCase(trim(PI.S[xmlattributes + '.encoding'])); if (encodingstr <> '') then case encoding of xnUTF8: if(cp = CP_UTF8) then begin if (encodingstr <> 'utf-8') then begin FError := xeError; Break; end; end else begin cp := ecp.I[encodingstr]; if cp > 0 then begin {$IFNDEF UNIX} encoding := xnANSI; Reset; stream.Seek(0, soFromBeginning); len := getbuffer; goto retry; {$ELSE} raise Exception.Create('charset not implemented'); {$ENDIF} end; end; xnUnicode: if (encodingstr <> 'utf-16') and (encodingstr <> 'unicode') then begin FError := xeError; Break; end; end; end else if Assigned(onpi) then onpi(PI, PIParent); inc(cursor, read); if cursor >= len then begin len := getbuffer; continue; end; read := ParseBuffer(@wbuffer[cursor], PI, PIParent, len - cursor); goto redo; end; end; end; if FError = xeSuccess then Result := FStack^.obj else Result := nil; finally Free; end; end; function XMLParseString(const data: SOString; pack: Boolean; onpi: TOnProcessingInstruction): ISuperObject; var PI, PIParent: ISuperObject; cursor, read: Integer; label redo; begin with TSuperXMLParser.Create(pack) do try cursor := 0; read := ParseBuffer(PSOChar(data), PI, PIParent); redo: case FError of xeSuccess: Result := FStack^.obj; xeError: Result := nil; xeProcessInst: begin if Assigned(onpi) then onpi(PI, PIParent); inc(cursor, read); read := ParseBuffer(@data[cursor+1], PI, PIParent); goto redo; end; end; finally Free; end; end; end.