@@ -1307,14 +1307,19 @@ function FormatTimeNumber(Seconds: Double; DisplaySeconds: Boolean; MilliSeconds
13071307}
{**
  Write Text to Filename using the given encoding, replacing any existing file.
  @param Filename Path of the file to create or overwrite
  @param Text     Content to write
  @param Encoding Target encoding; when nil, UTF8NoBOMEncoding is used
  The encoding's preamble (BOM), if it has one, is written in front of the
  content, so that a BOM based detection finds the same encoding again when
  the file is reopened.
}
procedure SaveUnicodeFile(Filename: String; Text: String; Encoding: TEncoding);
var
  Writer: TFileStream;
  Preamble, Bytes: TBytes;
begin
  // Encoding may be nil when previously loaded via auto-detection
  if not Assigned(Encoding) then
    Encoding := UTF8NoBOMEncoding;
  // Encode before touching the file, so a failing conversion does not truncate it
  Preamble := Encoding.GetPreamble;
  Bytes := Encoding.GetBytes(Text);
  Writer := TFileStream.Create(Filename, fmCreate);
  try
    // Guard both writes: indexing element 0 of an empty array is invalid
    if Length(Preamble) > 0 then
      Writer.WriteBuffer(Preamble[0], Length(Preamble));
    if Length(Bytes) > 0 then
      Writer.WriteBuffer(Bytes[0], Length(Bytes));
  finally
    Writer.Free;
  end;
end;
13191324
13201325
@@ -1325,56 +1330,62 @@ procedure OpenTextFile(const Filename: String; out Stream: TFileStream; var Enco
13251330begin
13261331 // Open a textfile and return a stream. Detect its encoding if not passed by the caller
13271332 Stream := TFileStream.Create(Filename, fmOpenRead or fmShareDenyNone);
1328- // if Encoding = nil then
1329- // Encoding := DetectEncoding(Stream);
1333+ if Encoding = nil then
1334+ Encoding := DetectEncoding(Stream);
13301335 // If the file contains a BOM, advance the stream's position
13311336 BomLen := 0 ;
1332- { if Length(Encoding.GetPreamble) > 0 then begin
1337+ if Length(Encoding.GetPreamble) > 0 then begin
13331338 SetLength(Header, Length(Encoding.GetPreamble));
13341339 Stream.ReadBuffer(Pointer(Header)^, Length(Header));
13351340 if CompareMem(Header, Encoding.GetPreamble, SizeOf(Header)) then
13361341 BomLen := Length(Encoding.GetPreamble);
1337- end;}
1342+ end ;
13381343 Stream.Position := BomLen;
13391344end ;
13401345
13411346
{**
  Detect a stream's content encoding from its byte order mark. Result can be:
    UTF-16 BE with BOM
    UTF-16 LE with BOM
    UTF-8 with or without BOM
  Content without one of these BOMs is reported as UTF-8 without BOM.
  The stream's position is restored to its previous value before returning.
}
function DetectEncoding(Stream: TStream): TEncoding;
const
  MarkUtf8: array[0..2] of Byte = ($EF, $BB, $BF);
  MarkUtf16LE: array[0..1] of Byte = ($FF, $FE);
  MarkUtf16BE: array[0..1] of Byte = ($FE, $FF);
var
  Head: array[0..3] of Byte;
  BytesRead: Integer;
  SavedPos: Int64;

  // True if enough bytes were read and the head of the stream equals Mark
  function StartsWith(const Mark: array of Byte): Boolean;
  begin
    Result := (BytesRead >= Length(Mark))
      and CompareMem(@Head[0], @Mark[0], Length(Mark));
  end;

begin
  // Fallback when none of the known BOMs matches
  Result := UTF8NoBOMEncoding;

  // Peek at the first bytes, then put the stream back where the caller left it
  SavedPos := Stream.Position;
  Stream.Position := 0;
  try
    BytesRead := Stream.Read(Head, SizeOf(Head));
  finally
    Stream.Position := SavedPos;
  end;

  // Could add detection for UTF-32 BOMs too if needed
  if StartsWith(MarkUtf8) then
    Result := TEncoding.UTF8
  else if StartsWith(MarkUtf16LE) then
    Result := TEncoding.Unicode // UTF-16 LE
  else if StartsWith(MarkUtf16BE) then
    Result := TEncoding.BigEndianUnicode; // UTF-16 BE
end;
13731383
13741384
13751385function ReadTextfileChunk (Stream: TFileStream; Encoding: TEncoding; ChunkSize: Int64 = 0 ): String;
13761386var
13771387 DataLeft: Int64;
1388+ Bytes: TBytes;
13781389begin
13791390 // Read a chunk or the complete contents out of a textfile, opened by OpenTextFile()
13801391 if Stream.Size = 0 then begin
@@ -1386,8 +1397,9 @@ function ReadTextfileChunk(Stream: TFileStream; Encoding: TEncoding; ChunkSize:
13861397 if (ChunkSize = 0 ) or (ChunkSize > DataLeft) then
13871398 ChunkSize := DataLeft;
13881399
1389- SetLength(Result, ChunkSize);
1390- Stream.Read(PChar(Result)^, ChunkSize);
1400+ SetLength(Bytes, ChunkSize);
1401+ Stream.ReadBuffer(Bytes[0 ], Length(Bytes));
1402+ Result := Encoding.GetString(Bytes);
13911403end ;
13921404
13931405
0 commit comments