// ============================================================================
// Encoding of message-headers.
// Copyright (c) 1999, Juergen Haible. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// ============================================================================

unit uEncoding; // Encoding of message-headers.

// ----------------------------------------------------------------------------
// Encoding of message-headers.
// Adapted from various sources I can't remember in detail - the tin-sources
// (www.tin.org) were surely amongst them.
// ----------------------------------------------------------------------------

interface

function EncodeB64( const BufTxt; BufLen: Integer ): String;
function DecodeB64( const BufB64; BufLen: Integer ): String;

function xEncodeQP( const BufTxt; BufLen: Integer ): String;
function DecodeQP ( const BufQP;  BufLen: Integer ): String;

function DecodeHeadervalue( const Org: String ): String; overload;
function DecodeHeadervalue( const Org: String; Out CharSet : String ): String; overload;

Procedure ConvertCharSetToLocal (Const OriginCharset: String; Var Text: String);

function DecodeUTF7toUCS16(Const Text: string): String;
function DecodeUTF8toUCS32(Const Text: string): String;

// ----------------------------------------------------------------------------

implementation

uses SysUtils, classes,
     utools, global, cLogfile;

const
   Base64Pad       = '=';
   Base64Alphabet  : PChar   = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef'
                             + 'ghijklmnopqrstuvwxyz0123456789+/';
Var
   Base64TableBuilt: Boolean = False;

var
   Base64Table: array[#0..#255] of Byte;

function EncodeB64( const BufTxt; BufLen: Integer ): String;
var  BufPtr, BufEnd: PChar;
     i, Count, Temp: Integer;
begin
   Result := '';

   BufPtr := @BufTxt;
   BufEnd := BufPtr + BufLen;

   while BufPtr<BufEnd do begin
      Temp := 0;
      Count := 0;
      for i:=0 to 2 do begin
         Temp := (Temp shl 8);
         if BufPtr<BufEnd then begin
            Temp := Temp or ord( BufPtr^ );
            inc( BufPtr );
            inc( Count );
         end;
      end;

      Result := Result + Base64Alphabet[ (Temp shr 18) and $3F ]
                       + Base64Alphabet[ (Temp shr 12) and $3F ];
      if Count>=2 then Result := Result + Base64Alphabet[ (Temp shr 6) and $3F ]
                  else Result := Result + Base64Pad;
      if Count>=3 then Result := Result + Base64Alphabet[ Temp and $3F ]
                  else Result := Result + Base64Pad;
   end;
end;

function DecodeB64( const BufB64; BufLen: Integer ): String;
var  BufPtr, BufEnd: PChar;
     Pattern, Bits : Integer;
     b64: Byte;
begin
   Result := '';

   if not Base64TableBuilt then begin
      FillChar( Base64Table, sizeof(Base64Table), $FF );
      for b64:=0 to 63 do Base64Table[ Base64Alphabet[b64] ] := b64;
      Base64TableBuilt := True;
   end;

   BufPtr := @BufB64;
   BufEnd := BufPtr + BufLen;

   Pattern := 0;
   Bits    := 0;

   while BufPtr<BufEnd do begin
      b64 := Base64Table[ BufPtr^ ];
      inc( BufPtr );
      if b64<>$FF then begin
         Pattern := (Pattern shl 6) or b64;
         inc( Bits, 6 );
         if Bits>=8 then begin
            dec( Bits, 8 );
            Result := Result + chr( ( Pattern shr Bits ) and $FF );
         end;
      end;
   end;
end;

// ----------------------------------------------------------------------------

function xEncodeQP( const BufTxt; BufLen: Integer ): String;
// Note: Just "quick&dirty" for testing purposes.
var  BufPtr, BufEnd: PChar;
     ch: Char;
begin
   Result := '';

   BufPtr := @BufTxt;
   BufEnd := BufPtr + BufLen;

   while BufPtr<BufEnd do begin
      ch := BufPtr^;
      inc( BufPtr );

      case ch of
         #0..#31,
         #61,
         #127..#255: Result := Result + '=' + inttohex(ord(ch),2);
         #32:        Result := Result + '_';
         else        Result := Result + ch;
      end;
   end;
end;

function DecodeQP( const BufQP; BufLen: Integer ): String;

   function Nibble( c: Char ): Byte;
   begin
      case c of
         '0'..'9': Result := Byte(c) - Byte('0');
         'A'..'F': Result := Byte(c) - Byte('A') + 10;
         'a'..'f': Result := Byte(c) - Byte('a') + 10;
         else      Result := 255;
      end;
   end;

var  BufPtr, BufEnd: PChar;
     ch            : Char;
     hi, lo        : Byte;
begin
   Result := '';

   BufPtr := @BufQP;
   BufEnd := BufPtr + BufLen;

   while BufPtr<BufEnd do begin
      ch := BufPtr^;
      inc( BufPtr );

      if ch='=' then begin

         if BufPtr>=BufEnd then break;
         ch := BufPtr^;
         inc( BufPtr );

         if (ch<>#13) and (ch<>#10) then begin
            if BufPtr>=BufEnd then break;
            hi := Nibble(ch);
            lo := Nibble(BufPtr^);
            inc( BufPtr );
            if (hi=255) or (lo=255) then exit;
            ch := chr( (hi shl 4) + lo );
            Result := Result + ch;
         end;

      end else begin

         if (ch<>'_') then Result := Result + ch
                      else Result := Result + ' ';

      end;
   end;
end;

// ----------------------------------------------------------------------------

function isspace( c: Char ): Boolean;
begin
   Result := ( c in [ #9, #10, #11, #12, #13, ' ' ] );
end;

function DecodeHeadervalue( const Org: String ): String;
Var Charset: String;
begin
   Result := DecodeHeadervalue( Org, Charset )
end;

function DecodeHeadervalue( const Org: String; Out Charset: String ): String;
var  BufPtr, BufEnd, Temp: PChar; TempStr: String; Encoding: Char;
begin
   Result := '';
   If Org = '' then Exit;
   BufPtr := PChar(Org);
   BufEnd := @Org[Length(Org)];
   Inc(BufEnd);
   while BufPtr<BufEnd do begin
      if BufPtr^='=' then begin
         inc( BufPtr );
         if (BufPtr>=BufEnd) then break;
         if BufPtr^<>'?' then begin
            Result := Result + '=' + BufPtr^;
            inc( BufPtr );
         end else begin
            CharSet := '';
            inc( BufPtr );
            while (BufPtr<BufEnd) and (BufPtr^<>'?') do begin
               CharSet := CharSet + BufPtr^;
               inc( BufPtr );
            end;
            inc( BufPtr );
            if (BufPtr>=BufEnd) then break;
            Encoding := BufPtr^;
            inc( BufPtr );
            if (BufPtr>=BufEnd) then break;
            if BufPtr^='?' then begin
               inc( BufPtr );
               if (BufPtr>=BufEnd) then break;
               Temp := StrScan( BufPtr, '?' );
               if Temp<>nil then begin
                  if UpperCase(Encoding)='B'
                     then TempStr := DecodeB64( BufPtr^, Temp - BufPtr )
                     else TempStr := DecodeQP ( BufPtr^, Temp - BufPtr );
                  ConvertCharSetToLocal(Charset, TempStr);
                  Result := Result + TempStr;
                  BufPtr := Temp + 1;
                  if (BufPtr<BufEnd) and (BufPtr^='=') then inc(BufPtr);
                  Temp := BufPtr;
                  while (Temp<BufEnd) and isspace(Temp^) do inc(Temp);
                  if (Temp<BufEnd) and (Temp^='=') then BufPtr:=Temp;
               end
            end
         end
      end else begin
         Result := Result + BufPtr^;
         inc( BufPtr );
      end
   end
end;

// ----------------------------------------------------------------------------

Type
   TCharToCharArray = Array[Char] of Char;
   TCachedCharToChar = Class
   public
      timestamp: TDateTime;
      Data: TCharToCharArray;
   end;
Var CachedCharToChars: TStringList = NIL;

Function LoadCharToCharArray(Const FileName: String; Out CharToChar: TCharToCharArray): boolean;
Var c: Char; i, p, x1, x2: Integer; s: String; CacheEntry: TCachedCharToChar;
begin
   With CachedCharToChars do begin
      p := IndexOf(FileName);
      If p>=0 then begin
         With TCachedCharToChar(Objects[p]) do begin
            If FileAge(FileName)=timestamp then begin
               Result := true;
               CharToChar := Data;
               exit
            end else begin
               Free;
               Delete(p)
            end
         end
      end
   end;
   For c := Low(Char) to High(Char) do CharToChar[c] := c;
   Result := FileExists(FileName);
   If Result then begin
      With TStringList.Create do try
         Log(LOGID_SYSTEM, 'Caching 8-bit-charset-translation-file "'+Filename+'"');
         LoadFromFile(FileName);
         For i := 0 To Count-1 do begin
            s := Trim(Strings[i]);
            p := Pos('=', s);
            If p>1 then try
               x1 := StrToInt(Copy(s, 1, p-1));
               x2 := StrToInt(Copy(s, p+1, Length(s)-p));
               CharToChar[Chr(x1)] := Chr(x2)
            except
               Log(LOGID_WARN, 'Invalid line #'+IntToStr(i+1)+': "'+s+'"')
            end
         end
      finally
         free
      end
   end else begin
      Log(LOGID_SYSTEM, 'Charset-Translation failed - no file "'+FileName+'"')
   end;
   CacheEntry := TCachedCharToChar.Create;
   CacheEntry.Timestamp := FileAge(FileName);
   CacheEntry.Data := CharToChar;
   CachedCharToChars.AddObject(FileName, CacheEntry)
end;


Var UCSConvList: TStringList_ExactMatch = NIL;
    UCSConvListFilename: String;
    UCSConvListTimeStamp: TDateTime;

function ConvertUSCToLocalCharset(Text:string;CharLen:Byte):string;
// Variable CharLen must have 2 for USC16 and 4 for UCS32.
Var p,i,j: integer; x: Byte; k: LongWord; s: String;
begin
   If Not (CharLen IN [2, 4]) then begin
       raise Exception.Create('Internal error - ConvertUSCToLocalCharset supports only '
         +'charlen 2 and 4, not '+IntToStr(Charlen))
   end;
   // Cache translation-file if not done before
   s := PATH_BASE + 'UCS_'+Def_Codepage+'.cnv';
   If Not Assigned(UCSConvList) then begin
      UCSConvList := TStringList_ExactMatch.Create;
   end;
   If (UCSConvListFilename<>s) or (FileAge(s)<>UCSConvListTimeStamp) then begin
      UCSConvListFilename := s;
      UCSConvListTimeStamp := FileAge(s);
      If FileExists(s) then begin
         Log(LOGID_SYSTEM, 'Caching UCS-to-local-charset-translation-file "'+s+'"');
         With UCSConvList do begin
            sorted := false;
            LoadFromFile(s);
            For i := Count-1 downto 0 do begin
               s := Trim(Strings[i]);
               p := Pos('=', s);
               If p > 0 then begin
                  try
                     StrToInt(Copy(s, 1, p-1)); // Test if value is valid 
                     x := StrToInt(Copy(s, p+1, Length(s)-1));
                     Strings[i] := Copy(s, 1, p-1);
                     Objects[i] := Pointer(x)
                  except
                     Log(LOGID_WARN, 'Invalid line #'+IntToStr(i+1)+': "'+s+'"');
                     Delete(i)
                  end
               end else begin
                  Delete(i)
               end
            end;
            sorted := true
         end
      end else begin
         Log(LOGID_SYSTEM, 'UCS-to-local-charset-translation failed - missing "'+s+'"');
         UCSConvList.Clear
      end
   end;
   // Translation itself
   If UCSConvList.Count = 0 then begin
      Result := text
   end else begin
      SetLength(Result, Length(Text) div CharLen);
      For j := 0 to (Length(Text) div CharLen)-1 do begin
         i := j*CharLen+1;
         case CharLen of
            2: k:=(ord(Text[i]) shl 8) + ord(Text[i+1]);
            4: k:=(ord(Text[i]  ) shl 24)+
                  (ord(Text[i+1]) shl 16)+
                  (ord(Text[i+2]) shl  8)+
                  (ord(Text[i+3]) );
            else k := 0
         end;
         p := UCSConvList.IndexOf(IntToStr(k));
         If p >= 0 then Result[j+1] := Char(UCSConvList.Objects[p])
                   else Result[j+1] := '?'
      end
   end
end;

Procedure ConvertCharSetToLocal (Const OriginCharset: String; Var Text: String);
Var ConvList: TCharToCharArray; i: integer; cs: String;
begin
   cs := LowerCase(OriginCharset);
   If cs<>LowerCase(Def_Codepage) then begin
      If cs='utf-8' then Text := ConvertUSCToLocalCharset(DecodeUTF8toUCS32(Text), 4)
      else
      If cs='utf-7' then Text := ConvertUSCToLocalCharset(DecodeUTF7toUCS16(Text), 2)
      else begin
         If LoadCharToCharArray(PATH_BASE + OriginCharset+'_'+Def_Codepage+'.cnv', ConvList) then begin
            For i:=1 to Length(Text) do Text[i] := ConvList[Text[i]]
         end
      end
   end
end;

function DecodeUTF7toUCS16(Const Text: String):String;
Var token  : string; i,j:word;
begin
   i:=1;
   result:='';
   While i<=Length(Text) do begin
      // check if escape sequence
      if text[i]='+' then  begin
         // search end of unicode token
         j:=pos('-',copy(text,i+1,length(text)));
         // extract base64 encoded unicode token
         token:=copy(text,i+1,j-1);
         // base64 decode
         result:=result+DecodeB64(token[1],length(token));
         inc(i, j+1);
      end else begin
         // unicod page 0 is  identically to us-ascii
         result:=result+chr(0)+text[i];
         inc(i);
      end
   end
end;

function DecodeUTF8toUCS32(Const Text: String):String;
Var i, j, anz, l: Integer; x: Byte;
    Trailing: LongWord;
begin
   i := 1; j := 1; l := Length(Text);
   // Set max. possible length and fill with Chr(0)
   SetLength(Result, 4*Length(Text));
   FillChar(Result[1], 4*Length(Text), 0);
   try
      // loop over all chars of text
      While i <= l do begin
         Anz := l-i;
         // check if the first octet of UTF-Char found
         x := Ord(Text[i]);
         Case x of
            $00..$7F: begin
               Inc(i, 1);
               Result[j+3] := Chr(x);
            end;
            $80..$DF: begin
               If Anz<1 then abort;
               Trailing:=((x and $1F) shl 6) + (ord(Text[i+1]) and $3F);
               Inc(i, 2);
               Result[j+2] := chr((Trailing and $FF00) shr 8);
               Result[j+3] := chr((Trailing and $FF));
            end;
            $E0..$EF: begin
               If Anz<2 then abort;
               Trailing:=((x and $0F) shl 12) + ((ord(Text[i+1]) and $3F) shl 6)
                         + (ord(Text[i+2]) and $3F);
               Inc(i, 3);
               Result[j+2] := chr((Trailing and $FF00) shr 8);
               Result[j+3] := chr((Trailing and $FF));
            end;
            $F0..$F7: begin
               If Anz<3 then abort;
               Trailing:=((x and $07) shl 18) + ((ord(Text[i+1]) and $3F) shl 12)
                         + ((ord(Text[i+2]) and $3F) shl 6) + (ord(Text[i+3]) and $3F);
               Inc(i, 4);
               Result[j+1] := chr((Trailing and $FF0000) shr 16);
               Result[j+2] := chr((Trailing and $FF00) shr 8);
               Result[j+3] := chr((Trailing and $FF));
            end;
            $F8..$FB: begin
               If Anz<4 then abort;
               Trailing:=((x and $03) shl 24) + ((ord(Text[i+1]) and $3F) shl 18)
                         + ((ord(Text[i+2]) and $3F) shl 12) + ((ord(Text[i+3]) and $3F) shl 6)
                         + (ord(Text[i+4]) and $3F);
               Inc(i, 5);
               Result[j]   := chr((Trailing and $FF000000) shr 24);
               Result[j+1] := chr((Trailing and $FF0000) shr 16);
               Result[j+2] := chr((Trailing and $FF00) shr 8);
               Result[j+3] := chr((Trailing and $FF));
            end;
            $FC, $FD: begin
               If Anz<5 then abort;
               Trailing:=((x and $01) shl 30) + ((ord(Text[i+1]) and $3F) shl 24)
                         + ((ord(Text[i+2]) and $3F) shl 18) + ((ord(Text[i+3]) and $3F) shl 12)
                         + ((ord(Text[i+4]) and $3F) shl 6)+ (ord(Text[i+5]) and $3F);
               Inc(i, 6);
               Result[j]   := chr((Trailing and $FF000000) shr 24);
               Result[j+1] := chr((Trailing and $FF0000) shr 16);
               Result[j+2] := chr((Trailing and $FF00) shr 8);
               Result[j+3] := chr((Trailing and $FF));
            end;
            else Inc(i)
         end;
         Inc(j, 4)
      end;
      SetLength(Result, j-1);
   except
      result:=text
   end
end;

initialization
   CachedCharToChars := TStringList.Create;
finalization
   With CachedCharToChars do begin
      While Count > 0 do begin
         TCachedCharToChar(Objects[0]).Free;
         Delete(0)
      end;
      Free
   end;
   If Assigned(UCSConvList) then UCSConvList.Free
end.
