// DownloadFile.cpp: implementation of the CDownloadFile class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "WebPageLoader.h"
#include "DownloadFile.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif


//////////////////////////////////////////////////////////////////////
//
// CDownloadFile
//
//////////////////////////////////////////////////////////////////////


//////////////////////////////////////////////////////////////////////
// Construction/Destruction

// Puts every scalar member into its initial state. A new entry starts
// in FILESTATE_WAITING with no transfer object attached.
CDownloadFile::CDownloadFile()
{
   // Scheduling and progress
   m_State = FILESTATE_WAITING;
   m_iPriority = 0;
   m_nLevel = 0;
   m_iDownloadAttempts = 0;
   m_llBytesDownloaded = 0;

   // Content classification
   m_bIsHtml = FALSE;
   m_bIsImage = FALSE;

   // Control flags
   m_bSkipRequest = FALSE;
   m_bSkipStoreFile = FALSE;

   // Miscellaneous
   m_pFile = NULL;
   m_iLastImage = -1;
}

// The body is intentionally empty: this destructor does not close or
// delete m_pFile, nor does it touch any other member explicitly.
CDownloadFile::~CDownloadFile()
{
}


//////////////////////////////////////////////////////////////////////
// Operations

// Initializes this entry for a URL that is about to be queued.
//   pSession   - session whose settings supply the priority file-name filter
//   sURL       - address to download; must not be empty
//   sParentURL - stored in m_sParentURL
//   bIsImage   - stored in m_bIsImage
//   nLevel     - stored in m_nLevel
// Returns FALSE without modifying the object when sURL is empty,
// TRUE otherwise.
BOOL CDownloadFile::Create(CSession *pSession, CString sURL, CString sParentURL, BOOL bIsImage, short nLevel/*=1*/)
{
   ASSERT(sURL.GetLength() > 0);

   if( sURL.IsEmpty() )
      return FALSE;

   m_sURL = sURL;
   m_sParentURL = sParentURL;
   m_nLevel = nLevel;
   m_bIsImage = bIsImage;
   m_bIsHtml = FALSE;   // Can only be decided once the content has been downloaded

   // URLs matching the priority filter get a raised priority;
   // images rank slightly above other files.
   const BOOL bPrioritized = pSession->m_Settings.m_bUsePriorityFileNameFilter
                             && MatchPatterns(sURL, pSession->m_Settings.m_sPriorityFileNameFilter);
   if( bPrioritized ) {
      if( m_bIsImage )
         m_iPriority = 12;
      else
         m_iPriority = 10;
   }

   return TRUE;
}

// Runs the complete pipeline for this entry: validates the URL, works out
// the local target name, downloads the content into memory, parses HTML
// for further links and finally writes the data to disk.
//   pSession  - owning session (settings, logging, UI notifications)
//   pInet     - WinINet session handed on to DoDownload
//   iPosition - caller-supplied position; the value 1 triggers the optional
//               one-off randomization of the session's file list
// Returns TRUE only when every stage that was executed succeeded.
BOOL CDownloadFile::Download(CSession *pSession, CInternetSession *pInet, int iPosition)
{
   // A CString has to be cast explicitly when passed through "...".
   TRACE(_T("Download: %s\n"), (LPCTSTR) m_sURL);
   ASSERT_VALID(pSession);
   ASSERT_VALID(pInet);

   DWORD dwSessionID = pSession->m_iUniqueID;

   // The result of AfxParseURL is not checked below, so give the output
   // arguments defined values; otherwise a failed parse could hand
   // indeterminate values to DoDownload.
   CString sServer;
   CString sPage;
   DWORD dwType = AFX_INET_SERVICE_UNK;
   INTERNET_PORT nPort = INTERNET_INVALID_PORT_NUMBER;
   AfxParseURL( m_sURL, dwType, sServer, sPage, nPort );

   // In-memory buffer that receives the download (grows in 100 KB steps)
   CMemFile fMem(100 * 1024);

   CString sPath, sFilename;

   // Validate the request while holding the session data lock
   {
      CSessionDataLock lock(pSession);

      // Check against user's validation rules
      if( ValidateURL(pSession, sServer, sPage) == FALSE )
         return FALSE;

      // Now work out the local folder and file name...
      if( GetTargetFilename(pSession, sPath, sFilename) == FALSE ) {
         ReportErrorState(pSession, FILESTATE_ALREADYTHERE, IDS_LOG_NOFILE);
         return FALSE;
      }
   }

   // Make the download happen...
   BOOL bRes = DoDownload(pSession, pInet, sServer, sPage, nPort, dwType, sFilename, fMem);

   // Parse the file if it's HTML...
   if( bRes )
      bRes = DoParseFile(pSession, fMem);

   // Write file to disk? Skipped for HTML when the session discards HTML,
   // and when parsing flagged the file as not worth storing.
   if( bRes && !(pSession->m_Settings.m_bDontKeepHTML && m_bIsHtml) && !m_bSkipStoreFile )
      bRes = DoWriteFile(pSession, sPath, sFilename, fMem);

   // Bonus feature: Randomize files in session after first download.
   // Meant to be used for a big initial links page.
   if( iPosition == 1 && pSession->m_Settings.m_bRandomize )
      pSession->RandomizeFiles(1000);

   // Download complete (or failed) - refresh UI
   pSession->PostUpdate(WM_REFRESHITEMS);
   pSession->PostUpdate(WM_REFRESHNODES, dwSessionID);

   return bRes;
}


// Does the actual file download from the internet (or from a local file
// for AFX_INET_SERVICE_FILE URLs) into the supplied memory file.
//   pSession  - owning session; receives state updates, log entries and statistics
//   pInet     - WinINet session used to open the HTTP connection
//   sServer, sPage, nPort, dwType - URL components, passed in by the caller
//   sFilename - target file name; only forwarded to ValidateDownloadedFile
//   fMem      - receives the downloaded bytes
// Returns TRUE only when the content was read completely and passed
// ValidateFirstBuffer and ValidateDownloadedFile. Every failure path
// reports a state through ReportErrorState and falls through to the
// common cleanup at the end of the function.
BOOL CDownloadFile::DoDownload(CSession *pSession,
                               CInternetSession *pInet, 
                               CString sServer, CString sPage, INTERNET_PORT nPort,
                               DWORD dwType,
                               const CString& sFilename,
                               CMemFile& fMem)
{
   ASSERT_VALID(pSession);
   ASSERT_VALID(pInet);
   // NOTE(review): this converts the CString to a pointer, so the assertion
   // presumably can never fire - confirm and consider removing.
   ASSERT(sFilename);

   // Both are released in the cleanup section after the TRY block
   CHttpConnection* pServer = NULL;
   CFile* pGenFile = NULL;

   if( !BfxIsValidSession(pSession) ) 
      return FALSE;

   DWORD dwSessionID = pSession->m_iUniqueID;

   BOOL bDownloadOK = FALSE; // This is the default result!

   // Cleared as soon as a validation step fails or a stop/skip is requested
   BOOL bOk = TRUE;

   m_iDownloadAttempts++;
   
   m_llBytesDownloaded = 0;

   m_State = FILESTATE_CONNECTING;

   pSession->PostUpdate(WM_REFRESHNODES, dwSessionID);

   TRY
   {
      DWORD dwRet;
      DWORD dwFlags;
      TCHAR szHeaders[1024] = { 0 };

      LPCTSTR szUsername = NULL;
      LPCTSTR szPassword = NULL;

      // Open regular file if it's local.
      // Otherwise use WinINet wrapper to download from Internet.
      // The WinINet wrapper won't load local files because of
      // security.
      if( dwType == AFX_INET_SERVICE_FILE ) 
      {
         m_State = FILESTATE_DOWNLOADING;

         // Local path = URL without the "file:///" prefix.
         // Note: this local variable hides the sFilename parameter
         // for the rest of this branch.
         CString sFilename = m_sURL;
         sFilename.Replace(_T("file:///"), _T(""));

         pGenFile = new CFile();

         // NOTE(review): MFC's operator new normally throws instead of
         // returning NULL - confirm whether this branch is reachable.
         if( pGenFile == NULL ) {
            ReportErrorState(pSession, FILESTATE_ERROR, IDS_LOG_SERVERFAIL);
            goto done;
         }

         if( !pGenFile->Open(sFilename, CFile::modeRead) ) {
            ReportErrorState(pSession, FILESTATE_ERROR, IDS_LOG_FILENOTFOUND);
            goto done;
         }
      }
      else
      {
         // Look for a valid username/password...
         if( pSession->m_sUsername.GetLength() > 0 ) 
            szUsername = pSession->m_sUsername;
         if( pSession->m_sPassword.GetLength() > 0 ) 
            szPassword = pSession->m_sPassword;

         // Prepare HTTP headers (compatibility mode only): a fixed Accept /
         // Accept-Language set plus a Referer pointing at the server's index page.
         // NOTE(review): the headers are concatenated into a fixed 1024-TCHAR
         // buffer without a length check; this relies on sServer being short
         // enough - confirm the upper bound of the host name.
         if( pSession->m_pPreferences->m_bCompatibilityMode ) 
         {
           CString sReferer;
           sReferer.Format(_T("Referer: http://%s/index.html\r\n"), sServer);
           if( nPort == INTERNET_DEFAULT_HTTPS_PORT ) 
              sReferer.Format(_T("Referer: https://%s/index.html\r\n"), sServer);
           _tcscpy( szHeaders, _T("Accept: image/gif, image/x-xbitmap, image/jpeg, */*\r\n") );
           _tcscat( szHeaders, _T("Accept-Language: en-us\r\n") );
           _tcscat( szHeaders, sReferer );
           _tcscat( szHeaders, _T("Connection: Keep-Alive\r\n") );
         }

         // Open HTTP connection...
         pServer = pInet->GetHttpConnection(sServer, nPort, szUsername, szPassword);
         ASSERT_VALID(pServer);
     
         if( pServer == NULL ) {
            ReportErrorState(pSession, FILESTATE_ERROR, IDS_LOG_SERVERFAIL);
            goto done;
         }

         // Set timeout options (milliseconds): 20 s connect, 30 s send, 40 s receive
         pServer->SetOption(INTERNET_OPTION_CONNECT_TIMEOUT, 20 * 1000);
         pServer->SetOption(INTERNET_OPTION_SEND_TIMEOUT, 30 * 1000);
         pServer->SetOption(INTERNET_OPTION_RECEIVE_TIMEOUT, 40 * 1000);

         dwFlags = INTERNET_FLAG_EXISTING_CONNECT | INTERNET_FLAG_RELOAD;
      
         if( dwType == AFX_INET_SERVICE_HTTPS ) 
            dwFlags |= INTERNET_FLAG_SECURE;

         // Open file...
         CHttpFile *pInetFile = pServer->OpenRequest(CHttpConnection::HTTP_VERB_GET, sPage, NULL, 1, NULL, NULL, dwFlags);
         ASSERT_VALID(pInetFile);
      
         if( pInetFile == NULL ) {
            ReportErrorState(pSession, FILESTATE_ERROR, IDS_LOG_FILENOTFOUND);
            goto done;
         }
    
         // Publish the active request under the data lock: pGenFile takes
         // ownership for cleanup, m_pFile and m_pCurrentDownloadFile are
         // reset again in the cleanup section below.
         {
            CSessionDataLock lock(pSession);

            pGenFile = pInetFile;

            m_pFile = pInetFile;

            pSession->m_pCurrentDownloadFile = this;
         }

         if( _tcslen(szHeaders) > 0 )
            pInetFile->AddRequestHeaders(szHeaders, HTTP_ADDREQ_FLAG_REPLACE | HTTP_ADDREQ_FLAG_ADD);

         pInetFile->SendRequest();      

         pInetFile->QueryInfoStatusCode(dwRet);
         if( dwRet == HTTP_STATUS_OK ) 
         {
            //
            // Ok, we've got a file connection. Now download...
            //

            // Outside compatibility mode: if the raw response headers carry a
            // "Location:" line, use its value as the page name from here on.
            // NOTE(review): the value is cut at '\n', so with CRLF-terminated
            // headers it presumably keeps a trailing '\r' - confirm.
            if( !pSession->m_pPreferences->m_bCompatibilityMode )
            {
               CString strNewLocation;
               pInetFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strNewLocation);
               int nPlace = strNewLocation.Find(_T("\nLocation: "));
               if( nPlace >= 0 ) {
                  strNewLocation = strNewLocation.Mid(nPlace + 11);
                  nPlace = strNewLocation.Find('\n');
                  if( nPlace > 0 ) sPage = strNewLocation.Left(nPlace);
               }
            }

            // Let's just see how big this file will get...
            // dwTotalSize stays 0 when no Content-Length is available.
            DWORD dwLen;
            DWORD dwTotalSize;
            dwLen = sizeof(DWORD);
            dwTotalSize = 0;
            pInetFile->QueryInfo(HTTP_QUERY_CONTENT_LENGTH | HTTP_QUERY_FLAG_NUMBER, 
                                 &dwTotalSize, &dwLen, NULL);

            {
               CSessionDataLock lock(pSession);
            
               m_State = FILESTATE_DOWNLOADING;

               if( pSession->m_Settings.m_bUseFileSizeFilter 
                   && !pSession->m_pPreferences->m_bCompatibilityMode ) 
               {
                  // If it is an image and we have file-size filter on,
                  // then check now (announced size in KB against the filter)
                  if( m_bIsImage && 
                      (dwTotalSize > 0) &&
                      (long)(dwTotalSize / 1024) < pSession->m_Settings.m_iFileSizeFilter ) 
                  {
                     ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_NOTBIGENOUGH);
                     goto done;
                  }
               }
            }

            // For images with a known size, size the memory file up front
            if( m_bIsImage && dwTotalSize > 0 ) {
               fMem.SetLength(dwTotalSize);
               fMem.SeekToBegin();
            }

         }
         else 
         {
            //
            // Error! Map the HTTP status code to a file state and log message.
            //       
            switch( dwRet ) {
            case HTTP_STATUS_NOT_FOUND:
               ReportErrorState(pSession, FILESTATE_ERROR, IDS_LOG_FILENOTFOUND);
               break;
            case HTTP_STATUS_MOVED:
            case HTTP_STATUS_REDIRECT:
            case HTTP_STATUS_REDIRECT_METHOD:
               ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_FILEMOVED);
               break;
            case HTTP_STATUS_DENIED:
            case HTTP_STATUS_FORBIDDEN:
            case HTTP_STATUS_PAYMENT_REQ:
            case HTTP_STATUS_USE_PROXY:
            case HTTP_STATUS_PROXY_AUTH_REQ:
               ReportErrorState(pSession, FILESTATE_ERROR, IDS_LOG_ACCESSDENIED);
               break;
            case HTTP_STATUS_BAD_REQUEST:
            case HTTP_STATUS_SERVER_ERROR:
            case HTTP_STATUS_UNSUPPORTED_MEDIA:
            case HTTP_STATUS_VERSION_NOT_SUP:
               ReportErrorState(pSession, FILESTATE_ERROR, IDS_LOG_SERVERERROR);
               break;
            case HTTP_STATUS_REQUEST_TIMEOUT:
            case HTTP_STATUS_GATEWAY_TIMEOUT:
            case HTTP_STATUS_SERVICE_UNAVAIL:
               ReportErrorState(pSession, FILESTATE_BROKEN, IDS_LOG_TIMEOUT);
               break;
            case HTTP_STATUS_PARTIAL:
            case HTTP_STATUS_PRECOND_FAILED:
            case HTTP_STATUS_PARTIAL_CONTENT:
            case HTTP_STATUS_REQUEST_TOO_LARGE:
               ReportErrorState(pSession, FILESTATE_BROKEN, IDS_LOG_INCOMPLETE);
               break;
            default:
               ReportErrorState(pSession, FILESTATE_ERROR, IDS_LOG_GENERALERROR);
               break;
            }

            goto done;
         }
      }

      pSession->PostUpdate(WM_REFRESHITEMS);
      pSession->PostUpdate(WM_REFRESHNODES, dwSessionID);

      //
      // Now read the content chunk by chunk into the memory file
      //

      enum { DOWNLOADBUFFER = 2048 };
      BYTE szBuff[DOWNLOADBUFFER];

      // Read first chunk...
      UINT nRead;
      nRead = pGenFile->Read(szBuff, DOWNLOADBUFFER - 1);

      // We do some initial test on the first buffer downloaded...
      if( nRead > 0 )
      {
         CSessionDataLock lock(pSession);

         bOk = ValidateFirstBuffer(pSession, sPage, szBuff, nRead);
      }

      while( bOk && nRead > 0 ) 
      {
         fMem.Write(szBuff, nRead); 
         
         //
         // Check for changed state
         //
         {
            CSessionDataLock lock(pSession);
            
            // Do some statistics...
            m_llBytesDownloaded += nRead;                             
            pSession->m_Info.m_llBytesDownloaded += nRead;
            
            // Did someone request us to stop?
            if( pSession->m_bSleepRequest || 
                pSession->m_bStopRequest || 
                pSession->m_bKillRequest ) 
            {
               ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_SKIPPED);
               bOk = FALSE;
            }
            // ...or skip it?
            if( m_bSkipRequest ) {
               ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_SKIPPED);
               bOk = FALSE;
            }
         }

         // Read next buffer (still done once more after a stop/skip request;
         // the loop condition then ends the loop)...
         nRead = pGenFile->Read(szBuff, DOWNLOADBUFFER-1);
      }
      
      //
      // Make sure it's a valid file
      //

      if( bOk ) 
      {
         DWORD dwTotalSize = fMem.GetLength();
         
         CSessionDataLock lock(pSession);

         bOk = ValidateDownloadedFile(pSession, sFilename, dwTotalSize, sPage);
      }

      // NOTE(review): the counters below are updated outside any
      // CSessionDataLock scope, unlike the byte counters in the read
      // loop - confirm whether that is intended.
      if( bOk ) 
      {              
         m_State = FILESTATE_DONE;

         pSession->m_Info.m_nFilesDownloaded++;
         pSession->m_pPreferences->m_nFilesDownloaded++;
         pSession->m_pPreferences->m_llBytesDownloaded += m_llBytesDownloaded;

         pSession->Log(LOGTYPE_LOG, IDS_LOG_FILEDOWNLOADED, m_sURL);

         // SUCCESS!
         bDownloadOK = TRUE;
      }

// Common exit point for all early-out paths inside the TRY block
done: ;

   }
   CATCH_ALL( e )
   {
      // Any exception raised above is reported as a general error
      ReportErrorState(pSession, FILESTATE_ERROR, IDS_LOG_GENERALERROR);
   }
   END_CATCH_ALL

   // Withdraw the "current download" registration before the file object goes away
   {
      CSessionDataLock lock(pSession);

      pSession->m_pCurrentDownloadFile = NULL;

      m_pFile = NULL;
   }

   // Close and release the file object (local CFile or CHttpFile);
   // if Close() throws, fall back to Abort().
   if( pGenFile != NULL ) 
   {
      TRY
      {
         pGenFile->Close();
      }
      CATCH_ALL( e )
      {
         pGenFile->Abort();
      }
      END_CATCH_ALL

      delete pGenFile;     
   }

   // Close and release the HTTP connection; exceptions from Close() are ignored
   if( pServer != NULL ) 
   {
      ASSERT_VALID(pServer);
      TRY
      {
         pServer->Close();
      }
      CATCH_ALL( e )
      {
      }
      END_CATCH_ALL
      
      delete pServer;     
   }

   return bDownloadOK;
}

// Constructs the local folder and file name for this download.
//   pSession  - session whose settings control the path layout, pretty
//               printing and duplicate handling
//   sPath     - [out] target folder (download path plus optional source
//               path), passed through ADDBACKSLASH before use
//   sFilename - [out] full target (sPath + file name); assigned only on success
// Returns FALSE when the URL contains no '/', when an existing non-HTML
// file must be skipped (DUP_SKIP), or when DUP_RENAME finds no free name.
// Note that sPath may already have been modified when FALSE is returned.
BOOL CDownloadFile::GetTargetFilename(CSession *pSession, CString& sPath, CString& sFilename)
{
   ASSERT_VALID(pSession);

   int pos;

   sPath = pSession->m_Settings.m_sDownloadPath;

   sPath.TrimRight();

   ADDBACKSLASH(sPath);

   CString sFName = m_sURL;
   // Mask off some traditional URL extensions...
   pos = sFName.Find(_T('?'));
   if( pos >= 0 ) sFName = sFName.Left(pos);
   pos = sFName.Find(_T('#'));
   if( pos >= 0 ) sFName = sFName.Left(pos);
   
   // Get to the bare filename
   pos = sFName.ReverseFind(_T('/'));
   if( pos<0 ) return FALSE;
   sFName = sFName.Mid(pos + 1);
   
   if( sFName.IsEmpty() ) 
      sFName += _T("index.html");

   sFName = BfxUrlUnescape(sFName);

   // Sanitize the name: some characters become blanks, others are dropped.
   sFName.Replace(_T('~'), _T(' '));
   sFName.Replace(_T("!"), _T(" "));
   sFName.Replace(_T("&"), _T(" "));
   sFName.Remove(_T('*'));
   sFName.Remove(_T('?'));
   sFName.Remove(_T(':'));
   sFName.Remove(_T('+'));
   sFName.Remove(_T(','));
   sFName.Remove(_T('<'));
   sFName.Remove(_T('>'));
   sFName.Remove(_T('"'));
   sFName.Remove(_T('\''));
   sFName.Remove(_T('\\'));
   // Unescaping can re-introduce a path separator ("%2F") or a pipe ("%7C").
   // Neither may appear in a bare file name: '/' would let the name point
   // into another folder and '|' is not a valid file-name character.
   sFName.Remove(_T('/'));
   sFName.Remove(_T('|'));
   sFName.TrimRight();
   sFName.TrimRight(_T("-.~'"));

   // If "append to target path" is requested, we transform the
   // URL and append it to the target path.
   // NOTE(review): ".." segments in the URL path are not filtered here -
   // confirm whether they can steer sPath outside the download folder.
   if( pSession->m_Settings.m_bAppendSourcePath )
   {
      CString sPart = m_sURL;  

      // Images may be stored under the path of the page that referenced them
      if( pSession->m_Settings.m_bAppendParentPath && m_bIsImage && !m_sParentURL.IsEmpty() )
         sPart = m_sParentURL;

      // The part between "http://" and "?"...
      pos = sPart.Find(_T("//"));
      if( pos >= 0 ) sPart = sPart.Mid(pos + 2);
      pos = sPart.Find(_T("?"));
      if( pos >= 0 ) sPart = sPart.Left(pos);
      pos = sPart.Find(_T("#"));
      if( pos >= 0 ) sPart = sPart.Left(pos);

      // URL separators become folder separators; escaped backslashes are
      // neutralized before unescaping so they cannot add folder levels.
      sPart.TrimLeft(_T("/ "));
      sPart.Replace(_T('/'),_T('\\'));
      sPart.Replace(_T("%5c"), _T(" "));
      sPart.Replace(_T("%5C"), _T(" "));
      
      sPart = BfxUrlUnescape(sPart);

      if( pSession->m_Settings.m_bPrettyPath ) 
         PrettyPath(sPart);

      size_t x;

      static LPCTSTR aBlanks = _T("~!&");
      static LPCTSTR sRemoves = _T("*?:+,<>\"\'|");
      for( x = 0; x < _tcslen(aBlanks); x++ ) 
         sPart.Replace(aBlanks[x], _T(' '));
      for( x = 0; x < _tcslen(sRemoves); x++ ) 
         sPart.Remove(sRemoves[x]);

      sPart.TrimLeft();
      sPart.TrimRight();

      // Finally append the result (the part up until the URL filename).
      // The last component is dropped when the final five characters
      // contain a '.' or '\\', i.e. when it looks like a file name.
      pos = sPart.ReverseFind(_T('\\')); 
      if( pos > 0 && sPart.Right(5).FindOneOf(_T("\\.")) >= 0 )
         sPath += sPart.Left(pos);
      else
         sPath += sPart;

      // Collapse runs of blanks (two passes)
      sPath.Replace(_T("  "), _T(" "));
      sPath.Replace(_T("  "), _T(" "));

      sPath.TrimRight(_T("-.~'\\ "));
  
      ADDBACKSLASH(sPath);
   }

   int iSpecialTrick = pSession->m_Settings.m_iSpecialTrick;

   // SPECIAL TRICK #3: Use image filename from URL not HTML src
   if( iSpecialTrick == 3 ) {
      SpecialTrick3(pSession, sPath, sFName);
   }

   // Pretty print: All single-digit image filename gets formatted as 2-digits (1.jpg to 01.jpg)...
   if( pSession->m_Settings.m_bPrettyPath && m_bIsImage && sFName.GetLength() > 2 && sFName.GetAt(1) == '.' && _ttoi(sFName) > 0 ) {
      CString sTemp = static_cast<LPCTSTR>( sFName );
      sFName = _T("0") + sTemp;
   }

   // Always have a filename extension
   if( !m_bIsImage && sFName.Find(_T('.')) < 0 )
      sFName += _T(".html");

   // Filename too long? Shorten in three stages, sharing one retry budget:
   // 1) chop characters off a long path, 2) drop trailing folders,
   // 3) shorten the file name itself.
   int retries = 0;
   while( (sPath + sFName).GetLength() >= MAX_PATH - 20 && ++retries < 10 && sPath.GetLength() > 100 )
      sPath = sPath.Left(sPath.GetLength() - 10);
   while( (sPath + sFName).GetLength() >= MAX_PATH - 20 && ++retries < 20 && sPath.Find(_T('\\')) > 3 )
      sPath = sPath.Left(sPath.ReverseFind(_T('\\')));
   while( (sPath + sFName).GetLength() >= MAX_PATH - 20 && ++retries < 30 && sFName.GetLength() > 30 ) {
      // Keep at most 40 characters of the stem plus the extension. The stem
      // and extension are split first so that a name without '.' or with
      // fewer than 40 characters cannot grow by having text appended to it.
      CString sStem = sFName;
      CString sExt;
      int iDot = sFName.ReverseFind(_T('.'));
      if( iDot >= 0 ) {
         sStem = sFName.Left(iDot);
         sExt = sFName.Mid(iDot);
      }
      CString sShort = sStem.Left(40) + sExt;
      if( sShort.GetLength() >= sFName.GetLength() ) 
         break; // Cannot be shortened any further
      sFName = sShort;
   }

   sPath.TrimRight();

   ADDBACKSLASH(sPath);

   // If the file already exists, apply the session's duplicate policy...
   if( BfxFileExists(sPath + sFName) ) 
   {
      // It is a duplicate!!!
      switch( pSession->m_Settings.m_Duplicates ) {
      case DUP_SKIP:
         // Skip duplicated images.
         // We still need to re-download HTML files to process any links 
         // embedded in it!
         if( !BfxHasValidExtension(m_sURL, HTML_EXTENSIONS, TRUE) )
            return FALSE;
         break;
      case DUP_OVERWRITE:
         // Overwrite is the default behaviour...
         break;
      case DUP_RENAME:
         {
            // Rename scenario... horror! We do a simple increment from a-z. If file
            // does not exist, then use it. Otherwise fail...
            CString sFile, sExt;  
            // First we need to separate the filename and extension...
            pos = sFName.ReverseFind(_T('.'));
            if( pos < 0 ) return FALSE;
            sFile = sFName.Left(pos);
            sExt = sFName.Mid(pos);
            // ...then scan for a free filename entry; the letter goes in front
            // of or behind the stem depending on m_bPrefixRename...
            BOOL bFound = FALSE;
            for( TCHAR c=_T('a'); c<=_T('z'); c++ ) {
               CString sNewFilename;
               if( pSession->m_Settings.m_bPrefixRename )
                  sNewFilename.Format(_T("%c%s%s"), c, (LPCTSTR) sFile, (LPCTSTR) sExt);
               else
                  sNewFilename.Format(_T("%s%c%s"), (LPCTSTR) sFile, c, (LPCTSTR) sExt);
      
               if( !BfxFileExists(sPath + sNewFilename) ) {
                  sFName = sNewFilename;
                  bFound = TRUE;
                  break;
               }
            }
            if( !bFound ) return FALSE;
         }
         break;
      }   
   }

   sFilename = sPath + sFName;
   return TRUE;
}

// Validates the first downloaded chunk.
// Catches common problems: an (almost) empty response, an image request
// answered with an HTML error page, an executable, or an animated GIF banner.
//   pSession  - session used for settings and error reporting
//   sFilename - name of the resource; names containing ".txt" are not inspected
//   pBuffer   - first chunk of raw bytes
//   dwSize    - number of valid bytes in pBuffer
// Returns TRUE when nothing suspicious was found. Every rejection is
// reported as FILESTATE_SKIPPED through ReportErrorState.
BOOL CDownloadFile::ValidateFirstBuffer(CSession *pSession,
                                        const CString& sFilename,
                                        LPBYTE pBuffer, DWORD dwSize)
{
   ASSERT_VALID(pSession);
   ASSERT(pBuffer);
   
   ASSERT(::IsBadReadPtr(pBuffer,dwSize)==FALSE);
   if( ::IsBadReadPtr(pBuffer,dwSize) ) return FALSE;
   
   // Not big enough to be a valid file at all!
   if( dwSize < 8 ) {
      ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_EMPTYFILE);
      return FALSE;
   }

   // Only images are really checked from this point...
   // We assume everything else are valid HTML/download files.
   if( !m_bIsImage ) 
      return TRUE; 
   if( sFilename.Find(_T(".txt")) >= 0 )
      return TRUE;

   // Copy the start of the buffer into a string, one byte per character,
   // replacing 0-bytes with spaces so the string searches below see all of
   // it. The data is read as bytes (not TCHARs) so no more than dwSize bytes
   // are ever touched, and the string is terminated exactly after the last
   // copied character so no uninitialized characters are searched.
   const DWORD dwScan = min(dwSize, 2000);
   CString s;
   LPTSTR p = s.GetBuffer(dwScan + 1);
   for( DWORD i = 0; i < dwScan; i++ ) {
      p[i] = ( pBuffer[i] == 0 ? _T(' ') : (TCHAR) pBuffer[i] );
   }
   p[dwScan] = _T('\0');
   s.ReleaseBuffer(dwScan);
   s.MakeLower();

   // An image that starts with HTML markup is an error page in disguise
   if( s.Find(_T("<html")) >= 0 
       || s.Find(_T("<head>")) >= 0 
       || s.Find(_T("<body")) >= 0 
       || s.Find(_T("<frameset ")) >= 0 
       || s.Find(_T("<script")) >= 0 
       || s.Find(_T("<meta ")) >= 0 )
   {
      ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_BADIMAGE);
      return FALSE; 
   }

   // Is it an executable (virus)? Those start with "MZ" (lower-cased above).
   if( s.Find(_T("mz")) == 0 ) {
      ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_BADIMAGE);
      return FALSE; 
   }

   // Animated GIFs carry a "NETSCAPE2.0" extension block -> treat as banner.
   if( pSession->m_Type == TYPE_HTMLSCAN
       && pSession->m_Settings.m_bSkipBanners ) 
   {
      if( (s.Find(_T("gif98a")) == 0 
           || s.Find(_T("gif89a")) == 0)
          && s.Find(_T("netscape2.0")) > 0 ) 
      {
         ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_BANNER);
         return FALSE;
      }
   }

   // Well, couldn't find anything wrong. Let's continue...
   return TRUE;
}

// Decides whether this URL may be downloaded under the session's rules.
// Checks, in order: domain restriction, URL-folder restriction, banner
// providers, the image / HTML "must contain" filters and the exclude filter.
//   pSession - session supplying settings, preferences and the start URL
//   sServer  - host part of the URL to validate
//   sPage    - page (path) part of the URL to validate
// Returns TRUE when the URL is acceptable. Every rejection is reported as
// FILESTATE_SKIPPED through ReportErrorState before FALSE is returned.
// Side effect: on first use (empty banner-provider list) the provider
// lists in the preferences are seeded from the built-in tables and from
// WebPageLoader.ini.
BOOL CDownloadFile::ValidateURL(CSession *pSession,
                                const CString& sServer, const CString& sPage)
{
   ASSERT_VALID(pSession);

   // Built-in substrings that mark a page as banner / thumbnail / clutter
   static LPCTSTR szBannerProviders[] = {
      _T("doubleclick"),
      _T("sitemeter"),
      _T("adclick"),
      _T("ban/"),
      _T("-ads/"),
      _T("/ad."),
      _T(".ad/"),
      _T("click2net"),
      _T("click-"),
      _T("click."),
      _T("click/"),
      _T("exchange"),
      _T("newads"),
      _T("adswap"),
      _T("paid"),
      _T("thumb"),
      _T("thmb"),
      _T("/thn_"),
      _T("/tn_"),
      _T("/th_"),
      _T("/TN_"),
      _T("/TH_"),
      _T("_t."),
      _T("/t/"),
      _T("/th/"),
      _T("/tn/"),
      _T("mini/"),
      _T("avatar/"),
      _T("/small"),
      _T("_logo."),
      _T("join"),
      _T("banner"),
      _T("button"),
      _T("counter"),
      _T("traffic"),
      _T("javascript"),
      _T("vbscript"),
      _T("blank"),
      _T("spacer"),
      _T("menu-"),
      NULL };
   // Built-in substrings that mark a host as an image hosting service
   static LPCTSTR szImageProviders[] = {
      _T("imagevenue"),
      _T("imagehosting"),
      _T("imagehigh"),
      _T("imagepile"),
      _T("imageshack"),
      _T("jpghosting"),
      _T("freeimagehosting"),
      _T("photobucket"),
      _T("picsserver"),
      _T("picserver"),
      _T("img."),
      _T("image."),
      _T("images."),
      _T("gallery."),
      NULL };

   // Provider matching is done on lower-cased copies
   CString sServerLower = sServer;
   CString sPageLower = sPage;
   sServerLower.MakeLower();
   sPageLower.MakeLower();

   CString sUrlLower = pSession->m_sURL;
   sUrlLower.MakeLower();

   // Populate the image-provider and banner-provider lists
   if( pSession->m_pPreferences->m_aBannerProviders.GetSize() == 0 )
   {
      LPCTSTR* p;
      p = szBannerProviders;
      while( *p!=NULL ) { pSession->m_pPreferences->m_aBannerProviders.Add(CString(*p)); p++; }
      p = szImageProviders;
      while( *p!=NULL ) { pSession->m_pPreferences->m_aImageProviders.Add(CString(*p)); p++; }
      // Load additional entries from the .ini file. Keys are numbered
      // 1, 2, 3...; the first missing or empty key ends each list.
      CString sIniFile = BfxGetAppPath() + _T("WebPageLoader.ini");
      CString sKey;
      int i;
      for( i = 1; ; i++ ) {
         TCHAR szValue[200] = { 0 };
         sKey.Format(_T("%d"), i);
         ::GetPrivateProfileString(_T("BannerProviders"), sKey, _T(""), szValue, (sizeof(szValue) / sizeof(szValue[0])) - 1, sIniFile);
         if( _tcslen(szValue) == 0 ) break;
         pSession->m_pPreferences->m_aBannerProviders.Add(CString(szValue));
      }
      for( i = 1; ; i++ ) {
         TCHAR szValue[200] = { 0 };
         sKey.Format(_T("%d"), i);
         ::GetPrivateProfileString(_T("ImageProviders"), sKey, _T(""), szValue, (sizeof(szValue) / sizeof(szValue[0])) - 1, sIniFile);
         if( _tcslen(szValue) == 0 ) break;
         pSession->m_pPreferences->m_aImageProviders.Add(CString(szValue));
      }
   }

   // Known image-provider? Then ignore domain limits set by configuration.
   // The provider is lower-cased as well, because it is searched in a
   // lower-cased host name; otherwise mixed-case entries could never match.
   BOOL bImageProviderHost = FALSE;
   for( int i = 0; i < pSession->m_pPreferences->m_aImageProviders.GetSize(); i++ ) {
      CString sProvider = pSession->m_pPreferences->m_aImageProviders.GetAt(i);
      sProvider.MakeLower();
      if( sServerLower.Find(sProvider) >= 0 ) {
         bImageProviderHost = TRUE;
         break; // One hit is enough
      }
   }

   // Are we stepping outside the original session domain?
   if( !bImageProviderHost 
       && pSession->m_Settings.m_bPreventOutsideDomain 
       && sServer.CompareNoCase(pSession->m_Info.m_sServer) != 0 ) 
   {
      ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_OUTSIDEDOMAIN);
      return FALSE;
   }

   // Are we stepping outside the original session URL?
   // Both pages are cut back to their folder before they are compared.
   if( !bImageProviderHost 
       && pSession->m_Settings.m_bPreventOutsideURL ) 
   {
      int iPos;
      CString sFirst = sPage;
      CString sSecond = pSession->m_Info.m_sPage;
      iPos = sFirst.ReverseFind(_T('/'));
      if( iPos > 0 ) sFirst = sFirst.Left(iPos);
      iPos = sSecond.ReverseFind(_T('/'));
      if( iPos > 0 ) sSecond = sSecond.Left(iPos);
      if( sServer.CompareNoCase(pSession->m_Info.m_sServer) != 0 
          || sFirst != sSecond ) 
      {
         ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_OUTSIDEURL);
         return FALSE;
      }
   }

   // Can we recognize a banner provider?
   // A provider only counts when it appears in the page but not in the
   // session's own start URL. The provider is lower-cased because both
   // strings it is searched in are lower-cased.
   if( pSession->m_Type == TYPE_HTMLSCAN
       && pSession->m_Settings.m_bSkipBanners ) 
   {
      for( int i=0; i<pSession->m_pPreferences->m_aBannerProviders.GetSize(); i++ ) {
         CString sProvider = pSession->m_pPreferences->m_aBannerProviders.GetAt(i);
         sProvider.MakeLower();
         if( sPageLower.Find(sProvider) >= 0 
             && sUrlLower.Find(sProvider) < 0 ) 
         {
            ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_BANNER);
            return FALSE; // Found a banner provider!
         }
      }
   }

   // Does it have a "must contain" clause?
   // NOTE: This is on the URL (not just the pagename, but the whole
   //       after-server-is-appended string)
   if( m_bIsImage 
       && pSession->m_Settings.m_bUseImageFileNameFilter ) 
   {
      if( MatchPatterns(sServer + sPage, pSession->m_Settings.m_sImageFileNameFilter) == FALSE ) {
         ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_NOPATTERNMATCH);
         return FALSE;
      }
   }

   // Does it have a "must contain" clause?
   // BUG: We do this on the page (not the filename, but the whole
   //      after-server-is-appended string)
   // Only applied below the first level (m_nLevel > 1).
   if( !m_bIsImage 
       && pSession->m_Settings.m_bUseHtmlFileNameFilter 
       && m_nLevel > 1 ) 
   {
      if( MatchPatterns(sServer + sPage, pSession->m_Settings.m_sHtmlFileNameFilter) == FALSE ) {
         ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_NOPATTERNMATCH);
         return FALSE;
      }
   }

   // Does it have an exclude filter? (Also only below the first level.)
   if( pSession->m_Settings.m_bUseExcludeFileNameFilter
       && m_nLevel > 1 ) 
   {
      if( MatchPatterns(sServer + sPage, pSession->m_Settings.m_sExcludeFileNameFilter) == TRUE ) {
         ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_NOPATTERNMATCH);
         return FALSE;
      }
   }

   return TRUE;
}

// Runs the final sanity checks on a completely downloaded file.
//   pSession   - session supplying the stop flags and the size filter
//   sFilename  - not used by the checks below
//   dwFileSize - number of bytes downloaded
//   strURL     - not used by the checks below
// Returns TRUE when the file is acceptable. Each rejection is reported
// as FILESTATE_SKIPPED through ReportErrorState.
BOOL CDownloadFile::ValidateDownloadedFile(CSession *pSession, 
                                           const CString& sFilename,
                                           DWORD dwFileSize,
                                           LPCTSTR strURL)
{
   // Was the session paused, stopped or killed while we were downloading?
   const BOOL bInterrupted = pSession->m_bSleepRequest 
                             || pSession->m_bStopRequest 
                             || pSession->m_bKillRequest;
   if( bInterrupted ) 
   {
      ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_STOPPED);
      return FALSE;
   }

   // Fewer than 8 bytes cannot be a real file
   if( dwFileSize < 8 ) 
   {
      ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_EMPTYFILE);
      return FALSE;
   }

   // Images must reach the configured minimum size (in KB) when the
   // file-size filter is switched on
   if( m_bIsImage && pSession->m_Settings.m_bUseFileSizeFilter ) 
   {
      const long lSizeKB = (long)(dwFileSize/1024);
      if( lSizeKB < pSession->m_Settings.m_iFileSizeFilter ) 
      {
         ReportErrorState(pSession, FILESTATE_SKIPPED, IDS_LOG_NOTBIGENOUGH);
         return FALSE;
      }
   }

   return TRUE;
}

// Parses a downloaded HTML file for links, images and other references
// and queues every usable URL through AddUrl.
//   pSession - session receiving the new entries and UI notifications
//   fMem     - memory file holding the downloaded content
// Always returns TRUE. Images and URLs without an HTML extension are left
// untouched. No session lock is taken in this function itself; AddUrl
// acquires the locks it needs.
// Side effects: sets m_bIsHtml when the content looks like HTML, may set
// m_bSkipStoreFile (special trick #1) and ends with m_State = FILESTATE_DONE.
BOOL CDownloadFile::DoParseFile(CSession *pSession, CMemFile& fMem)
{
   // We can only parse HTML content...
   if( m_bIsImage ) 
      return TRUE;
   if( !BfxHasValidExtension(m_sURL, HTML_EXTENSIONS, TRUE) )
      return TRUE;

   // A CString has to be cast explicitly when passed through "...".
   TRACE(_T("Parse File: %s\n"), (LPCTSTR) m_sURL);

   DWORD dwSessionID = pSession->m_iUniqueID;
   int iSpecialTrick = pSession->m_Settings.m_iSpecialTrick;

   m_State = FILESTATE_PARSING;

   pSession->PostUpdate(WM_REFRESHNODES, dwSessionID);

   // Read the file into a large string buffer
   // NOTE(review): the bytes are copied 1:1 into the CString buffer, which
   // presumably assumes a non-Unicode build - confirm.
   CString sTxt;
   UINT nLen = fMem.GetLength(); 
   fMem.SeekToBegin();
   fMem.Read(sTxt.GetBuffer(nLen), nLen);
   sTxt.ReleaseBuffer(nLen);

   // Sniff the first 200 characters for an HTML signature. The comparison
   // is done on a lower-cased copy so that "<!doctype html>", "<Html>" and
   // similar spellings are recognized too.
   CString sBegin = sTxt.Left(200);
   sBegin.MakeLower();
   CString sEmpty;

   if( !sBegin.IsEmpty() && (sBegin.Find(_T("<html")) >= 0 || sBegin.Find(_T("<!doctype ")) >= 0) )
   {
      m_bIsHtml = TRUE;

      // Walk over every token FindToken reports and queue the URL it carries
      CString sValue;
      CString sTag;
      int pos = 0;
      int endpos = 0;
      while( FindToken(sTxt, pos, sValue, sTag, endpos) ) {
         sValue = ExtractURL(sValue, m_sURL);
        
         if( !sValue.IsEmpty() )
            AddUrl(pSession, sValue, sTag);
         
         if( iSpecialTrick == 1 && sValue.Find(_T("001.")) > 0 )
            SpecialTrick1(pSession, sTxt, sValue);
         
         pos = endpos;
      }

      // SPECIAL TRICK #1: pages without an author marker are not stored
      if( iSpecialTrick == 1 && sTxt.Find(_T("<b>Author</b>")) < 0 )
         m_bSkipStoreFile = TRUE;

      // SPECIAL TRICK #2: queue the URL that CrackCode extracts from the page
      if( iSpecialTrick == 2 && CrackCode(sTxt, sValue) ) {
         sValue = ExtractURL(sValue, m_sURL);
         AddUrl(pSession, sValue, sEmpty);
      }
   }

   m_State = FILESTATE_DONE;

   return TRUE;
}

// Writes the downloaded content to disk.
//   pSession  - session used for error reporting
//   sPath     - target folder; created first through CDir::Create
//   sFilename - full path of the file to create (an existing file is replaced)
//   fMem      - memory file holding the content; its buffer is detached
//               and freed here once the target file has been opened
// Returns TRUE when the file was written. Both a failed open and an
// exception while writing are reported as FILESTATE_ERROR / IDS_LOG_FILEWRITE.
BOOL CDownloadFile::DoWriteFile(CSession* pSession, const CString& sPath, const CString& sFilename, CMemFile& fMem)
{
   // A CString has to be cast explicitly when passed through "...".
   TRACE(_T("Write File: %s\n"), (LPCTSTR) sFilename);

   CDir dir;
   dir.Create(sPath);

   BOOL bOk = FALSE;

   LPBYTE pData = NULL;

   TRY
   {
      CFile file;
      bOk = file.Open(sFilename, CFile::modeCreate|CFile::modeWrite|CFile::typeBinary|CFile::shareExclusive);
      if( bOk ) {
         DWORD nLen = fMem.GetLength();
         pData = fMem.Detach();
         file.Write(pData, nLen);
         file.Close();
      }
      else {
         // Open() signals failure through its return value rather than an
         // exception, so report it here; otherwise the failure would go
         // unnoticed in the log and in the file state.
         ReportErrorState(pSession, FILESTATE_ERROR, IDS_LOG_FILEWRITE);
      }
   }
   CATCH_ALL(e)
   {
      ReportErrorState(pSession, FILESTATE_ERROR, IDS_LOG_FILEWRITE);
      bOk = FALSE;
   }
   END_CATCH_ALL

   // The detached buffer is ours to release, also after a failed write
   if( pData != NULL )
      free(pData);  // CMemFile does malloc/free      

   return bOk;
}

// Queues a URL found while parsing this file, provided it passes the
// session's extension, repetition, depth and tag filters.
//   pSession - session owning the file queue and the URL hash
//   sValue   - absolute URL of the candidate
//   sTag     - tag text the URL was found in (matched against the image tag filter)
// Returns TRUE when a new CDownloadFile was appended to the session,
// FALSE when the candidate was rejected.
BOOL CDownloadFile::AddUrl(CSession *pSession, const CString& sValue, const CString& sTag)
{ 
   if( sValue.IsEmpty() ) 
      return FALSE;

   // Only HTML pages and files with one of the configured extensions qualify
   bool bHtmlLink = true;
   if( !BfxHasValidExtension(sValue, HTML_EXTENSIONS, TRUE) ) {
      if( !BfxHasValidExtension(sValue, pSession->m_Settings.m_sFileExtensions, FALSE) )
         return FALSE;
      bHtmlLink = false;
   }

   // Seen before? (The hash lock is only held for the lookup.)
   if( pSession->m_Settings.m_bPreventRepeating ) {
      CSessionHashLock hashLock(pSession);
      char chSeen;
      if( pSession->m_FilesHash.Lookup(sValue, chSeen) ) 
         return FALSE;
   }

   // Respect the maximum nesting depth...
   if( m_nLevel >= pSession->m_Settings.m_nDownloadLevel ) 
      return FALSE;

   // ...where HTML pages that would not be kept stop one level earlier
   if( bHtmlLink 
       && pSession->m_Settings.m_bDontKeepHTML 
       && m_nLevel + 1 >= pSession->m_Settings.m_nDownloadLevel ) 
   {
      return FALSE;
   }

   // Classify as image: configured extension, but neither CGI nor HTML
   BOOL bTreatAsImage = BfxHasValidExtension(sValue, pSession->m_Settings.m_sFileExtensions, FALSE);
   if( sValue.Find(_T(".cgi")) >= 0 ) 
      bTreatAsImage = FALSE;
   if( BfxHasValidExtension(sValue, HTML_EXTENSIONS, TRUE) )
      bTreatAsImage = FALSE;

   // Images may additionally be filtered on the tag they came from
   if( bTreatAsImage && pSession->m_Settings.m_bUseImageTagFilter ) {
      if( !MatchPatterns(sTag, pSession->m_Settings.m_sImageTagFilter) )
         return FALSE;
   }

   // Accepted: append to the queue and remember the URL in the hash,
   // holding both the files lock and the hash lock
   CSessionFilesLock filesLock(pSession);
   CSessionHashLock  hashLock(pSession);

   CDownloadFile *pNewFile = new CDownloadFile();
   pNewFile->Create(pSession, sValue, m_sURL, bTreatAsImage, m_nLevel + 1);
   pSession->m_Files.AddTail(pNewFile);
   pSession->m_FilesHash.SetAt(sValue, 1);

   return TRUE;
}

// Queues a numbered series of URLs derived from sUrl: every occurrence of
// "001." in sUrl is replaced by "000.", "001.", ... up to (but not including)
// a limit, and each result is offered to AddUrl() with an empty tag.
//
// The limit defaults to 52 and is raised when the page text sHtml contains
// one of the markers "052.", "102.", "202." or "302." beyond its first
// character; the highest marker present decides.
void CDownloadFile::SpecialTrick1(CSession* pSession, const CString& sHtml, const CString& sUrl)
{
   // Ordered from highest to lowest so the first hit is the one that wins
   static const struct { LPCTSTR pszMarker; int iLimit; } aLimits[] = {
      { _T("302."), 402 },
      { _T("202."), 302 },
      { _T("102."), 202 },
      { _T("052."), 102 },
   };
   const int nLimits = sizeof(aLimits) / sizeof(aLimits[0]);

   int iLimit = 52;
   for( int k = 0; k < nLimits; ++k ) {
      if( sHtml.Find(aLimits[k].pszMarker) > 0 ) {
         iLimit = aLimits[k].iLimit;
         break;
      }
   }

   const CString sNoTag;
   for( int iIndex = 0; iIndex < iLimit; ++iIndex ) {
      CString sNumber;
      sNumber.Format(_T("%03d."), iIndex);

      CString sCandidate(sUrl);
      sCandidate.Replace(_T("001."), sNumber);
      AddUrl(pSession, sCandidate, sNoTag);
   }
}

// Rewrites a target path and file name for images stored under numeric
// directories (e.g. a path ending in "/images/1/100/").
//
//   pSession - not used by this function.
//   sPath    - in/out: trailing digits, slashes, backslashes and dots are
//              removed, then trailing whitespace, and ADDBACKSLASH is applied.
//   sFName   - in/out: for image files with a "simple" name it is replaced by
//              "Img-<removed path part>-<old name>" (old name shorter than
//              8 characters) or "Img-<removed path part><last 4 characters
//              of old name>" (longer names).
void CDownloadFile::SpecialTrick3(CSession* pSession, CString& sPath, CString& sFName)
{
   const CString sOldPath = sPath;
   const CString sOldFName = sFName;

   // Remove URL ending with "/images/1/100/"...
   sPath.TrimRight(_T("1234567890\\/."));

   // A name is "simple" when, without its .jpg/.png suffix, it contains none
   // of the characters listed below.
   // NOTE(review): lower-case 'l' is absent from the list, unlike the other
   // letters after 'f' - confirm whether that is intentional.
   CString sPart = sFName;
   sPart.Replace(_T(".jpg"), _T(""));
   sPart.Replace(_T(".png"), _T(""));
   const BOOL bIsSimpleFilename = sPart.FindOneOf(_T("ghijkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_")) < 0;

   // Take image filename from last path components...
   if( m_bIsImage && bIsSimpleFilename ) {
      // The part of the path that was trimmed off above, with separators
      // turned into dashes
      CString sNamePart = sOldPath.Mid(sPath.GetLength());
      sNamePart.Replace(_T('\\'), _T('-'));
      sNamePart.Replace(_T('/'), _T('-'));
      sNamePart.TrimLeft(_T(".-"));
      sNamePart.TrimRight(_T(".-"));

      // Cases:
      //   4.jpg
      //   432ba543a47989a23.jpg
      // CString arguments are cast explicitly because Format() is variadic.
      if( sOldFName.GetLength() < 8 ) {
         sFName.Format(_T("Img-%s-%s"),
            static_cast<LPCTSTR>(sNamePart),
            static_cast<LPCTSTR>(sOldFName));
      }
      else {
         const CString sSuffix = sOldFName.Right(4);
         sFName.Format(_T("Img-%s%s"),
            static_cast<LPCTSTR>(sNamePart),
            static_cast<LPCTSTR>(sSuffix));
      }
   }

   sPath.TrimRight();

   ADDBACKSLASH(sPath);
}

// Decodes an obfuscated URL embedded in the page text.
//
// The text is searched for the marker "return lD(". The encoded string is
// taken from one character after the marker (skipping the opening quote) up
// to the next single quote. Its last character is a digit-style offset
// (character minus '0'); every preceding character is decoded by subtracting
// that offset.
//
//   sTxt    - page text to search.
//   sResult - receives the decoded string; always cleared first.
//
// Returns TRUE when the marker and a non-empty encoded string were found
// (sResult may still be empty when the encoded string holds only the offset
// character), FALSE otherwise.
BOOL CDownloadFile::CrackCode(const CString &sTxt, CString &sResult)
{
   sResult.Empty();

   static const TCHAR szMarker[] = _T("return lD(");

   const int posMarker = sTxt.Find(szMarker);
   if( posMarker < 0 )
      return FALSE;

   // Skip the marker itself plus the opening quote that follows it
   const int posStart = posMarker + static_cast<int>(_tcslen(szMarker)) + 1;
   const int posEnd = sTxt.Find(_T('\''), posStart);

   // No closing quote, or nothing between the quotes: there is no offset
   // character to read, so bail out instead of indexing position -1.
   if( posEnd <= posStart )
      return FALSE;

   const CString sCodedUrl = sTxt.Mid(posStart, posEnd - posStart);
   const int nPayload = sCodedUrl.GetLength() - 1;
   const int iOffset = sCodedUrl.GetAt(nPayload) - _T('0');

   for( int i = 0; i < nPayload; i++ )
      sResult += static_cast<TCHAR>(sCodedUrl.GetAt(i) - iOffset);

   return TRUE;
}

BOOL CDownloadFile::FindToken(const CString& sTxt, int Pos, CString& sResult, CString& sTag, int& endpos)
// A function that scans the document from a particular position (the Pos argument)
// for known HMTL tags.
// It then parses the tag to find known link attributes (e.g. HREF and IMG attributes)
// and extracts the string within.
{
   static LPCTSTR szTags[] = {
      _T("<a"),
      _T("<img"),
      _T("<area"),
      _T("<embed"),
      _T("<option"),
      _T("<table"),
      _T("<frame"),
      _T("<iframe"),
      _T("<td"),
      NULL };
   static LPCTSTR szAttribs[] = {
      _T(" data-src="),
      _T(" src="),
      _T(" href="),
      _T(" value="),
      _T(" background="),
      NULL };

   int minpos = sTxt.Find('<', Pos);
   if( minpos < 0 ) return FALSE;
   
   bool bFound = false;
   while( !bFound && minpos >= 0 ) {
      for( LPCTSTR* pTags = szTags; *pTags != NULL && !bFound; pTags++ ) {
         if( _tcsnicmp(const_cast<LPTSTR>(static_cast<LPCTSTR>(sTxt)) + minpos, *pTags, _tcslen(*pTags)) == 0 )
            bFound = true;
      }
      if( bFound ) break;
      minpos = sTxt.Find('<', minpos + 1);
   }
   if( !bFound ) return FALSE;

   endpos = sTxt.Find(_T('>'), minpos);
   if( endpos < 0 ) return FALSE;

   // From now on, we better return TRUE - even when
   // errors occur. This is because we have located the END tag
   // and we want to continue with the next tag!

   sResult.Empty();

   sTag = sTxt.Mid(minpos + 1, endpos - minpos);
   
   minpos = sTag.Find(_T(' '));
   bFound = false;
   while( !bFound && minpos >= 0 ) {
      for( LPCTSTR* pAttribs = szAttribs; *pAttribs != NULL && !bFound; pAttribs++ ) {
         if( _tcsnicmp(const_cast<LPTSTR>(static_cast<LPCTSTR>(sTag)) + minpos, *pAttribs, _tcslen(*pAttribs)) == 0 )
            bFound = true;
      }
      if( bFound ) break;
      minpos = sTag.Find(' ', minpos + 1);
   }
   if( !bFound ) return TRUE;

   minpos = sTag.Find('=', minpos);
   if( minpos < 0 ) return TRUE;

   if( sTag.GetLength() < minpos + 3 ) return TRUE;

   TCHAR chQuote = sTag[minpos + 1];
   int iOffset = 1;
   if( chQuote != '\'' && chQuote != '\"' ) 
      chQuote = ' ', iOffset = 0;   
   int epos = sTag.Find(chQuote, minpos + 1 + iOffset);
   if( epos < 0 ) 
      epos = sTag.Find('>', minpos);
   if( epos < 0 ) 
      return TRUE;

   sResult = sTag.Mid(minpos + 1 + iOffset, epos - minpos - 1 - iOffset);

   if( sResult.IsEmpty() ) return TRUE;   

//   TRACE("Found tag: %s\n", sResult.Left(500));

   return TRUE;
};

// Tidies up a path fragment in place so it reads better as a folder name:
//   - removes "www." and a list of generic folder names ("img\", "pics\",
//     "gallery\", ...), leaving only the backslash of each,
//   - turns "%20" into a space,
//   - drops leading "..\" components,
//   - upper-cases the first character and every character that follows one
//     of '_', '-', ' ', '\' or '/',
//   - replaces underscores by spaces,
//   - when a '.' occurs before ".com", drops everything up to and including
//     that first dot (e.g. a leading sub-domain).
void CDownloadFile::PrettyPath(CString& sPart)
{
   // Generic folder names that carry no information. Matching is plain
   // substring replacement, applied in exactly this order. "img\" is listed
   // twice on purpose: removing "big\" can expose a new "img\" (from
   // "imgbig\").
   // NOTE(review): a plain "bigimages\" is already reduced to "big\" by the
   // earlier "images\" entry before the "bigimages\" entry runs - confirm
   // whether the longer names were meant to be tried first.
   static LPCTSTR szNoise[] = {
      _T("img\\"),
      _T("big\\"),
      _T("Big\\"),
      _T("img\\"),
      _T("Img\\"),
      _T("pic\\"),
      _T("Pic\\"),
      _T("pics\\"),
      _T("Pics\\"),
      _T("full\\"),
      _T("Full\\"),
      _T("image\\"),
      _T("Image\\"),
      _T("large\\"),
      _T("Large\\"),
      _T("images\\"),
      _T("Images\\"),
      _T("content\\"),
      _T("Content\\"),
      _T("gallery\\"),
      _T("Gallery\\"),
      _T("galleries\\"),
      _T("Galleries\\"),
      _T("bigimages\\"),
      _T("BigImages\\"),
      NULL };

   // Path formatting
   sPart.Replace(_T("www."), _T(""));
   for( LPCTSTR* ppszNoise = szNoise; *ppszNoise != NULL; ppszNoise++ )
      sPart.Replace(*ppszNoise, _T("\\"));
   sPart.Replace(_T("%20"), _T(" "));

   // Drop leading "..\" components. The prefix is three characters long; a
   // fourth one is skipped only when it is a second backslash (left behind by
   // the replacements above). The path is left alone when nothing would
   // remain after the prefix.
   while( sPart.Left(3) == _T("..\\") ) {
      int nSkip = 3;
      if( sPart.GetLength() > 3 && sPart.GetAt(3) == _T('\\') )
         nSkip = 4;
      if( sPart.GetLength() <= nSkip )
         break;
      sPart = sPart.Mid(nSkip);
   }

   // Capitalize words...
   // A character that is upper-cased is not itself examined as a separator,
   // so of two adjacent separators only the first one arms the flag.
   bool bUpperNext = true;
   for( int i = 0; i < sPart.GetLength(); i++ ) {
      TCHAR ch = sPart.GetAt(i);
      if( bUpperNext ) {
         sPart.SetAt(i, _totupper(ch));
         bUpperNext = false;
      }
      else if( _tcschr(_T("_- \\/"), ch) != NULL )
         bUpperNext = true;
   }

   // Remove nasty characters...
   sPart.Replace(_T('_'), _T(' '));

   // Strip the leading host label for regular ".com" domains...
   int iPosDot = sPart.Find(_T('.'));
   int iPosDotCom = sPart.Find(_T(".com"));
   if( iPosDot > 0 && iPosDotCom > 0 && iPosDot < iPosDotCom )
      sPart = sPart.Mid(iPosDot + 1);
}

// Turns an attribute value delivered by FindToken() into a fully qualified
// URL.
//
//   sValue - the raw attribute value; may be absolute, protocol-relative
//            ("//host/..."), root-relative ("/...") or relative.
//   sURL   - URL of the document the value was found in; used as the base.
//
// Returns the resolved URL, or an empty string when the value is empty, is a
// fragment reference ("#..."), contains "javascript" (any case), when sURL
// cannot be parsed or is not HTTP/HTTPS/FILE, or when WinINet cannot combine
// the two. Values starting with "http://", "https://" or "www" are returned
// as they are (after trimming and entity decoding).
CString CDownloadFile::ExtractURL(CString sValue, const CString& sURL)
{
   sValue.TrimLeft();
   sValue.TrimRight();

   if( sValue.IsEmpty() )
      return CString();
   if( sValue.GetAt(0) == _T('#') )
      return CString();

   // Lower-case copy for the case-insensitive tests below
   CString sValueLowerCase( sValue );
   sValueLowerCase.MakeLower();

   if( sValueLowerCase.Find(_T("javascript")) >= 0 )
      return CString();

   CString sHost;
   CString sPage;
   DWORD dwType = 0;
   INTERNET_PORT nPort = 0;
   if( !AfxParseURL( sURL, dwType, sHost, sPage, nPort ) )
      return CString();

   // We support HTTP, HTTPS and local FILE protocol
   CString sProtocol;
   INTERNET_PORT nDefaultPort = 0;
   switch( dwType ) {
   case AFX_INET_SERVICE_HTTP:
      sProtocol = _T("http:");
      nDefaultPort = INTERNET_DEFAULT_HTTP_PORT;
      break;
   case AFX_INET_SERVICE_HTTPS:
      sProtocol = _T("https:");
      nDefaultPort = INTERNET_DEFAULT_HTTPS_PORT;
      break;
   case AFX_INET_SERVICE_FILE:
      sProtocol = _T("file:");
      break;
   default:
      return CString();
   }

   // Rebuild "scheme://host", keeping an explicit port of the base URL so
   // that links on a server running on a non-default port stay on that port.
   CString sServer = sProtocol + _T("//") + sHost;
   if( nDefaultPort != 0 && nPort != 0 && nPort != nDefaultPort ) {
      CString sPort;
      sPort.Format(_T(":%u"), static_cast<UINT>(nPort));
      sServer += sPort;
   }

   // Look for known URL constructs and return fully qualified URL...
   sValue.Replace(_T('\\'), _T('/'));

   // Decode the few HTML entities that show up in links. "&amp;" goes last so
   // that an escaped entity such as "&amp;lt;" becomes "&lt;", not "<".
   sValue.Replace(_T("&lt;"), _T("<"));
   sValue.Replace(_T("&gt;"), _T(">"));
   sValue.Replace(_T("&amp;"), _T("&"));

   if( sValue.Left(2) == _T("//") )
      return sProtocol + sValue;
   if( sValue.Left(1) == _T("/") )
      return sServer + sValue;
   if( sValueLowerCase.Left(7) == _T("http://") )
      return sValue;
   if( sValueLowerCase.Left(8) == _T("https://") )
      return sValue;
   if( sValue.Left(3) == _T("www") )
      return sValue;

   // It's a relative URL. Use WinINet library to resolve the URL.
   CString sResult;
   DWORD dwBufLen = MAX_URL_LEN - 1;
   const BOOL bRes = ::InternetCombineUrl(sServer + sPage, sValue, sResult.GetBuffer(MAX_URL_LEN), &dwBufLen, ICU_BROWSER_MODE);
   sResult.ReleaseBuffer();
   if( !bRes )
      return CString();

   return sResult;
}

// Tells whether this file is currently being worked on, i.e. whether its
// state is connecting, downloading or parsing.
bool CDownloadFile::IsBusy() const
{
   return m_State == FILESTATE_CONNECTING
       || m_State == FILESTATE_DOWNLOADING
       || m_State == FILESTATE_PARSING;
}

// Moves the file into the given state, writes a log entry for it and updates
// the session's skipped/failed counters. Does nothing when the file already
// is in that state.
//
//   pSession - session that receives the log entry and owns the counters.
//   State    - the new state.
//   nRes     - resource id of the log message; the file's URL is passed
//              along with it.
void CDownloadFile::ReportErrorState(CSession *pSession, DownloadState State, UINT nRes)
{
   const bool bUnchanged = (m_State == State);
   if( bUnchanged )
      return;

   m_State = State;

   const bool bSkipped =
      (State == FILESTATE_SKIPPED || State == FILESTATE_ALREADYTHERE);
   const bool bInformational =
      (nRes == IDS_LOG_SKIPPED || nRes == IDS_LOG_STOPPED || nRes == IDS_LOG_EMPTYFILE);

   // The message id takes precedence over the state when picking the severity
   const LogType logtype = bInformational ? LOGTYPE_LOG
                         : (bSkipped ? LOGTYPE_WARNING : LOGTYPE_ERROR);
   pSession->Log(logtype, nRes, m_sURL);

   if( bSkipped )
      pSession->m_Info.m_nFilesSkipped++;
   else
      pSession->m_Info.m_nFilesFailed++;
}

// Aborts a transfer that is in progress.
//
// When a file object is attached (m_pFile), the skip-request flag is raised
// and the file object is told to abort; bWasOnline is then set to TRUE.
// bWasOnline is left untouched when there is no file object or when the
// abort raised an exception.
void CDownloadFile::Abort(BOOL& bWasOnline)
{
   // Nothing to abort
   if( m_pFile == NULL )
      return;

   TRACE(_T("Aborting download!\n"));
   TRY
   {
      m_bSkipRequest = TRUE;
      m_pFile->Abort();
      bWasOnline = TRUE;
   }
   CATCH_ALL(e)
   {
      TRACE(_T("Download abort failed!!!\n"));
   }
   END_CATCH_ALL
}

