#ifndef OBJTOOLS_DATA_LOADERS_WGS___WGSLOADER_IMPL__HPP
#define OBJTOOLS_DATA_LOADERS_WGS___WGSLOADER_IMPL__HPP

/*  $Id: wgsloader_impl.hpp 660413 2022-12-15 18:09:00Z vasilche $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Eugene Vasilchenko
 *
 * File Description: WGS file data loader
 *
 * ===========================================================================
 */


#include <corelib/ncbistd.hpp>
#include <corelib/ncbimtx.hpp>
#include <sra/data_loaders/wgs/wgsloader.hpp>
#include <sra/readers/sra/wgsread.hpp>
#include <util/limited_size_map.hpp>

BEGIN_NCBI_SCOPE

// Forward declaration only: in this header the type is used solely as
// CRef<CThreadNonStop> (see CWGSDataLoader_Impl::m_UpdateThread).
class CThreadNonStop;

BEGIN_SCOPE(objects)

// Forward declarations for names used by the class declarations below.
// CWGSDataLoader_Impl, CWGSFileInfo and CWGSBlobId are defined later in this
// header; CWGSSeqInfo and CWGSResolver are not defined here.
class CDataLoader;
class CWGSDataLoader_Impl;
class CWGSSeqInfo;
class CWGSFileInfo;
class CWGSBlobId;
class CWGSResolver;

// Holds what is kept per WGS prefix: the prefix string, a CWGSDb object and
// a mutex.  Members without an inline body are defined outside this header.
class CWGSFileInfo : public CObject
{
public:
    CWGSFileInfo();
    CWGSFileInfo(const CWGSDataLoader_Impl& impl, CTempString prefix);

    void Open(const CWGSDataLoader_Impl& impl, CTempString prefix);

    // Returns the stored prefix (m_WGSPrefix) as a CTempString.
    CTempString GetWGSPrefix() const { return m_WGSPrefix; }

    // Locator of a single sequence: file reference, row id, sequence type
    // and version.
    struct SAccFileInfo {
        // 'file' is left default-constructed; row_id starts at 0, seq_type
        // at '\0' (regular nucleotide) and version at -1.
        SAccFileInfo() : row_id(0), seq_type('\0'), version(-1) {}

        // Boolean conversion generated by the macro from the 'file' member
        // (presumably true when 'file' is set -- see the macro definition).
        DECLARE_OPERATOR_BOOL_REF(file);

        // Sequence-type predicates; the codes are listed at 'seq_type'.
        bool IsContig() const { return seq_type == '\0'; }
        bool IsScaffold() const { return seq_type == 'S'; }
        bool IsProtein() const { return seq_type == 'P'; }

        CWGSSeqIterator GetContigIterator() const;
        CWGSScaffoldIterator GetScaffoldIterator() const;
        CWGSProteinIterator GetProteinIterator() const;
        SAccFileInfo GetRootFileInfo() const;

        // Both validators set 'version' if necessary.
        bool ValidateAcc(const CTextseq_id& text_id);
        bool ValidateGi(TGi gi);

        bool IsMigrated(const CWGSProteinIterator& iter) const;

        CRef<CWGSFileInfo> file;
        TVDBRowId row_id;
        // '\0' - regular nuc, 'S' - scaffold, 'P' - protein
        char seq_type;
        int version;
    };

    // Lookups that report their result through 'info'.
    bool FindGi(SAccFileInfo& info, TGi gi);
    bool FindProtAcc(SAccFileInfo& info, const CTextseq_id& text_id);

    // Read-only access to the CWGSDb member.
    const CWGSDb& GetDb() const { return m_WGSDb; }

    // Implicit conversion to the same CWGSDb reference GetDb() returns.
    operator const CWGSDb&() const { return m_WGSDb; }

    void LoadBlob(const CWGSBlobId& blob_id,
                  CTSE_LoadLock& load_lock) const;
    void LoadChunk(const CWGSBlobId& blob_id,
                   CTSE_Chunk_Info& chunk) const;

    // Iterator factories taking a blob id.
    CWGSSeqIterator GetContigIterator(const CWGSBlobId& blob_id) const;
    CWGSScaffoldIterator GetScaffoldIterator(const CWGSBlobId& blob_id) const;
    CWGSProteinIterator GetProteinIterator(const CWGSBlobId& blob_id) const;

protected:
    // CWGSDataLoader_Impl is granted access to the protected members.
    friend class CWGSDataLoader_Impl;

    void x_Initialize(const CWGSDataLoader_Impl& impl, CTempString prefix);
    void x_InitMasterDescr();

    string m_WGSPrefix;
    CWGSDb m_WGSDb;
    CMutex m_Mutex;
};


// Implementation object of the WGS data loader, constructed from
// CWGSDataLoader::SLoaderParams.  Only the two m_AddWGSMasterDescr accessors
// have inline bodies; everything else is defined outside this header.
class CWGSDataLoader_Impl : public CObject
{
public:
    explicit CWGSDataLoader_Impl(const CWGSDataLoader::SLoaderParams& params);
    ~CWGSDataLoader_Impl();

    CRef<CWGSFileInfo> GetWGSFile(const string& acc);

    CRef<CWGSFileInfo> GetFileInfo(const CWGSBlobId& blob_id);

    // Short alias for the sequence locator declared in CWGSFileInfo.
    using SAccFileInfo = CWGSFileInfo::SAccFileInfo;

    // Locator lookups, one per kind of key.
    SAccFileInfo GetFileInfoByGi(TGi gi);
    SAccFileInfo GetFileInfoByProtAcc(const CTextseq_id& text_id);
    SAccFileInfo GetFileInfoByAcc(const CTextseq_id& text_id);
    SAccFileInfo GetFileInfoByGeneral(const CDbtag& dbtag);
    SAccFileInfo GetFileInfo(const CSeq_id_Handle& idh);

    // Blob-level entry points.
    CDataLoader::TTSE_LockSet GetRecords(CDataSource* data_source,
                                         const CSeq_id_Handle& idh,
                                         CDataLoader::EChoice choice);
    CRef<CWGSBlobId> GetBlobId(const CSeq_id_Handle& idh);
    CTSE_LoadLock GetBlobById(CDataSource* data_source,
                              const CWGSBlobId& blob_id);
    void LoadBlob(const CWGSBlobId& blob_id, CTSE_LoadLock& load_lock);
    void LoadChunk(const CWGSBlobId& blob_id, CTSE_Chunk_Info& chunk);

    // Per-Seq-id queries.
    using TIds = vector<CSeq_id_Handle>;
    void GetIds(const CSeq_id_Handle& idh, TIds& ids);
    CDataLoader::SAccVerFound GetAccVer(const CSeq_id_Handle& idh);
    CDataLoader::SGiFound GetGi(const CSeq_id_Handle& idh);
    TTaxId GetTaxId(const CSeq_id_Handle& idh);
    TSeqPos GetSequenceLength(const CSeq_id_Handle& idh);
    CDataLoader::SHashFound GetSequenceHash(const CSeq_id_Handle& idh);
    CDataLoader::STypeFound GetSequenceType(const CSeq_id_Handle& idh);

    // Accessors for the m_AddWGSMasterDescr flag.
    bool GetAddWGSMasterDescr() const { return m_AddWGSMasterDescr; }
    void SetAddWGSMasterDescr(bool flag) { m_AddWGSMasterDescr = flag; }

protected:
    // CWGSFileInfo is granted access to the non-public members.
    friend class CWGSFileInfo;

    CWGSResolver& GetResolver();

private:
    // first:
    //   false if explicitly listed file in the loader params
    //   true if dynamically loaded SRA
    // second: SRA accession or wgs file path
    // NOTE(review): no pair type matching this first/second description is
    // visible in this header; the comment may be stale -- confirm.

    // WGS files by accession
    using TFixedFiles = map<string, CRef<CWGSFileInfo> >;
    using TFoundFiles = limited_size_map<string, CRef<CWGSFileInfo> >;

    // mutex guarding input into the map
    CMutex m_Mutex;
    CVDBMgr m_Mgr;
    string m_WGSVolPath;
    CRef<CWGSResolver> m_Resolver;
    unsigned m_UpdateDelay;
    CRef<CThreadNonStop> m_UpdateThread;
    TFixedFiles m_FixedFiles;
    TFoundFiles m_FoundFiles;
    bool m_AddWGSMasterDescr;
    bool m_ResolveGIs;
    bool m_ResolveProtAccs;
};


// Blob id of the WGS loader.  It can be built from a string or from a
// CWGSFileInfo::SAccFileInfo, and exposes its four fields publicly.
class CWGSBlobId : public CBlobId
{
public:
    explicit CWGSBlobId(CTempString str);
    explicit CWGSBlobId(const CWGSFileInfo::SAccFileInfo& info);
    ~CWGSBlobId();

    // wgs file name or SRR accession
    string m_WGSPrefix;
    // NOTE(review): presumably the same codes as SAccFileInfo::seq_type
    // ('\0', 'S', 'P') -- confirm against the constructor.
    char m_SeqType;
    TVDBRowId m_RowId;
    int m_Version;

    // string blob id representation:
    // eBlobType_annot_plain_id
    string ToString() const;
    void FromString(CTempString str);

    // Comparisons against any CBlobId.
    bool operator<(const CBlobId& id) const;
    bool operator==(const CBlobId& id) const;
};


END_SCOPE(objects)
END_NCBI_SCOPE

#endif  // OBJTOOLS_DATA_LOADERS_WGS___WGSLOADER_IMPL__HPP
