Polyphase Game Engine
Loading...
Searching...
No Matches
TinyLLMTokenizerAsset Class Reference

Asset containing tokenizer vocabulary for TinyLLM models. More...

#include <TinyLLMTokenizerAsset.h>

Inheritance diagram for TinyLLMTokenizerAsset:
Asset Object

Classes

struct  TokenIndex
 

Public Member Functions

 DECLARE_ASSET (TinyLLMTokenizerAsset, Asset)
 
 TinyLLMTokenizerAsset ()
 
virtual ~TinyLLMTokenizerAsset ()
 
virtual void Create () override
 
virtual void Destroy () override
 
virtual void LoadStream (Stream &stream, Platform platform) override
 
virtual void SaveStream (Stream &stream, Platform platform) override
 
virtual bool Import (const std::string &path, ImportOptions *options) override
 
virtual void GatherProperties (std::vector< Property > &outProps) override
 
virtual glm::vec4 GetTypeColor () override
 
virtual const char * GetTypeName () override
 
virtual const char * GetTypeImportExt () override
 
const std::vector< std::string > & GetVocab () const
 
const std::vector< float > & GetVocabScores () const
 
uint32_t GetMaxTokenLength () const
 
int32_t GetVocabSize () const
 
int StrLookup (const char *str)
 
void Encode (const char *text, bool addBos, bool addEos, std::vector< int32_t > &outTokens)
 
std::string Decode (int prevToken, int token)
 
- Public Member Functions inherited from Asset
 DECLARE_FACTORY_MANAGER (Asset)
 
 DECLARE_FACTORY (Asset, Asset)
 
 DECLARE_OBJECT (Asset, Object)
 
 Asset ()
 
virtual ~Asset ()
 
virtual void Copy (Asset *srcAsset)
 
const std::string & GetName () const
 
bool IsLoaded () const
 
bool IsRefCounted () const
 
int32_t GetRefCount () const
 
bool IsEmbedded () const
 
void SetEmbedded (bool embed)
 
bool IsEngineAsset () const
 
void SetEngineAsset (bool engineAsset)
 
void SetName (const std::string &name)
 
void IncrementRefCount ()
 
void DecrementRefCount ()
 
void LoadFile (const char *path, AsyncLoadRequest *request=nullptr)
 
void LoadEmbedded (const EmbeddedFile *embeddedAsset, AsyncLoadRequest *request=nullptr)
 
void SaveFile (const char *path, Platform platform)
 
virtual bool ShouldSnapshotForPie () const
 
bool IsTransient () const
 
void SetTransient (bool transient)
 
uint64_t GetUuid () const
 
void SetUuid (uint64_t uuid)
 
void EnsureUuid ()
 
void WriteHeader (Stream &stream)
 
- Public Member Functions inherited from Object
virtual ~Object ()=default
 
virtual const char * RuntimeName () const =0
 
virtual const char * RuntimeParentName () const =0
 
virtual RuntimeId InstanceRuntimeId () const =0
 
virtual Object * QueryInterface (RuntimeId id) const
 
virtual bool Is (RuntimeId id) const
 
virtual bool Is (const char *name) const
 
virtual bool DrawCustomProperty (Property &prop)
 
template<typename T >
T * As () const
 
virtual bool Equals (const Object *rhs) const
 

Protected Member Functions

void BuildSortedVocab ()
 
void InitBytePieces ()
 

Static Protected Member Functions

static bool HandlePropChange (Datum *datum, uint32_t index, const void *newValue)
 

Protected Attributes

std::vector< std::string > mVocab
 
std::vector< float > mVocabScores
 
uint32_t mMaxTokenLength = 0
 
std::vector< TokenIndex > mSortedVocab
 
bool mSortedVocabBuilt = false
 
char mBytePieces [512]
 
- Protected Attributes inherited from Asset
uint32_t mVersion = 0
 
TypeId mType = INVALID_TYPE_ID
 
uint64_t mUuid = 0
 
bool mEmbedded = false
 
bool mLoaded = false
 
bool mEnableRefCount = true
 
bool mEngineAsset = false
 
bool mTransient = false
 
std::string mName = "Asset"
 
int32_t mRefCount = 0
 

Additional Inherited Members

- Static Public Member Functions inherited from Asset
static AssetHeader ReadHeader (Stream &stream)
 
static std::string GetNameFromPath (const std::string &path)
 
static std::string GetDirectoryFromPath (const std::string &path)
 
static const char * GetNameFromTypeId (TypeId id)
 
static TypeId GetTypeIdFromName (const char *name)
 
- Static Public Member Functions inherited from Object
static const char * ClassRuntimeName ()
 

Detailed Description

Asset containing tokenizer vocabulary for TinyLLM models.

Constructor & Destructor Documentation

◆ TinyLLMTokenizerAsset()

TinyLLMTokenizerAsset::TinyLLMTokenizerAsset ( )

◆ ~TinyLLMTokenizerAsset()

TinyLLMTokenizerAsset::~TinyLLMTokenizerAsset ( )
virtual

Member Function Documentation

◆ BuildSortedVocab()

void TinyLLMTokenizerAsset::BuildSortedVocab ( )
protected

◆ Create()

void TinyLLMTokenizerAsset::Create ( )
override virtual

Reimplemented from Asset.

◆ DECLARE_ASSET()

TinyLLMTokenizerAsset::DECLARE_ASSET ( TinyLLMTokenizerAsset  ,
Asset   
)

◆ Decode()

std::string TinyLLMTokenizerAsset::Decode ( int  prevToken,
int  token 
)

◆ Destroy()

void TinyLLMTokenizerAsset::Destroy ( )
override virtual

Reimplemented from Asset.

◆ Encode()

void TinyLLMTokenizerAsset::Encode ( const char *  text,
bool  addBos,
bool  addEos,
std::vector< int32_t > &  outTokens 
)

◆ GatherProperties()

void TinyLLMTokenizerAsset::GatherProperties ( std::vector< Property > &  outProps)
override virtual

Reimplemented from Asset.

◆ GetMaxTokenLength()

uint32_t TinyLLMTokenizerAsset::GetMaxTokenLength ( ) const
inline

◆ GetTypeColor()

glm::vec4 TinyLLMTokenizerAsset::GetTypeColor ( )
override virtual

Reimplemented from Asset.

◆ GetTypeImportExt()

const char * TinyLLMTokenizerAsset::GetTypeImportExt ( )
override virtual

Reimplemented from Asset.

◆ GetTypeName()

const char * TinyLLMTokenizerAsset::GetTypeName ( )
override virtual

Reimplemented from Asset.

◆ GetVocab()

const std::vector< std::string > & TinyLLMTokenizerAsset::GetVocab ( ) const
inline

◆ GetVocabScores()

const std::vector< float > & TinyLLMTokenizerAsset::GetVocabScores ( ) const
inline

◆ GetVocabSize()

int32_t TinyLLMTokenizerAsset::GetVocabSize ( ) const
inline

◆ HandlePropChange()

bool TinyLLMTokenizerAsset::HandlePropChange ( Datum *  datum,
uint32_t  index,
const void *  newValue 
)
static protected

◆ Import()

bool TinyLLMTokenizerAsset::Import ( const std::string &  path,
ImportOptions *  options 
)
override virtual

Reimplemented from Asset.

◆ InitBytePieces()

void TinyLLMTokenizerAsset::InitBytePieces ( )
protected

◆ LoadStream()

void TinyLLMTokenizerAsset::LoadStream ( Stream &  stream,
Platform  platform 
)
override virtual

Reimplemented from Asset.

◆ SaveStream()

void TinyLLMTokenizerAsset::SaveStream ( Stream &  stream,
Platform  platform 
)
override virtual

Reimplemented from Asset.

◆ StrLookup()

int TinyLLMTokenizerAsset::StrLookup ( const char *  str)

Member Data Documentation

◆ mBytePieces

char TinyLLMTokenizerAsset::mBytePieces[512]
protected

◆ mMaxTokenLength

uint32_t TinyLLMTokenizerAsset::mMaxTokenLength = 0
protected

◆ mSortedVocab

std::vector<TokenIndex> TinyLLMTokenizerAsset::mSortedVocab
protected

◆ mSortedVocabBuilt

bool TinyLLMTokenizerAsset::mSortedVocabBuilt = false
protected

◆ mVocab

std::vector<std::string> TinyLLMTokenizerAsset::mVocab
protected

◆ mVocabScores

std::vector<float> TinyLLMTokenizerAsset::mVocabScores
protected

The documentation for this class was generated from the following files: