/// Overrides the virtual Asset::Create() for this asset type.
/// NOTE(review): what is set up here is not visible in the header —
/// see the out-of-line definition.
26 virtual void Create()
override;
/// Overrides the virtual Asset::Destroy() for this asset type.
/// NOTE(review): what is released here is not visible in the header —
/// see the out-of-line definition.
27 virtual void Destroy()
override;
37 const std::vector<std::string>&
GetVocab()
const {
return mVocab; }
/// Looks up a C string and returns an int result.
/// @param str String to look up (passed as a const char*).
/// @return NOTE(review): presumably the id of the vocabulary entry matching
///         `str`; the value returned when nothing matches is not visible in
///         this header — confirm against the definition.
47 int StrLookup(
const char* str);
/// Encodes `text`, writing results through `outTokens` (a non-const
/// reference to a vector of int32_t).
/// @param text      Input text as a C string.
/// @param addBos    NOTE(review): presumably requests a beginning-of-sequence
///                  token — confirm against the definition.
/// @param addEos    NOTE(review): presumably requests an end-of-sequence
///                  token — confirm against the definition.
/// @param outTokens Receives the output. Whether existing contents are
///                  cleared or appended to is not visible in this header.
48 void Encode(
const char* text,
bool addBos,
bool addEos, std::vector<int32_t>& outTokens);
/// Produces a std::string (returned by value) from a pair of token values.
/// @param prevToken NOTE(review): presumably the token decoded immediately
///                  before `token`; how it affects the result is not visible
///                  in this header — confirm against the definition.
/// @param token     NOTE(review): presumably the token id to turn into text.
49 std::string Decode(
int prevToken,
int token);
/// Static handler taking a Datum, an index, and an untyped pointer to a new
/// value.
/// NOTE(review): presumably registered as a property-change callback; the
/// meaning of the bool return and the concrete type behind `newValue` are
/// not visible in this header — confirm against the definition.
53 static bool HandlePropChange(
Datum* datum, uint32_t index,
const void* newValue);
/// NOTE(review): presumably builds the sorted vocabulary and sets
/// mSortedVocabBuilt — confirm against the definition.
55 void BuildSortedVocab();
/// NOTE(review): presumably fills mBytePieces — confirm against the
/// definition.
56 void InitBytePieces();
// Defaults to 0. NOTE(review): presumably the length of the longest
// vocabulary entry — confirm where it is assigned.
61 uint32_t mMaxTokenLength = 0;
// Defaults to false. NOTE(review): presumably set once the sorted vocabulary
// has been built, so the build can be skipped afterwards — confirm.
65 bool mSortedVocabBuilt =
false;
// Fixed 512-char buffer with no in-class initializer.
// NOTE(review): 512 looks like 256 byte values x 2 chars (byte + NUL
// terminator) — confirm against InitBytePieces().
68 char mBytePieces[512];
Platform
Definition EngineTypes.h:31
#define POLYPHASE_API
Definition PolyphaseAPI.h:31
virtual bool Import(const std::string &path, ImportOptions *options=nullptr)
Definition Asset.cpp:244
virtual void Create()
Definition Asset.cpp:77
virtual void SaveStream(Stream &stream, Platform platform)
Definition Asset.cpp:236
virtual glm::vec4 GetTypeColor()
Definition Asset.cpp:254
virtual const char * GetTypeImportExt()
Definition Asset.cpp:264
virtual const char * GetTypeName()
Definition Asset.cpp:259
virtual void GatherProperties(std::vector< Property > &outProps) override
Definition Asset.cpp:249
virtual void LoadStream(Stream &stream, Platform platform)
Definition Asset.cpp:222
virtual void Destroy()
Definition Asset.cpp:87
Asset containing tokenizer vocabulary for TinyLLM models.
Definition TinyLLMTokenizerAsset.h:17
std::vector< std::string > mVocab
Definition TinyLLMTokenizerAsset.h:59
uint32_t GetMaxTokenLength() const
Definition TinyLLMTokenizerAsset.h:39
std::vector< float > mVocabScores
Definition TinyLLMTokenizerAsset.h:60
const std::vector< float > & GetVocabScores() const
Definition TinyLLMTokenizerAsset.h:38
DECLARE_ASSET(TinyLLMTokenizerAsset, Asset)
const std::vector< std::string > & GetVocab() const
Definition TinyLLMTokenizerAsset.h:37
int32_t GetVocabSize() const
Definition TinyLLMTokenizerAsset.h:40
std::vector< TokenIndex > mSortedVocab
Definition TinyLLMTokenizerAsset.h:64
Definition TinyLLMTokenizerAsset.h:43
int id
Definition TinyLLMTokenizerAsset.h:45
const char * str
Definition TinyLLMTokenizerAsset.h:44