Singleton that manages LLM inference state.
More...
#include <TinyLLMManager.h>
Public Member Functions

bool LoadModel(TinyLLMAsset *asset, int32_t maxSeqLen = 0)
void UnloadModel()
bool IsModelLoaded() const
TinyLLMAsset * GetModel() const
void Reset()
float * Forward(int32_t token, int32_t pos)
int32_t Sample(float temperature = 1.0f, float topP = 0.9f)
std::string Generate(const std::string &prompt, int32_t maxTokens, float temperature = 1.0f, float topP = 0.9f)
std::vector<int32_t> Encode(const std::string &text, bool addBos = true)
std::string Decode(int32_t prevToken, int32_t token)
bool BeginGenerate(const std::string &prompt, int32_t maxTokens, float temperature = 1.0f, float topP = 0.9f)
std::string ContinueGenerate()
bool IsGenerating() const
void Abort()
float GetLastTokPerSec() const
int32_t GetPosition() const
int32_t GetMaxSeqLen() const
| |
Singleton that manages LLM inference state.
◆ Abort()

void TinyLLMManager::Abort()
◆ BeginGenerate()

bool TinyLLMManager::BeginGenerate(const std::string &prompt, int32_t maxTokens, float temperature = 1.0f, float topP = 0.9f)
◆ ContinueGenerate()

std::string TinyLLMManager::ContinueGenerate()
◆ Create()

static void TinyLLMManager::Create()
◆ Decode()

std::string TinyLLMManager::Decode(int32_t prevToken, int32_t token)
◆ Destroy()

static void TinyLLMManager::Destroy()
◆ Encode()

std::vector<int32_t> TinyLLMManager::Encode(const std::string &text, bool addBos = true)
◆ Forward()

float * TinyLLMManager::Forward(int32_t token, int32_t pos)
◆ Generate()

std::string TinyLLMManager::Generate(const std::string &prompt, int32_t maxTokens, float temperature = 1.0f, float topP = 0.9f)
◆ Get()
◆ GetDefaultMaxSeqLen()

static int32_t TinyLLMManager::GetDefaultMaxSeqLen()
◆ GetLastTokPerSec()

float TinyLLMManager::GetLastTokPerSec() const
◆ GetMaxSeqLen()

int32_t TinyLLMManager::GetMaxSeqLen() const
◆ GetModel()

TinyLLMAsset * TinyLLMManager::GetModel() const
◆ GetPosition()

int32_t TinyLLMManager::GetPosition() const
◆ IsGenerating()

bool TinyLLMManager::IsGenerating() const
◆ IsModelLoaded()

bool TinyLLMManager::IsModelLoaded() const
◆ LoadModel()

bool TinyLLMManager::LoadModel(TinyLLMAsset *asset, int32_t maxSeqLen = 0)
◆ Reset()

void TinyLLMManager::Reset()
◆ Sample()

int32_t TinyLLMManager::Sample(float temperature = 1.0f, float topP = 0.9f)
◆ UnloadModel()

void TinyLLMManager::UnloadModel()
The documentation for this class was generated from the following files: