Polyphase Game Engine
Loading...
Searching...
No Matches
TinyLLMManager Class Reference

Singleton that manages LLM inference state. More...

#include <TinyLLMManager.h>

Classes

struct  ProbIndex
 

Public Member Functions

bool LoadModel (TinyLLMAsset *asset, int32_t maxSeqLen=0)
 
void UnloadModel ()
 
bool IsModelLoaded () const
 
TinyLLMAsset * GetModel () const
 
void Reset ()
 
float * Forward (int32_t token, int32_t pos)
 
int32_t Sample (float temperature=1.0f, float topP=0.9f)
 
std::string Generate (const std::string &prompt, int32_t maxTokens, float temperature=1.0f, float topP=0.9f)
 
std::vector< int32_t > Encode (const std::string &text, bool addBos=true)
 
std::string Decode (int32_t prevToken, int32_t token)
 
bool BeginGenerate (const std::string &prompt, int32_t maxTokens, float temperature=1.0f, float topP=0.9f)
 
std::string ContinueGenerate ()
 
bool IsGenerating () const
 
void Abort ()
 
float GetLastTokPerSec () const
 
int32_t GetPosition () const
 
int32_t GetMaxSeqLen () const
 

Static Public Member Functions

static TinyLLMManager * Get ()
 
static void Create ()
 
static void Destroy ()
 
static int32_t GetDefaultMaxSeqLen ()
 

Detailed Description

Singleton that manages LLM inference state.

Member Function Documentation

◆ Abort()

void TinyLLMManager::Abort ( )

◆ BeginGenerate()

bool TinyLLMManager::BeginGenerate ( const std::string &  prompt,
int32_t  maxTokens,
float  temperature = 1.0f,
float  topP = 0.9f 
)

◆ ContinueGenerate()

std::string TinyLLMManager::ContinueGenerate ( )

◆ Create()

void TinyLLMManager::Create ( )
static

◆ Decode()

std::string TinyLLMManager::Decode ( int32_t  prevToken,
int32_t  token 
)

◆ Destroy()

void TinyLLMManager::Destroy ( )
static

◆ Encode()

std::vector< int32_t > TinyLLMManager::Encode ( const std::string &  text,
bool  addBos = true 
)

◆ Forward()

float * TinyLLMManager::Forward ( int32_t  token,
int32_t  pos 
)

◆ Generate()

std::string TinyLLMManager::Generate ( const std::string &  prompt,
int32_t  maxTokens,
float  temperature = 1.0f,
float  topP = 0.9f 
)

◆ Get()

TinyLLMManager * TinyLLMManager::Get ( )
static

◆ GetDefaultMaxSeqLen()

int32_t TinyLLMManager::GetDefaultMaxSeqLen ( )
static

◆ GetLastTokPerSec()

float TinyLLMManager::GetLastTokPerSec ( ) const

◆ GetMaxSeqLen()

int32_t TinyLLMManager::GetMaxSeqLen ( ) const

◆ GetModel()

TinyLLMAsset * TinyLLMManager::GetModel ( ) const

◆ GetPosition()

int32_t TinyLLMManager::GetPosition ( ) const

◆ IsGenerating()

bool TinyLLMManager::IsGenerating ( ) const

◆ IsModelLoaded()

bool TinyLLMManager::IsModelLoaded ( ) const

◆ LoadModel()

bool TinyLLMManager::LoadModel ( TinyLLMAsset *  asset,
int32_t  maxSeqLen = 0 
)

◆ Reset()

void TinyLLMManager::Reset ( )

◆ Sample()

int32_t TinyLLMManager::Sample ( float  temperature = 1.0f,
float  topP = 0.9f 
)

◆ UnloadModel()

void TinyLLMManager::UnloadModel ( )

The documentation for this class was generated from the following files: