Singleton that manages LLM inference state.
More...
#include <TinyLLMManager.h>
Public Member Functions

bool LoadModel(TinyLLMAsset *asset, int32_t maxSeqLen = 0)
void UnloadModel()
bool IsModelLoaded() const
TinyLLMAsset * GetModel() const
void Reset()
float * Forward(int32_t token, int32_t pos)
int32_t Sample(float temperature = 1.0f, float topP = 0.9f)
std::string Generate(const std::string &prompt, int32_t maxTokens, float temperature = 1.0f, float topP = 0.9f)
std::vector<int32_t> Encode(const std::string &text, bool addBos = true)
std::string Decode(int32_t prevToken, int32_t token)
bool BeginGenerate(const std::string &prompt, int32_t maxTokens, float temperature = 1.0f, float topP = 0.9f)
std::string ContinueGenerate()
bool IsGenerating() const
void Abort()
float GetLastTokPerSec() const
int32_t GetPosition() const
int32_t GetMaxSeqLen() const
| |
Singleton that manages LLM inference state.
◆ Abort()

void TinyLLMManager::Abort()
◆ BeginGenerate()

bool TinyLLMManager::BeginGenerate(const std::string &prompt, int32_t maxTokens, float temperature = 1.0f, float topP = 0.9f)
◆ ContinueGenerate()

std::string TinyLLMManager::ContinueGenerate()
◆ Create()

static void TinyLLMManager::Create()
◆ Decode()

std::string TinyLLMManager::Decode(int32_t prevToken, int32_t token)
◆ Destroy()

static void TinyLLMManager::Destroy()
◆ Encode()

std::vector<int32_t> TinyLLMManager::Encode(const std::string &text, bool addBos = true)
◆ Forward()

float * TinyLLMManager::Forward(int32_t token, int32_t pos)
◆ Generate()

std::string TinyLLMManager::Generate(const std::string &prompt, int32_t maxTokens, float temperature = 1.0f, float topP = 0.9f)
◆ Get()
◆ GetDefaultMaxSeqLen()

static int32_t TinyLLMManager::GetDefaultMaxSeqLen()
◆ GetLastTokPerSec()

float TinyLLMManager::GetLastTokPerSec() const
◆ GetMaxSeqLen()

int32_t TinyLLMManager::GetMaxSeqLen() const
◆ GetModel()

TinyLLMAsset * TinyLLMManager::GetModel() const
◆ GetPosition()

int32_t TinyLLMManager::GetPosition() const
◆ IsGenerating()

bool TinyLLMManager::IsGenerating() const
◆ IsModelLoaded()

bool TinyLLMManager::IsModelLoaded() const
◆ LoadModel()

bool TinyLLMManager::LoadModel(TinyLLMAsset *asset, int32_t maxSeqLen = 0)
◆ Reset()

void TinyLLMManager::Reset()
◆ Sample()

int32_t TinyLLMManager::Sample(float temperature = 1.0f, float topP = 0.9f)
◆ UnloadModel()

void TinyLLMManager::UnloadModel()
The documentation for this class was generated from the following files: