Polyphase-Engine/api/TinyLLMInference_8h_source.html

#pragma once


#include "Assets/TinyLLMAsset.h"  // For TinyLLMConfig

#include <stdint.h>

#include <stddef.h>


#ifdef __cplusplus

extern "C" {

#endif


/**
 * RMS normalization.
 *
 * NOTE(review): parameter roles are not visible from this header. The names
 * suggest `o` is the output vector, `x` the input vector, `weight` a
 * per-element scale and `size` the element count of each -- confirm against
 * the definition in TinyLLMInference.cpp.
 */
void tinyllm_rmsnorm(float* o, float* x, float* weight, int size);


/**
 * In-place softmax: the result is written back into `x`.
 *
 * NOTE(review): `size` is presumably the number of elements of `x` to
 * process -- confirm against the definition in TinyLLMInference.cpp.
 */
void tinyllm_softmax(float* x, int size);


/**
 * Matrix-vector multiplication: W (d,n) @ x (n,) -> xout (d,)
 *
 * @param xout Result vector with d elements.
 * @param x    Input vector with n elements.
 * @param w    Matrix W with shape (d,n).
 *             NOTE(review): memory layout (presumably row-major) is not
 *             visible from this header -- confirm against the definition.
 * @param n    Length of x (the second dimension of W).
 * @param d    Length of xout (the first dimension of W).
 */
void tinyllm_matmul(float* xout, float* x, float* w, int n, int d);


/**
 * Single-token forward pass through the transformer.
 *
 * Takes every buffer as a separate raw float pointer, plus the model
 * configuration (TinyLLMConfig, declared in Assets/TinyLLMAsset.h).
 *
 * NOTE(review): buffer roles, shapes and ownership are not visible from this
 * header. Judging by the names only:
 *   - x, xb, xb2, hb, hb2, q, att, logits look like activation/scratch
 *     buffers,
 *   - key_cache and value_cache look like the attention KV cache,
 *   - token_emb, rms_att, wq, wk, wv, wo, rms_ffn, w1, w2, w3, rms_final,
 *     wcls look like model weights.
 * Confirm all of this against the definition in TinyLLMInference.cpp.
 *
 * @param config Model configuration.
 * @param token  NOTE(review): presumably the id of the input token -- confirm.
 * @param pos    NOTE(review): presumably the position of that token in the
 *               sequence -- confirm.
 * @return NOTE(review): presumably a pointer to the `logits` buffer --
 *         confirm against the definition.
 */
float* tinyllm_forward(

    float* x, float* xb, float* xb2, float* hb, float* hb2,

    float* q, float* key_cache, float* value_cache, float* att, float* logits,

    float* token_emb, float* rms_att, float* wq, float* wk, float* wv, float* wo,

    float* rms_ffn, float* w1, float* w2, float* w3, float* rms_final, float* wcls,

    const TinyLLMConfig* config,

    int token, int pos

);


/**
 * Calculate the size needed for RunState buffers.
 *
 * @param config      Model configuration the size is computed for.
 * @param max_seq_len NOTE(review): presumably the maximum sequence length the
 *                    buffers must accommodate -- confirm.
 * @return The computed size.
 *         NOTE(review): the unit (bytes vs. number of floats) is not visible
 *         from this header -- confirm against the definition.
 */
size_t tinyllm_calc_runstate_size(const TinyLLMConfig* config, int max_seq_len);


/**
 * Calculate the size of model weights.
 *
 * @param config Model configuration the size is computed for.
 * @return The computed size.
 *         NOTE(review): the unit (bytes vs. number of floats) is not visible
 *         from this header -- confirm against the definition.
 */
size_t tinyllm_calc_weights_size(const TinyLLMConfig* config);


#ifdef __cplusplus

}

#endif

TinyLLMAsset.h
Asset type for ultra-tiny LLM models (llama2.c format).

tinyllm_softmax
void tinyllm_softmax(float *x, int size)
In-place softmax.
Definition TinyLLMInference.cpp:26

tinyllm_matmul
void tinyllm_matmul(float *xout, float *x, float *w, int n, int d)
Matrix-vector multiplication: W (d,n) @ x (n,) -> xout (d,)
Definition TinyLLMInference.cpp:46

tinyllm_rmsnorm
void tinyllm_rmsnorm(float *o, float *x, float *weight, int size)
RMS normalization.
Definition TinyLLMInference.cpp:11

tinyllm_forward
float * tinyllm_forward(float *x, float *xb, float *xb2, float *hb, float *hb2, float *q, float *key_cache, float *value_cache, float *att, float *logits, float *token_emb, float *rms_att, float *wq, float *wk, float *wv, float *wo, float *rms_ffn, float *w1, float *w2, float *w3, float *rms_final, float *wcls, const TinyLLMConfig *config, int token, int pos)
Single-token forward pass through the transformer.
Definition TinyLLMInference.cpp:57

tinyllm_calc_weights_size
size_t tinyllm_calc_weights_size(const TinyLLMConfig *config)
Calculate the size of model weights.
Definition TinyLLMInference.cpp:204

tinyllm_calc_runstate_size
size_t tinyllm_calc_runstate_size(const TinyLLMConfig *config, int max_seq_len)
Calculate the size needed for RunState buffers.
Definition TinyLLMInference.cpp:180

TinyLLMConfig
Configuration for the transformer model.
Definition TinyLLMAsset.h:27