35 float* x,
float* xb,
float* xb2,
float* hb,
float* hb2,
36 float* q,
float* key_cache,
float* value_cache,
float* att,
float* logits,
37 float* token_emb,
float* rms_att,
float* wq,
float* wk,
float* wv,
float* wo,
38 float* rms_ffn,
float* w1,
float* w2,
float* w3,
float* rms_final,
float* wcls,
Asset type for ultra-tiny LLMs (llama2.c format).
void tinyllm_softmax(float *x, int size)
In-place softmax.
Definition TinyLLMInference.cpp:26
void tinyllm_matmul(float *xout, float *x, float *w, int n, int d)
Matrix-vector multiplication: W (d,n) @ x (n,) -> xout (d,)
Definition TinyLLMInference.cpp:46
void tinyllm_rmsnorm(float *o, float *x, float *weight, int size)
RMS normalization.
Definition TinyLLMInference.cpp:11
float * tinyllm_forward(float *x, float *xb, float *xb2, float *hb, float *hb2, float *q, float *key_cache, float *value_cache, float *att, float *logits, float *token_emb, float *rms_att, float *wq, float *wk, float *wv, float *wo, float *rms_ffn, float *w1, float *w2, float *w3, float *rms_final, float *wcls, const TinyLLMConfig *config, int token, int pos)
Single-token forward pass through the transformer.
Definition TinyLLMInference.cpp:57
size_t tinyllm_calc_weights_size(const TinyLLMConfig *config)
Calculate the size of model weights.
Definition TinyLLMInference.cpp:204
size_t tinyllm_calc_runstate_size(const TinyLLMConfig *config, int max_seq_len)
Calculate the size needed for RunState buffers.
Definition TinyLLMInference.cpp:180
Configuration for the transformer model.
Definition TinyLLMAsset.h:27