Polyphase Game Engine
Pure C inference kernels for an ultra-tiny LLM (llama2.c style).
Go to the source code of this file.
Functions

| Return type | Function | Description |
|---|---|---|
| `void` | `tinyllm_rmsnorm (float *o, float *x, float *weight, int size)` | RMS normalization. |
| `void` | `tinyllm_softmax (float *x, int size)` | In-place softmax. |
| `void` | `tinyllm_matmul (float *xout, float *x, float *w, int n, int d)` | Matrix-vector multiplication: W (d,n) @ x (n,) -> xout (d,) |
| `float *` | `tinyllm_forward (float *x, float *xb, float *xb2, float *hb, float *hb2, float *q, float *key_cache, float *value_cache, float *att, float *logits, float *token_emb, float *rms_att, float *wq, float *wk, float *wv, float *wo, float *rms_ffn, float *w1, float *w2, float *w3, float *rms_final, float *wcls, const TinyLLMConfig *config, int token, int pos)` | Single-token forward pass through the transformer. |
| `size_t` | `tinyllm_calc_runstate_size (const TinyLLMConfig *config, int max_seq_len)` | Calculate the size needed for RunState buffers. |
| `size_t` | `tinyllm_calc_weights_size (const TinyLLMConfig *config)` | Calculate the size of model weights. |
Pure C inference kernels for an ultra-tiny LLM (llama2.c style).
```c
size_t tinyllm_calc_runstate_size(const TinyLLMConfig *config,
                                  int max_seq_len)
```
Calculate the size needed for RunState buffers.
```c
size_t tinyllm_calc_weights_size(const TinyLLMConfig *config)
```
Calculate the size of model weights.
```c
float *tinyllm_forward(float *x,
                       float *xb,
                       float *xb2,
                       float *hb,
                       float *hb2,
                       float *q,
                       float *key_cache,
                       float *value_cache,
                       float *att,
                       float *logits,
                       float *token_emb,
                       float *rms_att,
                       float *wq,
                       float *wk,
                       float *wv,
                       float *wo,
                       float *rms_ffn,
                       float *w1,
                       float *w2,
                       float *w3,
                       float *rms_final,
                       float *wcls,
                       const TinyLLMConfig *config,
                       int token,
                       int pos)
```
Single-token forward pass through the transformer.
```c
void tinyllm_matmul(float *xout,
                    float *x,
                    float *w,
                    int n,
                    int d)
```
Matrix-vector multiplication: W (d,n) @ x (n,) -> xout (d,)
```c
void tinyllm_rmsnorm(float *o,
                     float *x,
                     float *weight,
                     int size)
```
RMS normalization.
```c
void tinyllm_softmax(float *x,
                     int size)
```
In-place softmax.