Polyphase Game Engine
Loading...
Searching...
No Matches
TinyLLMInference.h
Go to the documentation of this file.
1
6 #pragma once
7
8 #include "Assets/TinyLLMAsset.h" // For TinyLLMConfig
9 #include <stdint.h>
10 #include <stddef.h>
11
12 #ifdef __cplusplus
13 extern "C" {
14 #endif
15
19 void tinyllm_rmsnorm(float* o, float* x, float* weight, int size);
20
24 void tinyllm_softmax(float* x, int size);
25
29 void tinyllm_matmul(float* xout, float* x, float* w, int n, int d);
30
34 float* tinyllm_forward(
35 float* x, float* xb, float* xb2, float* hb, float* hb2,
36 float* q, float* key_cache, float* value_cache, float* att, float* logits,
37 float* token_emb, float* rms_att, float* wq, float* wk, float* wv, float* wo,
38 float* rms_ffn, float* w1, float* w2, float* w3, float* rms_final, float* wcls,
39 const TinyLLMConfig* config,
40 int token, int pos
41 );
42
46 size_t tinyllm_calc_runstate_size(const TinyLLMConfig* config, int max_seq_len);
47
51 size_t tinyllm_calc_weights_size(const TinyLLMConfig* config);
52
53 #ifdef __cplusplus
54 }
55 #endif
TinyLLMAsset.h: Asset type for ultra-tiny LLM models (llama2.c format).
void tinyllm_softmax(float *x, int size)
In-place softmax.
Definition TinyLLMInference.cpp:26
void tinyllm_matmul(float *xout, float *x, float *w, int n, int d)
Matrix-vector multiplication: W (d,n) @ x (n,) -> xout (d,)
Definition TinyLLMInference.cpp:46
void tinyllm_rmsnorm(float *o, float *x, float *weight, int size)
RMS normalization.
Definition TinyLLMInference.cpp:11
float * tinyllm_forward(float *x, float *xb, float *xb2, float *hb, float *hb2, float *q, float *key_cache, float *value_cache, float *att, float *logits, float *token_emb, float *rms_att, float *wq, float *wk, float *wv, float *wo, float *rms_ffn, float *w1, float *w2, float *w3, float *rms_final, float *wcls, const TinyLLMConfig *config, int token, int pos)
Single-token forward pass through the transformer.
Definition TinyLLMInference.cpp:57
size_t tinyllm_calc_weights_size(const TinyLLMConfig *config)
Calculate the size of model weights.
Definition TinyLLMInference.cpp:204
size_t tinyllm_calc_runstate_size(const TinyLLMConfig *config, int max_seq_len)
Calculate the size needed for RunState buffers.
Definition TinyLLMInference.cpp:180
TinyLLMConfig: Configuration for the transformer model.
Definition TinyLLMAsset.h:27