| | #pragma once |
| |
|
| | #include "llama.h" |
| | #include "common.h" |
| |
|
// Opaque handle type: it is only forward-declared in this header, so code that
// includes it can hold and pass pointers to it but cannot access its members.
struct common_speculative;
| |
|
// Parameter bundle accepted (by value) by common_speculative_gen_draft().
// Every field carries a default member initializer, so a default-constructed
// instance has all fields set.
struct common_speculative_params {
    // NOTE(review): presumably an upper bound on the number of tokens drafted
    // per call - confirm against the implementation.
    int n_draft = 16;

    // NOTE(review): exact meaning is not visible in this header; presumably a
    // limit related to reusing previously processed tokens - confirm.
    int n_reuse = 256;

    // NOTE(review): presumably a minimum probability threshold applied to
    // drafted tokens - confirm against the implementation.
    float p_min = 0.9f;
};
| |
|
// Returns a pointer to a common_speculative for the given llama_context.
// NOTE(review): ctx_dft is presumably the draft-model context, and the returned
// object is presumably meant to be released with common_speculative_free() -
// confirm both against the implementation.
struct common_speculative * common_speculative_init(struct llama_context * ctx_dft);
| |
|
// Takes a common_speculative pointer and returns nothing.
// NOTE(review): presumably releases an object obtained from
// common_speculative_init(); whether a null pointer is accepted is not visible
// in this header - confirm against the implementation.
void common_speculative_free(struct common_speculative * spec);
| |
|
// Reports, as a bool, whether the two given contexts are compatible.
// Both contexts are taken as pointer-to-const.
// NOTE(review): ctx_tgt / ctx_dft are presumably the target-model and
// draft-model contexts; the actual compatibility criteria live in the
// implementation and are not visible here - confirm.
bool common_speculative_are_compatible(
        const struct llama_context * ctx_tgt,
        const struct llama_context * ctx_dft);
| |
|
| | |
| | llama_tokens common_speculative_gen_draft( |
| | struct common_speculative * spec, |
| | struct common_speculative_params params, |
| | const llama_tokens & prompt, |
| | llama_token id_last); |
| |
|