| | import ctypes |
| | import enum |
| | import os |
| |
|
| | |
# Single-bit CPU selection flags: CPUn has only bit n set, so several cores can
# be combined with bitwise OR (e.g. ``CPU0 | CPU2``).
# NOTE(review): presumably meant for RKLLMExtendParam.enabled_cpus_mask - confirm.
CPU0, CPU1, CPU2, CPU3, CPU4, CPU5, CPU6, CPU7 = (1 << core for core in range(8))
| |
|
| | |
class LLMCallState(enum.IntEnum):
    """State codes delivered as the third argument of the result callback.

    The integer values are exchanged with the native library and therefore
    must not be renumbered.
    """

    RKLLM_RUN_NORMAL = 0
    RKLLM_RUN_WAITING = 1
    RKLLM_RUN_FINISH = 2
    RKLLM_RUN_ERROR = 3
| |
|
class RKLLMInputType(enum.IntEnum):
    """Tag stored in ``RKLLMInput.input_type`` to select the active union member.

    The integer values are exchanged with the native library and therefore
    must not be renumbered.
    """

    RKLLM_INPUT_PROMPT = 0
    RKLLM_INPUT_TOKEN = 1
    RKLLM_INPUT_EMBED = 2
    RKLLM_INPUT_MULTIMODAL = 3
| |
|
class RKLLMInferMode(enum.IntEnum):
    """Inference mode codes assigned to ``RKLLMInferParam.mode``.

    The integer values are exchanged with the native library and therefore
    must not be renumbered.
    """

    RKLLM_INFER_GENERATE = 0
    RKLLM_INFER_GET_LAST_HIDDEN_LAYER = 1
    RKLLM_INFER_GET_LOGITS = 2
| |
|
| | |
class RKLLMExtendParam(ctypes.Structure):
    """ctypes layout of the extension block embedded in ``RKLLMParam``.

    ``_fields_`` defines the binary layout shared with the native library, so
    the order and types of its entries must not change. The class-level
    annotations exist only for editors and type checkers.
    """

    base_domain_id: ctypes.c_int32
    embed_flash: ctypes.c_int8
    enabled_cpus_num: ctypes.c_int8
    enabled_cpus_mask: ctypes.c_uint32
    # Padding bytes kept so the struct size stays fixed.
    reserved: ctypes.c_uint8 * 106

    _fields_ = [
        ("base_domain_id", ctypes.c_int32),
        ("embed_flash", ctypes.c_int8),
        ("enabled_cpus_num", ctypes.c_int8),
        ("enabled_cpus_mask", ctypes.c_uint32),
        ("reserved", ctypes.c_uint8 * 106),
    ]
| |
|
class RKLLMParam(ctypes.Structure):
    """ctypes layout of the parameter block passed by pointer to ``rkllm_init``.

    ``_fields_`` defines the binary layout shared with the native library, so
    the order and types of its entries must not change. The class-level
    annotations exist only for editors and type checkers.
    NOTE(review): the meaning of each field is defined by the native header;
    only the layout is established here.
    """

    model_path: ctypes.c_char_p
    max_context_len: ctypes.c_int32
    max_new_tokens: ctypes.c_int32
    top_k: ctypes.c_int32
    n_keep: ctypes.c_int32
    top_p: ctypes.c_float
    temperature: ctypes.c_float
    repeat_penalty: ctypes.c_float
    frequency_penalty: ctypes.c_float
    presence_penalty: ctypes.c_float
    mirostat: ctypes.c_int32
    mirostat_tau: ctypes.c_float
    mirostat_eta: ctypes.c_float
    skip_special_token: ctypes.c_bool
    is_async: ctypes.c_bool
    img_start: ctypes.c_char_p
    img_end: ctypes.c_char_p
    img_content: ctypes.c_char_p
    extend_param: RKLLMExtendParam

    _fields_ = [
        ("model_path", ctypes.c_char_p),
        ("max_context_len", ctypes.c_int32),
        ("max_new_tokens", ctypes.c_int32),
        ("top_k", ctypes.c_int32),
        ("n_keep", ctypes.c_int32),
        ("top_p", ctypes.c_float),
        ("temperature", ctypes.c_float),
        ("repeat_penalty", ctypes.c_float),
        ("frequency_penalty", ctypes.c_float),
        ("presence_penalty", ctypes.c_float),
        ("mirostat", ctypes.c_int32),
        ("mirostat_tau", ctypes.c_float),
        ("mirostat_eta", ctypes.c_float),
        ("skip_special_token", ctypes.c_bool),
        ("is_async", ctypes.c_bool),
        ("img_start", ctypes.c_char_p),
        ("img_end", ctypes.c_char_p),
        ("img_content", ctypes.c_char_p),
        # Nested by value, not by pointer.
        ("extend_param", RKLLMExtendParam),
    ]
| |
|
class RKLLMLoraAdapter(ctypes.Structure):
    """ctypes layout of the adapter description passed to ``rkllm_load_lora``.

    ``_fields_`` defines the binary layout shared with the native library;
    the annotations exist only for editors and type checkers.
    """

    lora_adapter_path: ctypes.c_char_p
    lora_adapter_name: ctypes.c_char_p
    scale: ctypes.c_float

    _fields_ = [
        ("lora_adapter_path", ctypes.c_char_p),
        ("lora_adapter_name", ctypes.c_char_p),
        ("scale", ctypes.c_float),
    ]
| |
|
class RKLLMEmbedInput(ctypes.Structure):
    """ctypes layout of the embedding member of the input union.

    Holds a pointer to float data plus a token count. ``_fields_`` defines the
    binary layout; the annotations exist only for editors and type checkers.
    """

    embed: ctypes.POINTER(ctypes.c_float)
    n_tokens: ctypes.c_size_t

    _fields_ = [
        ("embed", ctypes.POINTER(ctypes.c_float)),
        ("n_tokens", ctypes.c_size_t),
    ]
| |
|
class RKLLMTokenInput(ctypes.Structure):
    """ctypes layout of the token-id member of the input union.

    Holds a pointer to int32 ids plus a token count. ``_fields_`` defines the
    binary layout; the annotations exist only for editors and type checkers.
    """

    input_ids: ctypes.POINTER(ctypes.c_int32)
    n_tokens: ctypes.c_size_t

    _fields_ = [
        ("input_ids", ctypes.POINTER(ctypes.c_int32)),
        ("n_tokens", ctypes.c_size_t),
    ]
| |
|
class RKLLMMultiModelInput(ctypes.Structure):
    """ctypes layout of the multimodal member of the input union.

    Combines a prompt string with a pointer to float image-embedding data and
    four size fields. ``_fields_`` defines the binary layout; the annotations
    exist only for editors and type checkers.
    """

    prompt: ctypes.c_char_p
    image_embed: ctypes.POINTER(ctypes.c_float)
    n_image_tokens: ctypes.c_size_t
    n_image: ctypes.c_size_t
    image_width: ctypes.c_size_t
    image_height: ctypes.c_size_t

    _fields_ = [
        ("prompt", ctypes.c_char_p),
        ("image_embed", ctypes.POINTER(ctypes.c_float)),
        ("n_image_tokens", ctypes.c_size_t),
        ("n_image", ctypes.c_size_t),
        ("image_width", ctypes.c_size_t),
        ("image_height", ctypes.c_size_t),
    ]
| |
|
class _RKLLMInputUnion(ctypes.Union):
    """Overlapping storage for the four input kinds carried by ``RKLLMInput``.

    Being a union, all members share the same memory; only the member that
    matches ``RKLLMInput.input_type`` is meaningful at any time.
    """

    prompt_input: ctypes.c_char_p
    embed_input: RKLLMEmbedInput
    token_input: RKLLMTokenInput
    multimodal_input: RKLLMMultiModelInput

    _fields_ = [
        ("prompt_input", ctypes.c_char_p),
        ("embed_input", RKLLMEmbedInput),
        ("token_input", RKLLMTokenInput),
        ("multimodal_input", RKLLMMultiModelInput),
    ]
| |
|
class RKLLMInput(ctypes.Structure):
    """Tagged input record: ``input_type`` selects the live ``_union_data`` member.

    Each property below exposes one union member, but only while
    ``input_type`` equals the matching ``RKLLMInputType`` value; any other
    access (read or write) raises ``AttributeError``. Set ``input_type``
    before assigning the payload.
    """

    input_type: ctypes.c_int
    _union_data: _RKLLMInputUnion

    _fields_ = [
        ("input_type", ctypes.c_int),
        ("_union_data", _RKLLMInputUnion),
    ]

    @property
    def prompt_input(self) -> bytes:
        """Prompt bytes; valid only when ``input_type`` is RKLLM_INPUT_PROMPT."""
        if self.input_type != RKLLMInputType.RKLLM_INPUT_PROMPT:
            raise AttributeError("Not a prompt input")
        return self._union_data.prompt_input

    @prompt_input.setter
    def prompt_input(self, value: bytes):
        if self.input_type != RKLLMInputType.RKLLM_INPUT_PROMPT:
            raise AttributeError("Not a prompt input")
        self._union_data.prompt_input = value

    @property
    def embed_input(self) -> RKLLMEmbedInput:
        """Embedding payload; valid only when ``input_type`` is RKLLM_INPUT_EMBED."""
        if self.input_type != RKLLMInputType.RKLLM_INPUT_EMBED:
            raise AttributeError("Not an embed input")
        return self._union_data.embed_input

    @embed_input.setter
    def embed_input(self, value: RKLLMEmbedInput):
        if self.input_type != RKLLMInputType.RKLLM_INPUT_EMBED:
            raise AttributeError("Not an embed input")
        self._union_data.embed_input = value

    @property
    def token_input(self) -> RKLLMTokenInput:
        """Token-id payload; valid only when ``input_type`` is RKLLM_INPUT_TOKEN."""
        if self.input_type != RKLLMInputType.RKLLM_INPUT_TOKEN:
            raise AttributeError("Not a token input")
        return self._union_data.token_input

    @token_input.setter
    def token_input(self, value: RKLLMTokenInput):
        if self.input_type != RKLLMInputType.RKLLM_INPUT_TOKEN:
            raise AttributeError("Not a token input")
        self._union_data.token_input = value

    @property
    def multimodal_input(self) -> RKLLMMultiModelInput:
        """Multimodal payload; valid only when ``input_type`` is RKLLM_INPUT_MULTIMODAL."""
        if self.input_type != RKLLMInputType.RKLLM_INPUT_MULTIMODAL:
            raise AttributeError("Not a multimodal input")
        return self._union_data.multimodal_input

    @multimodal_input.setter
    def multimodal_input(self, value: RKLLMMultiModelInput):
        if self.input_type != RKLLMInputType.RKLLM_INPUT_MULTIMODAL:
            raise AttributeError("Not a multimodal input")
        self._union_data.multimodal_input = value
| |
|
class RKLLMLoraParam(ctypes.Structure):
    """ctypes layout of the per-inference LoRA selection.

    Referenced by pointer from ``RKLLMInferParam.lora_params``. ``_fields_``
    defines the binary layout; the annotation exists only for type checkers.
    """

    lora_adapter_name: ctypes.c_char_p

    _fields_ = [
        ("lora_adapter_name", ctypes.c_char_p),
    ]
| |
|
class RKLLMPromptCacheParam(ctypes.Structure):
    """ctypes layout of the per-inference prompt-cache settings.

    Referenced by pointer from ``RKLLMInferParam.prompt_cache_params``.
    ``_fields_`` defines the binary layout; the annotations exist only for
    editors and type checkers.
    """

    save_prompt_cache: ctypes.c_int
    prompt_cache_path: ctypes.c_char_p

    _fields_ = [
        ("save_prompt_cache", ctypes.c_int),
        ("prompt_cache_path", ctypes.c_char_p),
    ]
| |
|
class RKLLMInferParam(ctypes.Structure):
    """ctypes layout of the per-call inference options passed to ``rkllm_run``.

    ``mode`` is assigned from ``RKLLMInferMode``; the two pointer fields are
    NULL on a freshly constructed instance. ``_fields_`` defines the binary
    layout; the annotations exist only for editors and type checkers.
    """

    mode: ctypes.c_int
    lora_params: ctypes.POINTER(RKLLMLoraParam)
    prompt_cache_params: ctypes.POINTER(RKLLMPromptCacheParam)
    keep_history: ctypes.c_int

    _fields_ = [
        ("mode", ctypes.c_int),
        ("lora_params", ctypes.POINTER(RKLLMLoraParam)),
        ("prompt_cache_params", ctypes.POINTER(RKLLMPromptCacheParam)),
        ("keep_history", ctypes.c_int),
    ]
| |
|
class RKLLMResultLastHiddenLayer(ctypes.Structure):
    """ctypes layout of the hidden-state part of ``RKLLMResult``.

    Holds a pointer to float data and two int size fields.
    NOTE(review): presumably the buffer holds num_tokens * embd_size floats -
    confirm against the native header before indexing it.
    """

    hidden_states: ctypes.POINTER(ctypes.c_float)
    embd_size: ctypes.c_int
    num_tokens: ctypes.c_int

    _fields_ = [
        ("hidden_states", ctypes.POINTER(ctypes.c_float)),
        ("embd_size", ctypes.c_int),
        ("num_tokens", ctypes.c_int),
    ]
| |
|
class RKLLMResultLogits(ctypes.Structure):
    """ctypes layout of the logits part of ``RKLLMResult``.

    Holds a pointer to float data and two int size fields.
    NOTE(review): presumably the buffer holds num_tokens * vocab_size floats -
    confirm against the native header before indexing it.
    """

    logits: ctypes.POINTER(ctypes.c_float)
    vocab_size: ctypes.c_int
    num_tokens: ctypes.c_int

    _fields_ = [
        ("logits", ctypes.POINTER(ctypes.c_float)),
        ("vocab_size", ctypes.c_int),
        ("num_tokens", ctypes.c_int),
    ]
| |
|
class RKLLMResult(ctypes.Structure):
    """ctypes layout of the record handed to the result callback by pointer.

    ``text`` is a C string (``bytes`` or ``None`` when read from Python) and
    the two nested structs are embedded by value. ``_fields_`` defines the
    binary layout; the annotations exist only for editors and type checkers.
    """

    text: ctypes.c_char_p
    token_id: ctypes.c_int32
    last_hidden_layer: RKLLMResultLastHiddenLayer
    logits: RKLLMResultLogits

    _fields_ = [
        ("text", ctypes.c_char_p),
        ("token_id", ctypes.c_int32),
        ("last_hidden_layer", RKLLMResultLastHiddenLayer),
        ("logits", RKLLMResultLogits),
    ]
| |
|
| | |
# Opaque handle to a native LLM instance, represented as a plain ``void *``.
# A freshly constructed ``LLMHandle()`` is NULL (``.value is None``).
LLMHandle = ctypes.c_void_p
| |
|
| | |
# C prototype of the result callback:
#     void callback(RKLLMResult *result, void *userdata, int state)
# Instances wrap a Python callable; the wrapper object must be kept referenced
# for as long as native code may invoke it.
_CALLBACK_ARGTYPES = (ctypes.POINTER(RKLLMResult), ctypes.c_void_p, ctypes.c_int)
LLMResultCallback = ctypes.CFUNCTYPE(None, *_CALLBACK_ARGTYPES)
| |
|
| |
|
class RKLLMRuntime:
    """Object wrapper around the RKLLM runtime shared library, loaded via ctypes.

    One instance owns one native handle (``llm_handle``). The instance can be
    used as a context manager; leaving the ``with`` block calls ``destroy()``.
    Apart from ``destroy()`` and ``is_running()``, every method raises
    ``RuntimeError`` when the native call returns a non-zero status.
    """

    def __init__(self, library_path="./librkllmrt.so"):
        """Load the shared library and declare its function prototypes.

        :param library_path: Path handed to ``ctypes.CDLL``.
        :raises OSError: If the library cannot be loaded (chained to the
            original loader error).
        """
        # Define every attribute before anything can fail: __del__ still runs
        # on an instance whose __init__ raised, and destroy() reads these.
        self.lib = None
        self.llm_handle = None
        self._c_callback = None
        self._userdata_ref = None
        self._userdata_box = None

        try:
            self.lib = ctypes.CDLL(library_path)
        except OSError as e:
            raise OSError(f"Failed to load RKLLM library from {library_path}. "
                          f"Ensure it's in your LD_LIBRARY_PATH or provide the full path. Error: {e}") from e
        self._setup_functions()
        self.llm_handle = LLMHandle()

    def _setup_functions(self):
        """Attach ``restype``/``argtypes`` to every native function used here."""
        input_ptr = ctypes.POINTER(RKLLMInput)
        infer_ptr = ctypes.POINTER(RKLLMInferParam)
        prototypes = [
            ("rkllm_createDefaultParam", RKLLMParam, []),
            ("rkllm_init", ctypes.c_int,
             [ctypes.POINTER(LLMHandle), ctypes.POINTER(RKLLMParam), LLMResultCallback]),
            ("rkllm_load_lora", ctypes.c_int, [LLMHandle, ctypes.POINTER(RKLLMLoraAdapter)]),
            ("rkllm_load_prompt_cache", ctypes.c_int, [LLMHandle, ctypes.c_char_p]),
            ("rkllm_release_prompt_cache", ctypes.c_int, [LLMHandle]),
            ("rkllm_destroy", ctypes.c_int, [LLMHandle]),
            ("rkllm_run", ctypes.c_int, [LLMHandle, input_ptr, infer_ptr, ctypes.c_void_p]),
            ("rkllm_run_async", ctypes.c_int, [LLMHandle, input_ptr, infer_ptr, ctypes.c_void_p]),
            ("rkllm_abort", ctypes.c_int, [LLMHandle]),
            ("rkllm_is_running", ctypes.c_int, [LLMHandle]),
            ("rkllm_clear_kv_cache", ctypes.c_int, [LLMHandle, ctypes.c_int]),
            ("rkllm_set_chat_template", ctypes.c_int,
             [LLMHandle, ctypes.c_char_p, ctypes.c_char_p, ctypes.c_char_p]),
        ]
        for name, restype, argtypes in prototypes:
            func = getattr(self.lib, name)
            func.restype = restype
            func.argtypes = argtypes

    @staticmethod
    def _check(ret, what):
        """Return ``ret`` when it is 0; otherwise raise RuntimeError for ``what``."""
        if ret != 0:
            raise RuntimeError(f"{what} with error code {ret}")
        return ret

    def _box_userdata(self, userdata):
        """Convert ``userdata`` into the ``void *`` argument for a run call.

        The pointer refers to a ``ctypes.py_object`` box stored on ``self``.
        The box, not just the Python object, must outlive the native call;
        otherwise the address handed to the library dangles as soon as the
        calling method returns, which matters for ``run_async``.
        """
        if userdata is None:
            return None
        self._userdata_ref = userdata
        self._userdata_box = ctypes.py_object(userdata)
        return ctypes.cast(ctypes.pointer(self._userdata_box), ctypes.c_void_p)

    def create_default_param(self) -> "RKLLMParam":
        """Creates a default RKLLMParam structure."""
        return self.lib.rkllm_createDefaultParam()

    def init(self, param: "RKLLMParam", callback_func) -> int:
        """
        Initializes the LLM.
        :param param: RKLLMParam structure.
        :param callback_func: A Python function that matches the signature:
            def my_callback(result_ptr, userdata_ptr, state_enum):
                result = result_ptr.contents  # RKLLMResult
                # Process result
                # userdata can be retrieved if passed during run, or ignored
                # state = LLMCallState(state_enum)
        :return: 0 for success.
        :raises ValueError: If ``callback_func`` is not callable.
        :raises RuntimeError: If ``rkllm_init`` returns a non-zero status.
        """
        if not callable(callback_func):
            raise ValueError("callback_func must be a callable Python function.")

        # Keep the ctypes thunk referenced on self; if it were garbage
        # collected the native library would call into freed memory.
        self._c_callback = LLMResultCallback(callback_func)

        ret = self.lib.rkllm_init(ctypes.byref(self.llm_handle), ctypes.byref(param), self._c_callback)
        return self._check(ret, "rkllm_init failed")

    def load_lora(self, lora_adapter: "RKLLMLoraAdapter") -> int:
        """Loads a Lora adapter. Raises RuntimeError on a non-zero status."""
        ret = self.lib.rkllm_load_lora(self.llm_handle, ctypes.byref(lora_adapter))
        return self._check(ret, "rkllm_load_lora failed")

    def load_prompt_cache(self, prompt_cache_path: str) -> int:
        """Loads a prompt cache from a file. Raises RuntimeError on a non-zero status."""
        c_path = prompt_cache_path.encode('utf-8')
        ret = self.lib.rkllm_load_prompt_cache(self.llm_handle, c_path)
        return self._check(ret, f"rkllm_load_prompt_cache failed for {prompt_cache_path}")

    def release_prompt_cache(self) -> int:
        """Releases the prompt cache from memory. Raises RuntimeError on a non-zero status."""
        ret = self.lib.rkllm_release_prompt_cache(self.llm_handle)
        return self._check(ret, "rkllm_release_prompt_cache failed")

    def destroy(self) -> int:
        """Destroys the LLM instance and releases resources.

        Safe to call repeatedly and on a partially constructed instance.
        A non-zero status from ``rkllm_destroy`` is only reported with a
        printed warning, because this also runs from ``__exit__``/``__del__``.

        :return: The native status code, or 0 when there was nothing to destroy.
        """
        handle = getattr(self, "llm_handle", None)
        if not handle or not handle.value:
            return 0
        ret = self.lib.rkllm_destroy(handle)
        # Reset to a NULL handle so a second destroy() is a no-op.
        self.llm_handle = LLMHandle()
        if ret != 0:
            print(f"Warning: rkllm_destroy failed with error code {ret}")
        return ret

    def run(self, rkllm_input: "RKLLMInput", rkllm_infer_params: "RKLLMInferParam", userdata=None) -> int:
        """Runs an LLM inference task synchronously.

        :param userdata: Optional Python object forwarded to the callback as a
            ``void *``. Inside the callback it can be recovered with
            ``ctypes.cast(userdata_ptr, ctypes.POINTER(ctypes.py_object)).contents.value``.
        :raises RuntimeError: If ``rkllm_run`` returns a non-zero status.
        """
        c_userdata = self._box_userdata(userdata)
        ret = self.lib.rkllm_run(
            self.llm_handle, ctypes.byref(rkllm_input), ctypes.byref(rkllm_infer_params), c_userdata
        )
        return self._check(ret, "rkllm_run failed")

    def run_async(self, rkllm_input: "RKLLMInput", rkllm_infer_params: "RKLLMInferParam", userdata=None) -> int:
        """Runs an LLM inference task asynchronously.

        ``userdata`` is handled as in ``run()``; its box stays referenced on
        the instance because callbacks may fire after this method returns.
        NOTE(review): ``rkllm_input`` and ``rkllm_infer_params`` are passed by
        reference; the caller presumably has to keep them alive until the task
        finishes - confirm against the native header.

        :raises RuntimeError: If ``rkllm_run_async`` returns a non-zero status.
        """
        c_userdata = self._box_userdata(userdata)
        ret = self.lib.rkllm_run_async(
            self.llm_handle, ctypes.byref(rkllm_input), ctypes.byref(rkllm_infer_params), c_userdata
        )
        return self._check(ret, "rkllm_run_async failed")

    def abort(self) -> int:
        """Aborts an ongoing LLM task. Raises RuntimeError on a non-zero status."""
        return self._check(self.lib.rkllm_abort(self.llm_handle), "rkllm_abort failed")

    def is_running(self) -> bool:
        """Checks if an LLM task is currently running. Returns True if running."""
        # This wrapper treats a 0 status as "a task is running".
        # NOTE(review): confirm against rkllm.h for the library version in use.
        return self.lib.rkllm_is_running(self.llm_handle) == 0

    def clear_kv_cache(self, keep_system_prompt: bool) -> int:
        """Clears the key-value cache. Raises RuntimeError on a non-zero status."""
        flag = ctypes.c_int(1 if keep_system_prompt else 0)
        ret = self.lib.rkllm_clear_kv_cache(self.llm_handle, flag)
        return self._check(ret, "rkllm_clear_kv_cache failed")

    def set_chat_template(self, system_prompt: str, prompt_prefix: str, prompt_postfix: str) -> int:
        """Sets the chat template for the LLM.

        Empty or ``None`` parts are sent as empty C strings.
        :raises RuntimeError: If ``rkllm_set_chat_template`` returns non-zero.
        """
        c_system = system_prompt.encode('utf-8') if system_prompt else b""
        c_prefix = prompt_prefix.encode('utf-8') if prompt_prefix else b""
        c_postfix = prompt_postfix.encode('utf-8') if prompt_postfix else b""

        ret = self.lib.rkllm_set_chat_template(self.llm_handle, c_system, c_prefix, c_postfix)
        return self._check(ret, "rkllm_set_chat_template failed")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.destroy()

    def __del__(self):
        # Finalizers may run on half-built objects or during interpreter
        # shutdown; cleanup here is best-effort and must never raise.
        try:
            self.destroy()
        except Exception:
            pass
| |
|
| | |
if __name__ == "__main__":
    # Example: load the runtime, initialise a model, run one prompt
    # synchronously and print the text collected by the callback.

    # Text fragments appended by the callback during a run.
    results_buffer = []

    def my_python_callback(result_ptr, userdata_ptr, state_enum):
        """
        Callback function to be called by the C library.

        Prints each result, appends its decoded text to ``results_buffer``
        and reports the finish/error states. ``userdata_ptr`` is unused.
        """
        state = LLMCallState(state_enum)
        result = result_ptr.contents

        current_text = ""
        if result.text:
            # Undecodable bytes are dropped rather than aborting the callback.
            current_text = result.text.decode('utf-8', errors='ignore')

        print(f"Callback: State={state.name}, TokenID={result.token_id}, Text='{current_text}'")
        results_buffer.append(current_text)

        if state == LLMCallState.RKLLM_RUN_FINISH:
            print("Inference finished.")
        elif state == LLMCallState.RKLLM_RUN_ERROR:
            print("Inference error.")

    # Everything the ``finally`` clause reads is defined before the ``try``,
    # so cleanup cannot fail with NameError when the library fails to load.
    model_file = "dummy_model.rkllm"
    created_dummy_model = False
    rk_llm = None

    try:
        print("Initializing RKLLMRuntime...")
        rk_llm = RKLLMRuntime()

        print("Creating default parameters...")
        params = rk_llm.create_default_param()

        if not os.path.exists(model_file):
            print(f"Warning: Model file '{model_file}' does not exist. Init will likely fail.")
            # Create a placeholder so the path exists; remember that it is
            # ours so only this file is removed during cleanup.
            with open(model_file, "w") as f:
                f.write("dummy content")
            created_dummy_model = True

        params.model_path = model_file.encode('utf-8')
        params.max_context_len = 512
        params.max_new_tokens = 128
        params.top_k = 1
        params.temperature = 0.7
        params.repeat_penalty = 1.1

        print(f"Initializing LLM with model: {params.model_path.decode()}...")
        try:
            rk_llm.init(params, my_python_callback)
            print("LLM Initialized.")
        except RuntimeError as e:
            print(f"Error during LLM initialization: {e}")
            print("This is expected if 'dummy_model.rkllm' is not a valid model.")
            print("Replace 'dummy_model.rkllm' with a real model path to test further.")
            # Equivalent to exit() but does not depend on the ``site`` module;
            # the ``finally`` clause below still runs.
            raise SystemExit

        print("Preparing input...")
        rk_input = RKLLMInput()
        rk_input.input_type = RKLLMInputType.RKLLM_INPUT_PROMPT

        prompt_text = "Translate the following English text to French: 'Hello, world!'"
        # Keep the encoded bytes in a local so the buffer outlives the run.
        c_prompt = prompt_text.encode('utf-8')
        rk_input._union_data.prompt_input = c_prompt

        print("Preparing inference parameters...")
        infer_params = RKLLMInferParam()
        infer_params.mode = RKLLMInferMode.RKLLM_INFER_GENERATE
        infer_params.keep_history = 1

        print(f"Running inference with prompt: '{prompt_text}'")
        results_buffer.clear()
        try:
            rk_llm.run(rk_input, infer_params)
            print("\n--- Full Response ---")
            print("".join(results_buffer))
            print("---------------------\n")
        except RuntimeError as e:
            print(f"Error during LLM run: {e}")

    except OSError as e:
        print(f"OSError: {e}. Could not load the RKLLM library.")
        print("Please ensure 'librkllmrt.so' is in your LD_LIBRARY_PATH or provide the full path.")
    except Exception as e:
        # Top-level boundary of the example script: report and fall through
        # to cleanup.
        print(f"An unexpected error occurred: {e}")
    finally:
        if rk_llm is not None and rk_llm.llm_handle and rk_llm.llm_handle.value:
            print("Destroying LLM instance...")
            rk_llm.destroy()
            print("LLM instance destroyed.")
        # Remove only the placeholder created above, never a user's file.
        if created_dummy_model and os.path.exists(model_file):
            os.remove(model_file)

    print("Example finished.")