Instructions to use MebinThattil/tiny-llama-q4_0 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- llama-cpp-python
How to use MebinThattil/tiny-llama-q4_0 with llama-cpp-python:
# !pip install llama-cpp-python
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="MebinThattil/tiny-llama-q4_0",
    filename="tinyllama-1.1B-q4.gguf",
)
llm.create_chat_completion(
    messages = "No input example has been defined for this model task."
)
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- llama.cpp
How to use MebinThattil/tiny-llama-q4_0 with llama.cpp:
Install from brew
brew install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf MebinThattil/tiny-llama-q4_0
# Run inference directly in the terminal:
llama-cli -hf MebinThattil/tiny-llama-q4_0
Install from WinGet (Windows)
winget install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf MebinThattil/tiny-llama-q4_0
# Run inference directly in the terminal:
llama-cli -hf MebinThattil/tiny-llama-q4_0
Use pre-built binary
# Download pre-built binary from:
# https://github.com/ggerganov/llama.cpp/releases
# Start a local OpenAI-compatible server with a web UI:
./llama-server -hf MebinThattil/tiny-llama-q4_0
# Run inference directly in the terminal:
./llama-cli -hf MebinThattil/tiny-llama-q4_0
Build from source code
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
cmake -B build
cmake --build build -j --target llama-server llama-cli
# Start a local OpenAI-compatible server with a web UI:
./build/bin/llama-server -hf MebinThattil/tiny-llama-q4_0
# Run inference directly in the terminal:
./build/bin/llama-cli -hf MebinThattil/tiny-llama-q4_0
Use Docker
docker model run hf.co/MebinThattil/tiny-llama-q4_0
- LM Studio
- Jan
- Ollama
How to use MebinThattil/tiny-llama-q4_0 with Ollama:
ollama run hf.co/MebinThattil/tiny-llama-q4_0
- Unsloth Studio new
How to use MebinThattil/tiny-llama-q4_0 with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for MebinThattil/tiny-llama-q4_0 to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for MebinThattil/tiny-llama-q4_0 to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for MebinThattil/tiny-llama-q4_0 to start chatting
- Docker Model Runner
How to use MebinThattil/tiny-llama-q4_0 with Docker Model Runner:
docker model run hf.co/MebinThattil/tiny-llama-q4_0
- Lemonade
How to use MebinThattil/tiny-llama-q4_0 with Lemonade:
Pull the model
# Download Lemonade from https://lemonade-server.ai/
lemonade pull MebinThattil/tiny-llama-q4_0
Run and chat with the model
lemonade run user.tiny-llama-q4_0-{{QUANT_TAG}}
List all available models
lemonade list
| from __future__ import annotations | |
| import os | |
| from ctypes import ( | |
| c_bool, | |
| c_char_p, | |
| c_int, | |
| c_uint8, | |
| c_float, | |
| c_void_p, | |
| POINTER, | |
| _Pointer, # type: ignore | |
| Structure, | |
| ) | |
| import pathlib | |
| from typing import ( | |
| Union, | |
| NewType, | |
| Optional, | |
| TYPE_CHECKING, | |
| ) | |
| import llama_cpp.llama_cpp as llama_cpp | |
| from llama_cpp._ctypes_extensions import ( | |
| load_shared_library, | |
| ctypes_function_for_shared_library, | |
| ) | |
| if TYPE_CHECKING: | |
| from llama_cpp._ctypes_extensions import ( | |
| CtypesArray, | |
| ) | |
# Specify the base name of the shared library to load
_libllava_base_name = "llava"

# Optional override taken from the environment.
_libllava_override_path = os.environ.get("LLAVA_CPP_LIB")

# Without an override, look in the "lib" directory next to this module.
# With an override, an empty Path() is used as the base directory instead.
# NOTE(review): the value of LLAVA_CPP_LIB is read but never passed on; only
# whether it is set affects the result. Confirm whether the override was meant
# to be used as the path itself.
_libllava_base_path = (
    pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib"
    if _libllava_override_path is None
    else pathlib.Path()
)

# Load the library
_libllava = load_shared_library(_libllava_base_name, _libllava_base_path)

# Helper derived from the loaded library handle; no use of it is visible in
# this part of the file.
ctypes_function = ctypes_function_for_shared_library(_libllava)
################################################
# llava.h
################################################

# struct clip_ctx;
# Handle type for the C ``clip_ctx`` struct: a distinct static type over int
# for annotations, paired with the ctypes type (void pointer) used for it.
clip_ctx_p = NewType("clip_ctx_p", int)
clip_ctx_p_ctypes = c_void_p
# struct llava_image_embed {
#     float * embed;
#     int n_image_pos;
# };
class llava_image_embed(Structure):
    """ctypes mirror of the C ``struct llava_image_embed``.

    Field names, order and types follow the C declaration quoted above.
    """

    _fields_ = [
        ("embed", POINTER(c_float)),  # float * embed
        ("n_image_pos", c_int),  # int n_image_pos
    ]
# /** sanity check for clip <-> llava embed size match */
# LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);
def llava_validate_embed_size(
    ctx_llama: llama_cpp.llama_context_p, ctx_clip: clip_ctx_p, /
) -> bool:
    """Typed declaration for the C function ``llava_validate_embed_size``.

    Per the C header comment: sanity check for clip <-> llava embed size
    match. Both arguments are positional-only.

    NOTE(review): the body is only ``...`` and no binding to the shared
    library is visible on this definition, so calling it as written returns
    None rather than a bool. Confirm a decorator was not lost.
    """
    ...
# /** build an image embed from image file bytes */
# LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
def llava_image_embed_make_with_bytes(
    ctx_clip: clip_ctx_p,
    n_threads: Union[c_int, int],
    image_bytes: CtypesArray[c_uint8],
    image_bytes_length: Union[c_int, int],
    /,
) -> "_Pointer[llava_image_embed]":
    """Typed declaration for the C function ``llava_image_embed_make_with_bytes``.

    Per the C header comment: build an image embed from image file bytes.
    All arguments are positional-only.

    NOTE(review): the body is only ``...`` and no binding to the shared
    library is visible on this definition, so calling it as written returns
    None. Confirm a decorator was not lost.
    """
    ...
# /** build an image embed from a path to an image filename */
# LLAVA_API struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx * ctx_clip, int n_threads, const char * image_path);
def llava_image_embed_make_with_filename(
    ctx_clip: clip_ctx_p, n_threads: Union[c_int, int], image_path: bytes, /
) -> "_Pointer[llava_image_embed]":
    """Typed declaration for the C function ``llava_image_embed_make_with_filename``.

    Per the C header comment: build an image embed from a path to an image
    filename. All arguments are positional-only; ``image_path`` is annotated
    as ``bytes`` (the C parameter is ``const char *``).

    NOTE(review): the body is only ``...`` and no binding to the shared
    library is visible on this definition, so calling it as written returns
    None. Confirm a decorator was not lost.
    """
    ...
# /** free an embedding made with llava_image_embed_make_* */
# LLAVA_API void llava_image_embed_free(struct llava_image_embed * embed);
def llava_image_embed_free(embed: "_Pointer[llava_image_embed]", /):
    """Typed declaration for the C function ``llava_image_embed_free``.

    Per the C header comment: free an embedding made with
    ``llava_image_embed_make_*``. ``embed`` is positional-only.

    NOTE(review): the body is only ``...`` and no binding to the shared
    library is visible on this definition, so calling it as written does
    nothing. Confirm a decorator was not lost.
    """
    ...
# /** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. */
# LLAVA_API bool llava_eval_image_embed(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past);
def llava_eval_image_embed(
    ctx_llama: llama_cpp.llama_context_p,
    embed: "_Pointer[llava_image_embed]",
    n_batch: Union[c_int, int],
    n_past: "_Pointer[c_int]",
    /,
) -> bool:
    """Typed declaration for the C function ``llava_eval_image_embed``.

    Per the C header comment: write the image represented by ``embed`` into
    the llama context with batch size ``n_batch``, starting at context
    position ``n_past``; on completion ``n_past`` points to the next position
    in the context after the image embed. All arguments are positional-only.

    NOTE(review): the body is only ``...`` and no binding to the shared
    library is visible on this definition, so calling it as written returns
    None rather than a bool. Confirm a decorator was not lost.
    """
    ...
################################################
# clip.h
################################################

# /** load mmproj model */
# CLIP_API struct clip_ctx * clip_model_load (const char * fname, int verbosity);
def clip_model_load(
    fname: bytes, verbosity: Union[c_int, int], /
) -> Optional[clip_ctx_p]:
    """Typed declaration for the C function ``clip_model_load``.

    Per the C header comment: load mmproj model. Both arguments are
    positional-only; ``fname`` is annotated as ``bytes`` (the C parameter is
    ``const char *``). The return annotation is ``Optional``.

    NOTE(review): the body is only ``...`` and no binding to the shared
    library is visible on this definition, so calling it as written always
    returns None. Confirm a decorator was not lost.
    """
    ...
# /** free mmproj model */
# CLIP_API void clip_free(struct clip_ctx * ctx);
def clip_free(ctx: clip_ctx_p, /):
    """Typed declaration for the C function ``clip_free``.

    Per the C header comment: free mmproj model. ``ctx`` is positional-only.

    NOTE(review): the body is only ``...`` and no binding to the shared
    library is visible on this definition, so calling it as written does
    nothing. Confirm a decorator was not lost.
    """
    ...