Instructions to use MebinThattil/tiny-llama-q4_0 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use MebinThattil/tiny-llama-q4_0 with llama-cpp-python:

# !pip install llama-cpp-python

from llama_cpp import Llama

llm = Llama.from_pretrained(
	repo_id="MebinThattil/tiny-llama-q4_0",
	filename="tinyllama-1.1B-q4.gguf",
)

llm.create_chat_completion(
	messages = "No input example has been defined for this model task."
)

Notebooks
Google Colab
Kaggle
Local Apps

llama.cpp

How to use MebinThattil/tiny-llama-q4_0 with llama.cpp:

Install from brew

brew install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf MebinThattil/tiny-llama-q4_0
# Run inference directly in the terminal:
llama-cli -hf MebinThattil/tiny-llama-q4_0

Install from WinGet (Windows)

winget install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama-server -hf MebinThattil/tiny-llama-q4_0
# Run inference directly in the terminal:
llama-cli -hf MebinThattil/tiny-llama-q4_0

Use pre-built binary

# Download pre-built binary from:
# https://github.com/ggerganov/llama.cpp/releases
# Start a local OpenAI-compatible server with a web UI:
./llama-server -hf MebinThattil/tiny-llama-q4_0
# Run inference directly in the terminal:
./llama-cli -hf MebinThattil/tiny-llama-q4_0

Build from source code

git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
cmake -B build
cmake --build build -j --target llama-server llama-cli
# Start a local OpenAI-compatible server with a web UI:
./build/bin/llama-server -hf MebinThattil/tiny-llama-q4_0
# Run inference directly in the terminal:
./build/bin/llama-cli -hf MebinThattil/tiny-llama-q4_0

Use Docker

docker model run hf.co/MebinThattil/tiny-llama-q4_0

LM Studio
Jan
Ollama
How to use MebinThattil/tiny-llama-q4_0 with Ollama:
```
ollama run hf.co/MebinThattil/tiny-llama-q4_0
```

Unsloth Studio new

How to use MebinThattil/tiny-llama-q4_0 with Unsloth Studio:

Install Unsloth Studio (macOS, Linux, WSL)

curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for MebinThattil/tiny-llama-q4_0 to start chatting

Install Unsloth Studio (Windows)

irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for MebinThattil/tiny-llama-q4_0 to start chatting

Using HuggingFace Spaces for Unsloth

# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for MebinThattil/tiny-llama-q4_0 to start chatting

Docker Model Runner
How to use MebinThattil/tiny-llama-q4_0 with Docker Model Runner:
```
docker model run hf.co/MebinThattil/tiny-llama-q4_0
```

Lemonade

How to use MebinThattil/tiny-llama-q4_0 with Lemonade:

Pull the model

# Download Lemonade from https://lemonade-server.ai/
lemonade pull MebinThattil/tiny-llama-q4_0

Run and chat with the model

lemonade run user.tiny-llama-q4_0-{{QUANT_TAG}}

List all available models

lemonade list

tiny-llama-q4_0 / llama_cpp /llava_cpp.py

MebinThattil

Upload folder using huggingface_hub

5d62acd verified 11 months ago

raw

history blame contribute delete

4.55 kB

	from __future__ import annotations

	import os
	from ctypes import (
	c_bool,
	c_char_p,
	c_int,
	c_uint8,
	c_float,
	c_void_p,
	POINTER,
	_Pointer, # type: ignore
	Structure,
	)
	import pathlib
	from typing import (
	Union,
	NewType,
	Optional,
	TYPE_CHECKING,
	)

	import llama_cpp.llama_cpp as llama_cpp

	from llama_cpp._ctypes_extensions import (
	load_shared_library,
	ctypes_function_for_shared_library,
	)

	if TYPE_CHECKING:
	from llama_cpp._ctypes_extensions import (
	CtypesArray,
	)


	# Specify the base name of the shared library to load
	_libllava_base_name = "llava"
	_libllava_override_path = os.environ.get("LLAVA_CPP_LIB")
	_libllava_base_path = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib" if _libllava_override_path is None else pathlib.Path()

	# Load the library
	_libllava = load_shared_library(_libllava_base_name, _libllava_base_path)

	ctypes_function = ctypes_function_for_shared_library(_libllava)


	################################################
	# llava.h
	################################################

	# struct clip_ctx;
	clip_ctx_p = NewType("clip_ctx_p", int)
	clip_ctx_p_ctypes = c_void_p


	# struct llava_image_embed {
	# float * embed;
	# int n_image_pos;
	# };
	class llava_image_embed(Structure):
	_fields_ = [
	("embed", POINTER(c_float)),
	("n_image_pos", c_int),
	]


	# /** sanity check for clip <-> llava embed size match */
	# LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);
	@ctypes_function(
	"llava_validate_embed_size",
	[llama_cpp.llama_context_p_ctypes, clip_ctx_p_ctypes],
	c_bool,
	)
	def llava_validate_embed_size(
	ctx_llama: llama_cpp.llama_context_p, ctx_clip: clip_ctx_p, /
	) -> bool:
	...


	# /** build an image embed from image file bytes */
	# LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
	@ctypes_function(
	"llava_image_embed_make_with_bytes",
	[clip_ctx_p_ctypes, c_int, POINTER(c_uint8), c_int],
	POINTER(llava_image_embed),
	)
	def llava_image_embed_make_with_bytes(
	ctx_clip: clip_ctx_p,
	n_threads: Union[c_int, int],
	image_bytes: CtypesArray[c_uint8],
	image_bytes_length: Union[c_int, int],
	/,
	) -> "_Pointer[llava_image_embed]":
	...


	# /** build an image embed from a path to an image filename */
	# LLAVA_API struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx * ctx_clip, int n_threads, const char * image_path);
	@ctypes_function(
	"llava_image_embed_make_with_filename",
	[clip_ctx_p_ctypes, c_int, c_char_p],
	POINTER(llava_image_embed),
	)
	def llava_image_embed_make_with_filename(
	ctx_clip: clip_ctx_p, n_threads: Union[c_int, int], image_path: bytes, /
	) -> "_Pointer[llava_image_embed]":
	...


	# LLAVA_API void llava_image_embed_free(struct llava_image_embed * embed);
	# /** free an embedding made with llava_image_embed_make_* */
	@ctypes_function("llava_image_embed_free", [POINTER(llava_image_embed)], None)
	def llava_image_embed_free(embed: "_Pointer[llava_image_embed]", /):
	...


	# /** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. */
	# LLAVA_API bool llava_eval_image_embed(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past);
	@ctypes_function(
	"llava_eval_image_embed",
	[
	llama_cpp.llama_context_p_ctypes,
	POINTER(llava_image_embed),
	c_int,
	POINTER(c_int),
	],
	c_bool,
	)
	def llava_eval_image_embed(
	ctx_llama: llama_cpp.llama_context_p,
	embed: "_Pointer[llava_image_embed]",
	n_batch: Union[c_int, int],
	n_past: "_Pointer[c_int]",
	/,
	) -> bool:
	...


	################################################
	# clip.h
	################################################


	# /** load mmproj model */
	# CLIP_API struct clip_ctx * clip_model_load (const char * fname, int verbosity);
	@ctypes_function("clip_model_load", [c_char_p, c_int], clip_ctx_p_ctypes)
	def clip_model_load(
	fname: bytes, verbosity: Union[c_int, int], /
	) -> Optional[clip_ctx_p]:
	...


	# /** free mmproj model */
	# CLIP_API void clip_free(struct clip_ctx * ctx);
	@ctypes_function("clip_free", [clip_ctx_p_ctypes], None)
	def clip_free(ctx: clip_ctx_p, /):
	...