---
# Stack 2.9 Configuration
# This file provides additional configuration options
# HTTP server settings: bind address, port, and worker-process count.
server:
  host: "0.0.0.0"  # bind on all interfaces
  port: 8000
  workers: 1
# Model selection and cache location.
model:
  # Default model - can be overridden by MODEL_ID env var
  id: "TheBloke/Llama-2-7B-Chat-AWQ"
  cache_dir: "/home/vllm/.cache/huggingface"
  # NOTE(review): allows execution of model-repo code — confirm the model source is trusted
  trust_remote_code: true
# vLLM engine configuration
vllm:
  tensor_parallel_size: 1
  gpu_memory_utilization: 0.9
  max_model_len: 4096
  max_num_seqs: 64
  max_num_batched_tokens: 4096
  quantization: "awq"  # matches the AWQ-quantized model above
  enforce_eager: false
  disable_log_stats: false
# Performance tuning
performance:
  # Thread configuration
  omp_num_threads: 4
  # CUDA settings
  cuda_launch_blocking: 0
  cudnn_loginfo_dbg: 1
# CORS (if needed for web UI)
cors:
  enabled: false
  # Wildcard origins/methods/headers — only safe while `enabled` stays false;
  # tighten these before enabling in production.
  allow_origins: ["*"]
  allow_methods: ["*"]
  allow_headers: ["*"]
# Logging
logging:
  level: "INFO"
  format: "json"
  include_timestamps: true