# Launch NVIDIA Triton Inference Server in a throwaway container.
#
#   --rm           remove the container when it exits
#   --gpus all     expose every host GPU to the container
#   -p 8000-8002   publish container ports 8000, 8001 and 8002 on the host
#                  (per Triton docs these are the default HTTP, gRPC and
#                  metrics endpoints, respectively)
#   -v ...:/models bind-mount ./model_repo from the current directory as the
#                  model repository that tritonserver is pointed at
#
# NOTE: each backslash must be the LAST character on its line. A backslash
# followed by a space escapes the space instead of continuing the line, which
# turns the next option into a bogus positional argument.
docker run --rm --gpus all \
  -p 8000:8000 \
  -p 8001:8001 \
  -p 8002:8002 \
  -v "$PWD/model_repo:/models" \
  nvcr.io/nvidia/tritonserver:25.04-py3 \
  tritonserver --model-repository=/models