# Hugging Face Hub page header captured with the file (not part of the script):
#   oops / gemini_nearest_obj.py
#   deansmile123's picture
#   Upload folder using huggingface_hub
#   75f0bc0 verified
# gemini_nearest_obstacle_name.py
import argparse
import os
import sys

from google import genai
from google.genai import types
from PIL import Image
# Role prompt; sent as the first element of the request contents.
SYSTEM_MESSAGE = " ".join(
    (
        "You are a mobility assistant who analyzes the scene for safe navigation.",
        "Be concise and accurate.",
    )
)

# Task prompt; sent after the image. Every sentence is followed by a single
# space, so the assembled string ends with a trailing space.
QUESTION = "".join(
    f"{sentence} "
    for sentence in (
        "Identify the nearest obstacle on the sidewalk or walkable path ahead.",
        "Output ONLY the object name.",
        "No punctuation, no explanation, no full sentences.",
    )
)

# Model id used when the --model command-line option is not given.
DEFAULT_MODEL = "gemini-3-pro-preview"
# Characters trimmed from both ends of the model's answer: whitespace, quotes,
# markdown markers and sentence punctuation. Interior characters are kept, so
# multi-word labels such as "fire hydrant" survive intact.
_LABEL_STRIP_CHARS = " \t\r\n\"'`*.,;:!?"


def _clean_label(text: str) -> str:
    """Return the first non-blank line of *text*, trimmed of quotes/punctuation."""
    for line in text.splitlines():
        label = line.strip(_LABEL_STRIP_CHARS)
        if label:
            return label
    return ""


def ask_gemini_object_name(image_path: str, model_id: str) -> str:
    """Ask a Gemini model to name the nearest obstacle visible in an image.

    The image is converted to RGB, shrunk in place to fit within 768x768
    pixels, and sent together with SYSTEM_MESSAGE and QUESTION.

    Args:
        image_path: Path of the image file to analyze.
        model_id: Gemini model id to query.

    Returns:
        The first non-blank line of the model's answer with surrounding
        quotes and punctuation removed, or "" when the model returns no
        usable text (a warning is then written to stderr).

    Raises:
        RuntimeError: If the GEMINI_API_KEY environment variable is unset
            or empty.
    """
    # Credentials come from the environment; never commit an API key to source.
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise RuntimeError("Missing GEMINI_API_KEY env var. Do: export GEMINI_API_KEY='...'\n")

    client = genai.Client(api_key=api_key)

    # convert() returns an independent in-memory copy, so the underlying file
    # handle can be released as soon as the conversion is done.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    image.thumbnail((768, 768))

    # No generation config is passed, so the model's default sampling
    # settings apply.
    resp = client.models.generate_content(
        model=model_id,
        contents=[SYSTEM_MESSAGE, image, QUESTION],
    )

    # resp.text may be None/empty; normalise before cleaning.
    label = _clean_label(resp.text or "")
    if not label:
        # Keep stdout clean: the caller prints the label there.
        print("Warning: Gemini response is empty.", file=sys.stderr)
        return ""
    return label
def main():
    """Parse the command line, query Gemini for one image, and print the answer.

    Options:
        --image  Path to the input image (required).
        --model  Gemini model id (defaults to DEFAULT_MODEL).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--image", help="Path to input image", required=True)
    cli.add_argument("--model", help="Gemini model id", default=DEFAULT_MODEL)
    options = cli.parse_args()

    print(ask_gemini_object_name(options.image, options.model))


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()