"""gemini_nearest_obstacle_name.py

Command-line helper that sends one image to a Gemini model and prints the
name of the nearest obstacle on the walkable path ahead.

Usage:
    export GEMINI_API_KEY='...'
    python gemini_nearest_obstacle_name.py --image path/to/photo.jpg
"""

import argparse
import os
import sys

from PIL import Image
from google import genai
from google.genai import types  # noqa: F401  (kept from the original import list)

SYSTEM_MESSAGE = (
    "You are a mobility assistant who analyzes the scene for safe navigation. "
    "Be concise and accurate."
)

QUESTION = (
    "Identify the nearest obstacle on the sidewalk or walkable path ahead. "
    "Output ONLY the object name. "
    "No punctuation, no explanation, no full sentences. "
)

# Model used when --model is not given on the command line.
DEFAULT_MODEL = "gemini-3-pro-preview"

# Characters trimmed from both ends of the model's answer: whitespace,
# quotes/backticks, and sentence punctuation.
_LABEL_STRIP_CHARS = " \t\r\n\"'`.,;:!?"


def _clean_label(text: str) -> str:
    """Return the first non-empty line of *text* with edge quotes/punctuation removed."""
    for line in text.splitlines():
        label = line.strip(_LABEL_STRIP_CHARS)
        if label:
            return label
    return ""


def ask_gemini_object_name(image_path: str, model_id: str) -> str:
    """Ask Gemini for the name of the nearest obstacle shown in an image.

    Args:
        image_path: Path to the input image file.
        model_id: Gemini model id passed to ``generate_content``.

    Returns:
        The cleaned object name (first non-empty line of the reply, with
        surrounding quotes and punctuation stripped), or ``""`` when the
        reply contains no usable text.

    Raises:
        RuntimeError: If the ``GEMINI_API_KEY`` environment variable is
            unset or empty.
    """
    # Read the key from the environment, as the error message below instructs;
    # API keys must never be committed to source control.
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise RuntimeError("Missing GEMINI_API_KEY env var. Do: export GEMINI_API_KEY='...'\n")

    client = genai.Client(api_key=api_key)

    # convert() returns a new, fully loaded image, so the underlying file can
    # be closed as soon as the conversion is done.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    # Shrink in place (aspect ratio preserved) to keep the upload small.
    image.thumbnail((768, 768))

    contents = [
        SYSTEM_MESSAGE,
        image,
        QUESTION,
    ]

    # No generation config is passed, so the model's default sampling
    # settings apply.
    resp = client.models.generate_content(
        model=model_id,
        contents=contents,
    )

    # Keep only the first non-empty line and strip quotes/punctuation so the
    # caller gets a bare label even if the model adds decoration.
    label = _clean_label(resp.text or "")
    if not label:
        # Warn on stderr so stdout carries nothing but the label.
        print("Warning: Gemini response is empty.", file=sys.stderr)
        return ""
    return label


def main():
    """Parse command-line arguments, query Gemini, and print the object name."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--image", required=True, help="Path to input image")
    parser.add_argument("--model", default=DEFAULT_MODEL, help="Gemini model id")
    args = parser.parse_args()

    name = ask_gemini_object_name(args.image, args.model)
    print(name)


if __name__ == "__main__":
    main()