| |
import argparse
import os
import sys

from google import genai
from google.genai import types
from PIL import Image
|
|
# Framing text describing the assistant's role; it is sent as the first
# item of the request contents, ahead of the image.
SYSTEM_MESSAGE = (
    "You are a mobility assistant who analyzes the scene for safe navigation. "
    "Be concise and accurate."
)


# The question sent after the image. It asks for a bare object name only.
# The trailing space is part of the string value.
QUESTION = (
    "Identify the nearest obstacle on the sidewalk or walkable path ahead. "
    "Output ONLY the object name. "
    "No punctuation, no explanation, no full sentences. "
)


# Model id used when --model is not supplied on the command line.
DEFAULT_MODEL = "gemini-3-pro-preview"
|
|
def ask_gemini_object_name(image_path: str, model_id: str) -> str:
    """Ask Gemini to name the nearest obstacle shown in an image.

    The image is opened, converted to RGB, downscaled in place to fit
    within 768x768, and sent to the model between SYSTEM_MESSAGE and
    QUESTION.

    Args:
        image_path: Path to the image file to analyze.
        model_id: Gemini model id to query.

    Returns:
        The model's text reply with surrounding whitespace removed, or an
        empty string when the reply contains no text.

    Raises:
        RuntimeError: If the GEMINI_API_KEY environment variable is unset
            or blank.
    """
    # Credentials come from the environment only; a key must never be
    # hard-coded in source.
    api_key = os.environ.get("GEMINI_API_KEY", "").strip()
    if not api_key:
        raise RuntimeError("Missing GEMINI_API_KEY env var. Do: export GEMINI_API_KEY='...'\n")

    client = genai.Client(api_key=api_key)

    # convert() returns a separate in-memory image, so the file handle can
    # be released as soon as the conversion is done.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    image.thumbnail((768, 768))

    contents = [
        SYSTEM_MESSAGE,
        image,
        QUESTION,
    ]

    resp = client.models.generate_content(
        model=model_id,
        contents=contents,
    )

    # resp.text may be None; normalise to a stripped string so callers get
    # just the object name without stray whitespace or newlines.
    text = (resp.text or "").strip()
    if not text:
        # Keep diagnostics off stdout so they never mix with the result.
        print("Warning: Gemini response is empty.", file=sys.stderr)
    return text
|
|
def main():
    """Parse --image and --model from the command line and print the reply.

    --image is required; --model falls back to DEFAULT_MODEL. The string
    returned by ask_gemini_object_name is printed to stdout.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--image", required=True, help="Path to input image")
    cli.add_argument("--model", default=DEFAULT_MODEL, help="Gemini model id")
    options = cli.parse_args()

    print(ask_gemini_object_name(options.image, options.model))


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|