deansmile123
/

oops

Model card Files Files and versions

oops / gemini_nearest_obj.py

deansmile123's picture

Upload folder using huggingface_hub

75f0bc0 verified 11 days ago

history blame contribute delete

1.7 kB

	# gemini_nearest_obj.py: ask Gemini for the name of the nearest obstacle in an image.
	import os
	import argparse
	from PIL import Image
	from google import genai
	from google.genai import types

	# Role-setting instruction; it is sent as the first element of the request contents.
	SYSTEM_MESSAGE = "".join(
	    (
	        "You are a mobility assistant who analyzes the scene for safe navigation. ",
	        "Be concise and accurate.",
	    )
	)

	# The task prompt sent after the image. The trailing space is part of the value.
	QUESTION = "".join(
	    (
	        "Identify the nearest obstacle on the sidewalk or walkable path ahead. ",
	        "Output ONLY the object name. ",
	        "No punctuation, no explanation, no full sentences. ",
	    )
	)

	# Gemini model id used when --model is not given on the command line.
	DEFAULT_MODEL: str = "gemini-3-pro-preview"

	def _clean_label(raw: str) -> str:
	    """Return the first non-blank line of *raw* without surrounding quotes or punctuation."""
	    for line in raw.splitlines():
	        label = line.strip().strip("\"'`.,;:!?*").strip()
	        if label:
	            return label
	    return ""


	def ask_gemini_object_name(image_path: str, model_id: str) -> str:
	    """Ask Gemini to name the nearest obstacle visible in an image.

	    The image is converted to RGB, shrunk in place so that neither side
	    exceeds 768 pixels, and sent to the model together with
	    ``SYSTEM_MESSAGE`` and ``QUESTION``.

	    Args:
	        image_path: Path to the input image file.
	        model_id: Gemini model id to query.

	    Returns:
	        The object label reduced to its first non-blank line with
	        surrounding quotes and punctuation removed, or an empty string
	        when the response contains no text.

	    Raises:
	        RuntimeError: If the ``GEMINI_API_KEY`` environment variable is
	            unset or empty.
	    """
	    import sys

	    # Credentials come from the environment and must never be hard-coded.
	    # Any key that was ever committed to this file should be treated as
	    # compromised and rotated.
	    api_key = os.environ.get("GEMINI_API_KEY")
	    if not api_key:
	        raise RuntimeError("Missing GEMINI_API_KEY env var. Do: export GEMINI_API_KEY='...'\n")

	    client = genai.Client(api_key=api_key)

	    # Close the source file promptly; convert() returns an independent copy.
	    with Image.open(image_path) as source:
	        image = source.convert("RGB")
	    # thumbnail() resizes in place and never enlarges the image.
	    image.thumbnail((768, 768))

	    resp = client.models.generate_content(
	        model=model_id,
	        contents=[SYSTEM_MESSAGE, image, QUESTION],
	    )

	    text = resp.text or ""
	    if not text:
	        # Diagnostics go to stderr so stdout carries only the label.
	        print("Warning: Gemini response is empty.", file=sys.stderr)
	        return ""
	    return _clean_label(text)

	def main():
	    """Command-line entry point.

	    Reads ``--image`` (required) and ``--model`` (defaults to
	    ``DEFAULT_MODEL``), passes both to ``ask_gemini_object_name``, and
	    prints the returned text.
	    """
	    cli = argparse.ArgumentParser()
	    cli.add_argument("--image", required=True, help="Path to input image")
	    cli.add_argument("--model", default=DEFAULT_MODEL, help="Gemini model id")
	    options = cli.parse_args()

	    print(ask_gemini_object_name(options.image, options.model))


	# Run the CLI only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()