MSheng-Lee
/

code

Model card Files Files and versions

code / generate_input_mp /mingyu /2.py

MSheng-Lee's picture

Upload folder using huggingface_hub

f20b100 verified 10 months ago

history blame contribute delete

3.48 kB

	from openai import AzureOpenAI
	import json
	import time
	import pandas as pd

	# Azure OpenAI client shared by every chat-completion request in this script.
	#
	# SECURITY: the API key is deliberately not written in source. When `api_key`
	# is omitted, the openai SDK reads it from the AZURE_OPENAI_API_KEY environment
	# variable and raises at construction time if no credential is available.
	# Export that variable before running; a key that was previously committed
	# should be treated as leaked and rotated.
	client = AzureOpenAI(
	    # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning
	    api_version="2025-01-01-preview",
	    # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource
	    # NOTE(review): this endpoint is plain http, so the credential travels
	    # unencrypted -- confirm whether an https endpoint is available.
	    azure_endpoint="http://api.gameai-llm.woa.com/llm-service/azure/public",
	)

	# Load the asset sheet, skipping its first two rows, and keep the
	# "caption_clip" column as a plain Python list. The list is interpolated
	# into the generation prompt as the set of objects the model may choose from.
	local_assets = pd.read_excel("../assets/copy.xlsx", skiprows=2)
	captions = local_assets["caption_clip"].tolist()

	# Function to generate a room description
	def _parse_description_list(raw_text):
	    """Decode the JSON list embedded in a model reply.

	    The reply may be wrapped in a Markdown code fence (for example
	    ```json ... ```) or surrounded by other text, so only the span from the
	    first "[" to the last "]" is handed to the JSON decoder.

	    Raises:
	        ValueError: if no "[" ... "]" span exists, or (as its subclass
	            json.JSONDecodeError) if that span is not valid JSON.
	    """
	    start = raw_text.find("[")
	    end = raw_text.rfind("]")
	    if start == -1 or end < start:
	        raise ValueError("model reply does not contain a JSON list")
	    return json.loads(raw_text[start:end + 1])


	def generate_room_descriptions(n=100, batch_size=50, max_retries=2, seed=5):
	    """Request room descriptions from the chat model in batches.

	    Uses the module-level `client` to send the requests and the module-level
	    `captions` list as the set of objects the prompt allows.

	    Args:
	        n: total number of descriptions to request.
	        batch_size: maximum number of descriptions requested per API call.
	        max_retries: extra attempts made for a batch whose request or JSON
	            parsing fails; the batch is skipped once they are exhausted.
	        seed: base sampling seed. Each request uses `seed` plus the number of
	            requests already sent, so the first request uses `seed` itself.

	    Returns:
	        A list with the parsed items of every successful batch. It can hold
	        fewer than `n` items when batches fail or the model returns fewer
	        items than asked; `n <= 0` yields an empty list without any request.
	    """
	    descriptions = []

	    # NOTE(review): the format and examples below end each object with a
	    # trailing comma, which is not strict JSON; a reply that copies it will
	    # fail to parse and be retried. Text left unchanged to keep the prompt stable.
	    prompt_template = """Generate {count} unique, simple and clear room descriptions.
	Each user_input should include both a description of the room and a brief scene setup.
	Provide 0 to 5 simple and common objects in the user_input. The objects can only be chosen from {captions}.
	Moreover, the length and width of the room are 5.0 meters, and the height is 3.0 meters.
	The description should not include spatial relationships (front, back, left, right, above, under, etc.) and should not mention doors or windows.
	The description should not include words like 'high-poly', 'material', 'high quality' or similar terms.
	The output format should be a JSON list where each item is formatted as:
	{{
	"user_input": "<room description>",
	}}

	Examples:
	[
	{{
	"user_input": "A cozy living room in a warm style with a brown fabric sofa, a brand new large screen TV with thin bezel and stand.",
	}},
	{{
	"user_input": "This is a kid bedroom. There is a single bed, a modern style minimalist dressing table with drawers, and a folding wooden dining chair.",
	}},
	{{
	"user_input": "Design me a room with a rectangular billiard table in entertainment area. The room should have a modern style.",
	}}
	...
	]

	Now generate {count} descriptions following this format.
	"""

	    system_message = {
	        "role": "system",
	        "content": "You are a creative assistant generating structured room descriptions, with a focus on meeting human practical needs.",
	    }

	    requests_sent = 0
	    for i in range(0, n, batch_size):
	        count = min(batch_size, n - i)
	        prompt = prompt_template.format(count=count, captions=captions)

	        for _attempt in range(max_retries + 1):
	            # Full batches share an identical prompt, so a fixed seed would
	            # ask the service to reproduce the same sample for each of them
	            # (and for each retry). Offsetting the seed per request avoids that.
	            request_seed = seed + requests_sent
	            requests_sent += 1
	            try:
	                response = client.chat.completions.create(
	                    model="gpt-4-1106-Preview",
	                    messages=[system_message, {"role": "user", "content": prompt}],
	                    temperature=0.7,
	                    seed=request_seed,
	                )
	                result = _parse_description_list(response.choices[0].message.content)
	                descriptions.extend(result)
	                break
	            except Exception as e:
	                # Best-effort generation: report the failure and try again
	                # instead of aborting the whole run.
	                print(f"Error at batch {i}: {e}")
	                time.sleep(0.2)  # Wait before retrying
	        else:
	            # Every attempt failed; move on so later batches still run.
	            print(f"Skipping batch {i} after {max_retries + 1} failed attempts")

	        # Avoid rate limits
	        time.sleep(0.1)
	        print('--------------------------------')
	        # Report what was actually collected, not what was requested.
	        print(f"Generated {len(descriptions)} descriptions")
	        print('--------------------------------')

	    return descriptions

	import time

	# Request 400 room descriptions in batches of 50 and time the whole run.
	started_at = time.time()
	room_data = generate_room_descriptions(n=400, batch_size=50)
	finished_at = time.time()
	elapsed_minutes = (finished_at - started_at) / 60
	print(f"Time taken: {elapsed_minutes} minutes")

	# Write everything that was collected to one JSON file in the working directory.
	file_path = "2.json"
	with open(file_path, "w") as output_file:
	    json.dump(room_data, output_file)

	print(f"Room descriptions saved to: {file_path}")