"""Bee Autonomous Invention — Run the invention engine to discover novel algorithms.

This is the MAIN EVIDENCE script. It will:

1. Use a small LLM (SmolLM2-135M) as the 'inventor brain' to generate candidate code
2. Sandbox-execute each candidate against objective metrics
3. Evolve the population via tournament selection
4. Output the winning inventions with PROVABLE metrics

Run:
    python scripts/invent.py --generations 3 --population 4 --device mps
"""
import argparse
import json
import logging
import os
import sys
import time
from pathlib import Path
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from bee.invention_engine import InventionEngine
logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(name)s | %(message)s")
logger = logging.getLogger("bee.invent")
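
# --- Illustrative sketch (not used by the pipeline) --------------------------
# Step 3 of the docstring ("evolve the population via tournament selection") is
# implemented inside bee.invention_engine.InventionEngine, whose internals are
# not shown in this file. The helper below is only a minimal sketch of the idea,
# assuming each candidate exposes a numeric `score` attribute; the engine's real
# selection logic may differ.
def _tournament_select_sketch(population, k=2):
    """Return the highest-scoring candidate from a random sample of size k."""
    import random  # local import keeps the sketch self-contained
    pool = list(population)
    contenders = random.sample(pool, min(k, len(pool)))
    return max(contenders, key=lambda candidate: candidate.score)
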
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--brain", type=str, default="HuggingFaceTB/SmolLM2-135M",
                        help="LLM used to generate candidate inventions")
    parser.add_argument("--device", type=str, default="mps" if torch.backends.mps.is_available() else "cpu")
    parser.add_argument("--generations", type=int, default=3)
    parser.add_argument("--population", type=int, default=4)
    parser.add_argument("--output_dir", type=str, default="./inventions")
    parser.add_argument("--module", type=str, default="all",
                        choices=["all", "attention", "compression", "state_space", "memory"])
    args = parser.parse_args()
    os.makedirs(args.output_dir, exist_ok=True)
    logger.info("Loading inventor brain: %s", args.brain)
    brain = AutoModelForCausalLM.from_pretrained(args.brain, trust_remote_code=True).to(args.device).eval()
    tokenizer = AutoTokenizer.from_pretrained(args.brain, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
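
    # The invention engine sees the LLM only through this callable (passed in
    # below as model_generate_fn): it supplies a prompt string and receives text
    # back. Prompts are truncated to 256 tokens, and sampling (temperature 0.9,
    # top-p 0.95) keeps the candidate code diverse across the population.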
    def model_generate_fn(prompt: str, max_new_tokens: int = 512) -> str:
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256).to(args.device)
        logger.info(" [Brain] Generating %d tokens...", max_new_tokens)
        t0 = time.time()
        with torch.no_grad():
            out = brain.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=0.9,
                top_p=0.95,
                pad_token_id=tokenizer.pad_token_id,
            )
        logger.info(" [Brain] Generation done in %.1fs", time.time() - t0)
        return tokenizer.decode(out[0], skip_special_tokens=True)
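    # NOTE: tokenizer.decode(out[0], ...) returns the prompt plus the generated
    # continuation. If InventionEngine expects only the newly generated text, it
    # has to strip the prompt itself (or this function could decode
    # out[0][inputs["input_ids"].shape[1]:] instead); the engine's contract is
    # not shown here, so the behaviour is left unchanged.
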
    logger.info("Brain loaded. Starting autonomous invention engine...")
    logger.info("=" * 60)
    engine = InventionEngine(
        model_generate_fn=model_generate_fn,
        population_size=args.population,
        max_generations=args.generations,
    )
    modules = ["attention", "compression", "state_space", "memory"] if args.module == "all" else [args.module]
    all_results = {}
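
    # Evolve each requested module type independently; a failure in one module
    # is logged and recorded, but does not stop the remaining runs.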
    for module_type in modules:
        logger.info("\n>>> INVENTING: %s", module_type.upper())
        logger.info("-" * 40)
        try:
            best = engine.evolve(module_type)
            all_results[module_type] = {
                "invention_id": best.invention_id,
                "generation": best.generation,
                "score": best.score,
                "metrics": best.metrics,
                "code_length": len(best.source_code),
                "code_preview": best.source_code[:500],
            }
            # Save winning invention code
            code_path = os.path.join(args.output_dir, f"{best.invention_id}.py")
            with open(code_path, "w") as f:
                f.write(f'"""Bee Autonomous Invention: {module_type}\n')
                f.write(f'Score: {best.score:.3f}\n')
                f.write(f'Metrics: {json.dumps(best.metrics, indent=2)}\n')
                f.write(f'Parent IDs: {best.parent_ids}\n')
                f.write('"""\n\n')
                f.write(best.source_code)
            logger.info("Saved winning invention to %s", code_path)
        except Exception as e:
            logger.error("Invention failed for %s: %s", module_type, e, exc_info=True)
            all_results[module_type] = {"error": str(e)}
    # Save summary
    summary_path = os.path.join(args.output_dir, "invention_summary.json")
    with open(summary_path, "w") as f:
        json.dump(all_results, f, indent=2)
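
    # Human-readable recap of each module's best score (or failure reason).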
    logger.info("\n" + "=" * 60)
    logger.info("INVENTION SUMMARY")
    logger.info("=" * 60)
    for module, result in all_results.items():
        if "error" in result:
            logger.info("%-15s | FAILED: %s", module, result["error"])
        else:
            logger.info("%-15s | Score: %.3f | %s", module, result["score"], result["metrics"])
    logger.info("Full results: %s", summary_path)

if __name__ == "__main__":
    main()