| """Bee Autonomous Invention — Run the invention engine to discover novel algorithms. |
| |
| This is the MAIN EVIDENCE script. It will: |
| 1. Use a small LLM (SmolLM2-135M) as the 'inventor brain' to generate candidate code |
| 2. Sandbox-execute each candidate against objective metrics |
| 3. Evolve the population via tournament selection |
| 4. Output the winning inventions with PROVABLE metrics |
| |
| Run: |
| python scripts/invent.py --generations 3 --population 4 --device mps |
| """ |
|
|
import argparse
import json
import logging
import os
import sys
import time
from pathlib import Path
|
|
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
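# Make the repo root importable so `bee.invention_engine` resolves when this
# script is run directly from the source tree, without installing the package.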
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from bee.invention_engine import InventionEngine
|
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(name)s | %(message)s")
logger = logging.getLogger("bee.invent")
|
|
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--brain", type=str, default="HuggingFaceTB/SmolLM2-135M",
                        help="LLM used to generate candidate inventions")
| parser.add_argument("--device", type=str, default="mps" if torch.backends.mps.is_available() else "cpu") |
| parser.add_argument("--generations", type=int, default=3) |
| parser.add_argument("--population", type=int, default=4) |
| parser.add_argument("--output_dir", type=str, default="./inventions") |
| parser.add_argument("--module", type=str, default="all", |
| choices=["all", "attention", "compression", "state_space", "memory"]) |
| args = parser.parse_args() |
|
|
    os.makedirs(args.output_dir, exist_ok=True)
|
|
| logger.info("Loading inventor brain: %s", args.brain) |
| brain = AutoModelForCausalLM.from_pretrained(args.brain, trust_remote_code=True).to(args.device).eval() |
| tokenizer = AutoTokenizer.from_pretrained(args.brain, trust_remote_code=True) |
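    # Causal LMs often define no pad token; fall back to EOS so generate() can pad.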
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
|
|
    def model_generate_fn(prompt: str, max_new_tokens: int = 512) -> str:
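        # Prompts are truncated to 256 tokens to keep the context small for the tiny brain model.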
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256).to(args.device)
        logger.info(" [Brain] Generating up to %d tokens...", max_new_tokens)
        t0 = time.time()
        with torch.no_grad():
            out = brain.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=0.9,
                top_p=0.95,
                pad_token_id=tokenizer.pad_token_id,
            )
        logger.info(" [Brain] Generation done in %.1fs", time.time() - t0)
        return tokenizer.decode(out[0], skip_special_tokens=True)
|
|
| logger.info("Brain loaded. Starting autonomous invention engine...") |
| logger.info("=" * 60) |
|
|
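    # The engine generates candidates with the brain, sandbox-executes them against
    # objective metrics, and evolves the population via tournament selection.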
    engine = InventionEngine(
        model_generate_fn=model_generate_fn,
        population_size=args.population,
        max_generations=args.generations,
    )
|
|
| modules = ["attention", "compression", "state_space", "memory"] if args.module == "all" else [args.module] |
| all_results = {} |
|
|
    for module_type in modules:
        logger.info("\n>>> INVENTING: %s", module_type.upper())
        logger.info("-" * 40)
        try:
            best = engine.evolve(module_type)
            all_results[module_type] = {
                "invention_id": best.invention_id,
                "generation": best.generation,
                "score": best.score,
                "metrics": best.metrics,
                "code_length": len(best.source_code),
                "code_preview": best.source_code[:500],
            }
|
|
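            # Persist the winner with its provenance (score, metrics, parents) in a header docstring.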
            code_path = os.path.join(args.output_dir, f"{best.invention_id}.py")
            with open(code_path, "w") as f:
                f.write(f'"""Bee Autonomous Invention: {module_type}\n')
                f.write(f'Score: {best.score:.3f}\n')
                f.write(f'Metrics: {json.dumps(best.metrics, indent=2)}\n')
                f.write(f'Parent IDs: {best.parent_ids}\n')
                f.write('"""\n\n')
                f.write(best.source_code)
            logger.info("Saved winning invention to %s", code_path)
|
|
        except Exception as e:
            logger.error("Invention failed for %s: %s", module_type, e, exc_info=True)
            all_results[module_type] = {"error": str(e)}
|
|
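    # Write a machine-readable summary (one entry per module type) alongside the inventions.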
    summary_path = os.path.join(args.output_dir, "invention_summary.json")
    with open(summary_path, "w") as f:
        json.dump(all_results, f, indent=2)
|
|
| logger.info("\n" + "=" * 60) |
| logger.info("INVENTION SUMMARY") |
| logger.info("=" * 60) |
| for module, result in all_results.items(): |
| if "error" in result: |
| logger.info("%-15s | FAILED: %s", module, result["error"]) |
| else: |
| logger.info("%-15s | Score: %.3f | %s", module, result["score"], result["metrics"]) |
| logger.info("Full results: %s", summary_path) |
|
|
if __name__ == "__main__":
    main()
|
|