# Source: Hugging Face Space cuilabs/bee (status: Running) | File size: 5,012 Bytes | Revision: db82745

"""Bee Autonomous Invention — Run the invention engine to discover novel algorithms.

This is the MAIN EVIDENCE script. It will:
  1. Use a small LLM (SmolLM2-135M) as the 'inventor brain' to generate candidate code
  2. Sandbox-execute each candidate against objective metrics
  3. Evolve the population via tournament selection
  4. Output the winning inventions with PROVABLE metrics

Run:
    python scripts/invent.py --generations 3 --population 4 --device mps
"""

import argparse
import json
import logging
import os
import sys
import time
from pathlib import Path

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Put this file's grandparent directory at the front of the import path so the
# `bee` package below resolves when the script is launched directly.
# NOTE(review): the usage line in the module docstring places this file in
# scripts/, so the grandparent is presumably the repository root -- confirm.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from bee.invention_engine import InventionEngine

# Configure logging once for the whole process: INFO level, with timestamp,
# level, logger name and message on every record.
logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(name)s | %(message)s")
# Logger used by every message emitted from this script.
logger = logging.getLogger("bee.invent")


def main():
    """Run the invention engine from the command line and persist its results.

    Steps, in order:
      1. Parse CLI options (brain model, device, generations, population,
         output directory, module type).
      2. Load the causal LM and tokenizer named by ``--brain`` and wrap them in
         a text-generation callable.
      3. Build an ``InventionEngine`` around that callable and call
         ``engine.evolve(module_type)`` for each selected module type.
      4. Write each winning candidate's source to
         ``<output_dir>/<invention_id>.py`` with a docstring header holding its
         score, metrics and parent IDs.
      5. Write ``<output_dir>/invention_summary.json`` and log a summary table.

    A failure while evolving or saving one module type is logged with its
    traceback and recorded as ``{"error": ...}`` in the summary; the remaining
    module types still run.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--brain", type=str, default="HuggingFaceTB/SmolLM2-135M",
                        help="LLM used to generate candidate inventions")
    parser.add_argument("--device", type=str, default="mps" if torch.backends.mps.is_available() else "cpu")
    parser.add_argument("--generations", type=int, default=3)
    parser.add_argument("--population", type=int, default=4)
    parser.add_argument("--output_dir", type=str, default="./inventions")
    parser.add_argument("--module", type=str, default="all",
                        choices=["all", "attention", "compression", "state_space", "memory"])
    args = parser.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)

    logger.info("Loading inventor brain: %s", args.brain)
    # NOTE(review): trust_remote_code=True allows the hub repository named by
    # --brain to execute its own Python code at load time. Only point --brain at
    # repositories you trust; consider dropping the flag if no supported brain
    # needs custom model code.
    brain = AutoModelForCausalLM.from_pretrained(args.brain, trust_remote_code=True).to(args.device).eval()
    tokenizer = AutoTokenizer.from_pretrained(args.brain, trust_remote_code=True)
    # generate() is given an explicit pad_token_id below, so fall back to EOS
    # when the tokenizer does not define a pad token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    def model_generate_fn(prompt: str, max_new_tokens: int = 512) -> str:
        """Sample a continuation of ``prompt`` from the brain model.

        The prompt is truncated to at most 256 tokens before generation. The
        whole of ``out[0]`` is decoded; for decoder-only models that sequence
        normally starts with the prompt tokens, so the returned text is
        presumably prompt + continuation (verify against what the engine
        expects).
        """
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256).to(args.device)
        logger.info("  [Brain] Generating %d tokens...", max_new_tokens)
        t0 = time.time()
        with torch.no_grad():
            out = brain.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=0.9,
                top_p=0.95,
                pad_token_id=tokenizer.pad_token_id,
            )
        logger.info("  [Brain] Generation done in %.1fs", time.time() - t0)
        return tokenizer.decode(out[0], skip_special_tokens=True)

    logger.info("Brain loaded. Starting autonomous invention engine...")
    logger.info("=" * 60)

    engine = InventionEngine(
        model_generate_fn=model_generate_fn,
        population_size=args.population,
        max_generations=args.generations,
    )

    modules = ["attention", "compression", "state_space", "memory"] if args.module == "all" else [args.module]
    all_results = {}

    for module_type in modules:
        logger.info("\n>>> INVENTING: %s", module_type.upper())
        logger.info("-" * 40)
        try:
            best = engine.evolve(module_type)
            all_results[module_type] = {
                "invention_id": best.invention_id,
                "generation": best.generation,
                "score": best.score,
                "metrics": best.metrics,
                "code_length": len(best.source_code),
                "code_preview": best.source_code[:500],
            }

            # Build the header before touching the filesystem, so a formatting
            # or JSON-serialisation failure cannot leave an empty or
            # half-written .py file behind.
            header = (
                f'"""Bee Autonomous Invention: {module_type}\n'
                f'Score: {best.score:.3f}\n'
                f'Metrics: {json.dumps(best.metrics, indent=2)}\n'
                f'Parent IDs: {best.parent_ids}\n'
                '"""\n\n'
            )

            # Save winning invention code. Python source files are UTF-8 by
            # default, and generated code may contain non-ASCII characters, so
            # do not rely on the platform's locale encoding.
            code_path = os.path.join(args.output_dir, f"{best.invention_id}.py")
            with open(code_path, "w", encoding="utf-8") as f:
                f.write(header)
                f.write(best.source_code)
            logger.info("Saved winning invention to %s", code_path)

        except Exception as e:
            # Deliberate best-effort boundary: one failing module type must not
            # abort the others. logger.exception logs at ERROR level and
            # attaches the traceback.
            logger.exception("Invention failed for %s: %s", module_type, e)
            all_results[module_type] = {"error": str(e)}

    # Save summary
    summary_path = os.path.join(args.output_dir, "invention_summary.json")
    with open(summary_path, "w", encoding="utf-8") as f:
        json.dump(all_results, f, indent=2)

    logger.info("\n" + "=" * 60)
    logger.info("INVENTION SUMMARY")
    logger.info("=" * 60)
    for module, result in all_results.items():
        if "error" in result:
            logger.info("%-15s | FAILED: %s", module, result["error"])
        else:
            logger.info("%-15s | Score: %.3f | %s", module, result["score"], result["metrics"])
    logger.info("Full results: %s", summary_path)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()