| | """ |
| | LLM-based question generation utilities. |
| | |
| | Supports multiple LLM providers for generating natural, lexically consistent questions. |
| | """ |
| |
|
| | import os |
| | import random |
| | from typing import Dict, List, Optional, Tuple |
| | import json |
| |
|
| | from .logger import setup_logger |
| |
|
| | logger = setup_logger(__name__) |
| |
|
| |
|
class LLMQuestionGenerator:
    """Generate question text for audio tasks.

    The class is intended to be backed by a local Llama 3.1 8B Instruct
    model, but that integration is not implemented yet: every public method
    currently builds its questions from templates, whether or not
    ``enabled`` is set.
    """

    # Built-in count templates, used when the configured templates are
    # missing or empty.
    _DEFAULT_COUNT_MCQ = (
        "What is the number of distinct sound sources in the audio file?",
        "How many different types of sounds can be identified in this recording?",
    )
    _DEFAULT_COUNT_OPEN_TEXT = (
        "How many distinct sound sources are present in the audio?",
        "Count the number of unique sounds in this recording.",
    )

    def __init__(
        self,
        enabled: bool = False,
        template_questions: Optional[Dict] = None
    ):
        """
        Initialize LLM question generator.

        Args:
            enabled: Whether LLM generation is enabled. The flag is stored
                and logged, but templates are used either way for now.
            template_questions: Template questions for fallback. Only the
                ``["count"]["mcq"]`` and ``["count"]["open_text"]`` lists
                are read by this class.
        """
        self.enabled = enabled
        self.template_questions = template_questions or {}

        if not self.enabled:
            logger.info("LLM generation disabled, using templates")
            return

        logger.info("LLM generation enabled (local Llama 3.1 8B)")
        logger.warning("Local LLM integration not yet implemented, falling back to templates")

    def generate_count_questions(
        self,
        correct_count: int,
        categories_present: List[str],
        generate_both: bool = True
    ) -> Dict:
        """
        Generate count task questions.

        Args:
            correct_count: Correct number of unique sounds
            categories_present: List of sound categories in the audio
                (currently unused)
            generate_both: Whether to generate both MCQ and open-text
                (currently ignored; both questions are always returned)

        Returns:
            Dictionary with mcq_question and open_text_question
        """
        return self._generate_count_template(correct_count)

    def generate_category_questions(
        self,
        task_type: str,
        correct_category: str,
        categories_present: List[str],
        context: Optional[Dict] = None
    ) -> Dict:
        """
        Generate questions where the answer is a sound category.

        Args:
            task_type: Type of task (duration, order, volume). Any value
                other than "duration" or "order" gets the volume wording.
            correct_category: Correct answer category
            categories_present: All categories in the audio (currently unused)
            context: Additional context. Keys read are question_type,
                question_subtype and reference_sound.

        Returns:
            Dictionary with mcq_question and open_text_question
        """
        return self._generate_category_template(task_type, correct_category, context)

    def _generate_count_template(self, correct_count: int) -> Dict:
        """Generate count questions from templates.

        Configured templates are read from ``template_questions["count"]``.
        If a list is missing or empty, the built-in defaults are used, so
        ``random.choice`` is never called on an empty sequence.
        ``correct_count`` does not affect the question text.
        """
        configured = self.template_questions.get("count") or {}
        mcq_templates = configured.get("mcq") or self._DEFAULT_COUNT_MCQ
        open_templates = configured.get("open_text") or self._DEFAULT_COUNT_OPEN_TEXT

        return {
            "mcq_question": random.choice(mcq_templates),
            "open_text_question": random.choice(open_templates)
        }

    def _generate_category_template(
        self,
        task_type: str,
        correct_category: str,
        context: Optional[Dict]
    ) -> Dict:
        """Generate category questions from templates.

        ``correct_category`` is not used in the question text; only
        ``task_type`` and ``context`` determine the wording.
        """
        context = context or {}

        if task_type == "duration":
            q_type = context.get("question_type", "shortest")
            mcq_q = f"Which of the following sounds is heard for the {q_type} duration?"
            open_q = f"Which sound is heard for the {q_type} duration in the audio?"

        elif task_type == "order":
            q_subtype = context.get("question_subtype", "first")
            if q_subtype in ("first", "last"):
                mcq_q = f"Which sound appears {q_subtype} in the audio clip?"
                open_q = f"What is the {q_subtype} sound you hear in the audio?"
            else:
                # Any subtype other than "after" is treated as "before".
                relation = "after" if q_subtype == "after" else "before"
                ref = context.get("reference_sound", "")
                mcq_q = f"Which sound comes {relation} {ref}?"
                open_q = f"What sound comes {relation} {ref}?"

        else:
            # Volume wording, also used for any unrecognised task type.
            q_type = context.get("question_type", "loudest")
            mcq_q = f"Which sound is the {q_type} in the audio?"
            open_q = f"Identify the {q_type} sound in the audio clip."

        return {
            "mcq_question": mcq_q,
            "open_text_question": open_q
        }
| |
|