"""
Constants for ACE-Step
Centralized constants used across the codebase
"""
| |
|
| | |
| | |
| | |
| |
|
# Language codes accepted as valid. The entries are short lowercase codes
# (they appear to be ISO 639 identifiers -- confirm against the data source),
# followed by the literal fallback value 'unknown'.
# Kept as a list so existing callers that index or iterate in order still work.
VALID_LANGUAGES = [
    'ar', 'az', 'bg', 'bn', 'ca', 'cs', 'da', 'de', 'el', 'en',
    'es', 'fa', 'fi', 'fr', 'he', 'hi', 'hr', 'ht', 'hu', 'id',
    'is', 'it', 'ja', 'ko', 'la', 'lt', 'ms', 'ne', 'nl', 'no',
    'pa', 'pl', 'pt', 'ro', 'ru', 'sa', 'sk', 'sr', 'sv', 'sw',
    'ta', 'te', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'yue', 'zh',
    'unknown',
]
| |
|
| |
|
| | |
| | |
| | |
| |
|
# Building blocks of a key/scale label.
KEYSCALE_NOTES = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
# Accidentals: none, the ASCII spellings ('#', 'b') and the Unicode
# sharp/flat signs, so both spellings of the same key are accepted.
KEYSCALE_ACCIDENTALS = ['', '#', 'b', '♯', '♭']
KEYSCALE_MODES = ['major', 'minor']

# Every "<note><accidental> <mode>" combination, e.g. "C major", "F# minor".
# A comprehension is used so no loop variables are left behind in the module
# namespace.
VALID_KEYSCALES = {
    f"{pitch}{accidental} {mode_name}"
    for pitch in KEYSCALE_NOTES
    for accidental in KEYSCALE_ACCIDENTALS
    for mode_name in KEYSCALE_MODES
}
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | |
# Lower and upper limits for tempo values (beats per minute, going by the
# name). Whether the bounds are inclusive is decided by the code that uses
# them -- confirm against callers.
BPM_MIN = 30
BPM_MAX = 300
| |
|
| | |
# Lower and upper limits for a duration value. The unit is presumably
# seconds -- TODO confirm against the code that validates durations.
DURATION_MIN = 10
DURATION_MAX = 600
| |
|
| | |
# Accepted time-signature values, stored as plain integers (presumably the
# number of beats per bar -- confirm against callers).
VALID_TIME_SIGNATURES = [2, 3, 4, 6]
| |
|
| |
|
| | |
| | |
| | |
| |
|
# All task type identifiers.
TASK_TYPES = ["text2music", "repaint", "cover", "extract", "lego", "complete"]

# Subset of TASK_TYPES (first three entries). The name suggests these are the
# tasks offered by "turbo" models -- confirm against callers.
TASK_TYPES_TURBO = ["text2music", "repaint", "cover"]

# Same entries as TASK_TYPES, kept as an independent list object. The name
# suggests these are the tasks offered by "base" models -- confirm against
# callers.
TASK_TYPES_BASE = ["text2music", "repaint", "cover", "extract", "lego", "complete"]
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | |
# Default instruction strings.
# DEFAULT_DIT_INSTRUCTION is identical to TASK_INSTRUCTIONS["text2music"] and
# DEFAULT_LM_INSTRUCTION to TASK_INSTRUCTIONS["cover"]; keep them in sync when
# editing either place.
DEFAULT_DIT_INSTRUCTION = "Fill the audio semantic mask based on the given conditions:"
DEFAULT_LM_INSTRUCTION = "Generate audio semantic tokens based on the given conditions:"
# Split across lines only to respect the line-length limit; adjacent literals
# are concatenated at compile time, so the runtime string is one sentence.
DEFAULT_LM_UNDERSTAND_INSTRUCTION = (
    "Understand the given musical conditions and describe the audio "
    "semantics accordingly:"
)
# The next two differ only in their first word ("Expand" vs "Format").
DEFAULT_LM_INSPIRED_INSTRUCTION = "Expand the user's input into a more detailed and specific musical description:"
DEFAULT_LM_REWRITE_INSTRUCTION = "Format the user's input into a more detailed and specific musical description:"
| |
|
| | |
| | |
| | |
# Instruction text per task.
# Plain keys match the identifiers in TASK_TYPES. Templates containing a
# {TRACK_NAME} or {TRACK_CLASSES} placeholder have a sibling "<task>_default"
# entry with the placeholder-free wording. How the placeholder is filled in
# (str.format, str.replace, ...) is up to the caller -- not visible here.
TASK_INSTRUCTIONS = {
    "text2music": "Fill the audio semantic mask based on the given conditions:",
    "repaint": "Repaint the mask area based on the given conditions:",
    "cover": "Generate audio semantic tokens based on the given conditions:",
    "extract": "Extract the {TRACK_NAME} track from the audio:",
    "extract_default": "Extract the track from the audio:",
    "lego": "Generate the {TRACK_NAME} track based on the audio context:",
    "lego_default": "Generate the track based on the audio context:",
    "complete": "Complete the input track with {TRACK_CLASSES}:",
    "complete_default": "Complete the input track:",
}
| |
|
| |
|
| | |
| | |
| | |
| |
|
# Known track (stem) identifiers, lowercase with underscores. Presumably the
# values substituted for the {TRACK_NAME} placeholder in TASK_INSTRUCTIONS --
# confirm against callers. Order is preserved in case callers rely on it.
TRACK_NAMES = [
    "woodwinds", "brass", "fx", "synth", "strings", "percussion",
    "keyboard", "guitar", "bass", "drums", "backing_vocals", "vocals",
]
| |
|
# Prompt template with three positional "{}" slots, filled in order:
# instruction, caption, metas. The "<|endoftext|>" marker directly follows
# the metas slot and the template ends with a single trailing newline.
# The text is whitespace-sensitive: keep the blank lines exactly as they are.
SFT_GEN_PROMPT = """# Instruction
{}

# Caption
{}

# Metas
{}<|endoftext|>
"""
| |
|