Commit ·
1b7da60
1
Parent(s): 6aef519
loadenv
Browse files
Signed-off-by: Rahul D Shetty <35rahuldshetty@gmail.com>
- .gitignore +1 -0
- create_synthetic_dataset.py +2 -1
- inference_midi_gpt.py +2 -1
- inference_midi_qwen3.py +2 -0
- prepare_dataset.py +3 -0
- prepare_dataset_qwen3.py +3 -0
- test_end_to_end.py +3 -0
- test_qwen3_e2e.py +3 -0
- train_midi_gpt.py +3 -0
- train_midi_qwen3.py +3 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.env
|
create_synthetic_dataset.py
CHANGED
|
@@ -10,7 +10,8 @@ from datasets import Dataset
|
|
| 10 |
from miditok import REMI, TokenizerConfig
|
| 11 |
from miditoolkit import MidiFile, Instrument, Note, TempoChange
|
| 12 |
from tqdm import tqdm
|
| 13 |
-
|
|
|
|
| 14 |
|
| 15 |
def create_simple_midi(genre, mood, tempo, num_notes=50):
|
| 16 |
"""Create a simple synthetic MIDI file."""
|
|
|
|
| 10 |
from miditok import REMI, TokenizerConfig
|
| 11 |
from miditoolkit import MidiFile, Instrument, Note, TempoChange
|
| 12 |
from tqdm import tqdm
|
| 13 |
+
from dotenv import load_dotenv
|
| 14 |
+
load_dotenv()
|
| 15 |
|
| 16 |
def create_simple_midi(genre, mood, tempo, num_notes=50):
|
| 17 |
"""Create a simple synthetic MIDI file."""
|
inference_midi_gpt.py
CHANGED
|
@@ -21,7 +21,8 @@ import torch
|
|
| 21 |
from miditok import REMI
|
| 22 |
from miditoolkit import MidiFile
|
| 23 |
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
| 24 |
-
|
|
|
|
| 25 |
|
| 26 |
def compute_special_token_offset(text_vocab_size: int):
|
| 27 |
return {
|
|
|
|
| 21 |
from miditok import REMI
|
| 22 |
from miditoolkit import MidiFile
|
| 23 |
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
| 24 |
+
from dotenv import load_dotenv
|
| 25 |
+
load_dotenv()
|
| 26 |
|
| 27 |
def compute_special_token_offset(text_vocab_size: int):
|
| 28 |
return {
|
inference_midi_qwen3.py
CHANGED
|
@@ -21,6 +21,8 @@ import torch
|
|
| 21 |
from miditok import REMI
|
| 22 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 23 |
|
|
|
|
|
|
|
| 24 |
|
| 25 |
def generate_midi(
|
| 26 |
model,
|
|
|
|
| 21 |
from miditok import REMI
|
| 22 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 23 |
|
| 24 |
+
from dotenv import load_dotenv
|
| 25 |
+
load_dotenv()
|
| 26 |
|
| 27 |
def generate_midi(
|
| 28 |
model,
|
prepare_dataset.py
CHANGED
|
@@ -19,6 +19,9 @@ from miditoolkit import MidiFile
|
|
| 19 |
from tqdm import tqdm
|
| 20 |
from transformers import GPT2Tokenizer
|
| 21 |
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def build_prompt(example):
|
| 24 |
"""Build a text prompt from dataset metadata fields."""
|
|
|
|
| 19 |
from tqdm import tqdm
|
| 20 |
from transformers import GPT2Tokenizer
|
| 21 |
|
| 22 |
+
from dotenv import load_dotenv
|
| 23 |
+
load_dotenv()
|
| 24 |
+
|
| 25 |
|
| 26 |
def build_prompt(example):
|
| 27 |
"""Build a text prompt from dataset metadata fields."""
|
prepare_dataset_qwen3.py
CHANGED
|
@@ -19,6 +19,9 @@ from tqdm import tqdm
|
|
| 19 |
from transformers import AutoTokenizer
|
| 20 |
|
| 21 |
|
|
|
|
|
|
|
|
|
|
| 22 |
def build_rich_prompt(example):
|
| 23 |
"""Build a comprehensive text prompt from all available music metadata."""
|
| 24 |
parts = []
|
|
|
|
| 19 |
from transformers import AutoTokenizer
|
| 20 |
|
| 21 |
|
| 22 |
+
from dotenv import load_dotenv
|
| 23 |
+
load_dotenv()
|
| 24 |
+
|
| 25 |
def build_rich_prompt(example):
|
| 26 |
"""Build a comprehensive text prompt from all available music metadata."""
|
| 27 |
parts = []
|
test_end_to_end.py
CHANGED
|
@@ -9,6 +9,9 @@ from miditok import REMI, TokenizerConfig
|
|
| 9 |
from transformers import GPT2Tokenizer
|
| 10 |
import io
|
| 11 |
|
|
|
|
|
|
|
|
|
|
| 12 |
# Add app to path
|
| 13 |
sys.path.insert(0, "/app")
|
| 14 |
|
|
|
|
| 9 |
from transformers import GPT2Tokenizer
|
| 10 |
import io
|
| 11 |
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
load_dotenv()
|
| 14 |
+
|
| 15 |
# Add app to path
|
| 16 |
sys.path.insert(0, "/app")
|
| 17 |
|
test_qwen3_e2e.py
CHANGED
|
@@ -9,6 +9,9 @@ from datasets import load_from_disk
|
|
| 9 |
from miditok import REMI
|
| 10 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 11 |
|
|
|
|
|
|
|
|
|
|
| 12 |
sys.path.insert(0, "/app")
|
| 13 |
|
| 14 |
print("=" * 70)
|
|
|
|
| 9 |
from miditok import REMI
|
| 10 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 11 |
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
load_dotenv()
|
| 14 |
+
|
| 15 |
sys.path.insert(0, "/app")
|
| 16 |
|
| 17 |
print("=" * 70)
|
train_midi_gpt.py
CHANGED
|
@@ -18,6 +18,9 @@ import os
|
|
| 18 |
import random
|
| 19 |
from pathlib import Path
|
| 20 |
|
|
|
|
|
|
|
|
|
|
| 21 |
import numpy as np
|
| 22 |
import torch
|
| 23 |
from datasets import load_from_disk
|
|
|
|
| 18 |
import random
|
| 19 |
from pathlib import Path
|
| 20 |
|
| 21 |
+
from dotenv import load_dotenv
|
| 22 |
+
load_dotenv()
|
| 23 |
+
|
| 24 |
import numpy as np
|
| 25 |
import torch
|
| 26 |
from datasets import load_from_disk
|
train_midi_qwen3.py
CHANGED
|
@@ -31,6 +31,9 @@ from transformers import (
|
|
| 31 |
TrainerCallback,
|
| 32 |
)
|
| 33 |
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
# Special token strings that will be added to the tokenizer
|
| 36 |
BOS_MIDI_TOKEN = "<|midi_start|>"
|
|
|
|
| 31 |
TrainerCallback,
|
| 32 |
)
|
| 33 |
|
| 34 |
+
from dotenv import load_dotenv
|
| 35 |
+
load_dotenv()
|
| 36 |
+
|
| 37 |
|
| 38 |
# Special token strings that will be added to the tokenizer
|
| 39 |
BOS_MIDI_TOKEN = "<|midi_start|>"
|