| | import numpy as np |
| | import dataclasses as dc |
| |
|
| |
|
@dc.dataclass
class CtrlArguments:
    """Bundle of data and formulation options, each carrying a ``help`` text.

    Every field stores a human-readable description under
    ``metadata["help"]``; presumably this is consumed by a dataclass-driven
    argument parser -- confirm against the caller.
    """

    # Location(s) of the training data.
    train_data: str = dc.field(
        metadata={"help": "A CSV list of training data files"},
        default="data/training_cunique_with_distractors.json",
    )

    # Name of the problem formulation to use.
    formulation: str = dc.field(
        metadata={
            "help": (
                "Type of problem definition: autoregressive (areg) or "
                "u-PMLM (upmlm) or mixed (if predict_questions is set)"
            )
        },
        default="areg_ltr",
    )

    # Policy applied to contexts longer than the allowed length.
    context_strategy: str = dc.field(
        metadata={
            "help": "How to deal with contexts greater than a specified length"
        },
        default="take_first",
    )

    # Path to the serialized tokenizer.
    tokenizer_file: str = dc.field(
        metadata={
            "help": (
                "A JSON file (in the format provided by HuggingFace's "
                "tokenizers library) with a trained tokenizer"
            )
        },
        default="tokenizer.json",
    )

    # Upper bound on the sequence length.
    sequence_length: int = dc.field(
        metadata={"help": "The max sequence length"},
        default=256,
    )

    # Whether every sliding window gets the control code prepended.
    force_prepend_control: bool = dc.field(
        metadata={
            "help": (
                "If the control code should be prepended for all sliding "
                "windows. Otherwise, it is only prepended at the start of "
                "the sequence"
            )
        },
        default=False,
    )
| |
|
| |
|
class GradientPrinter:
    """Callable that prints a labelled summary of a gradient tensor.

    Each call prints a header containing ``name``, the gradient itself, its
    norm (``np.linalg.norm``) and its mean (``np.mean``), followed by a
    blank line. The call returns ``None``.

    NOTE(review): presumably registered as a backward hook (e.g. via
    ``Tensor.register_hook``) -- confirm against the callers.
    """

    def __init__(self, name: str) -> None:
        """Store the label shown in the printed header.

        Args:
            name: Identifier for the tensor whose gradient is printed.
        """
        self.name = name

    def __call__(self, grad) -> None:
        """Print the gradient together with its norm and mean.

        Args:
            grad: Tensor-like object supporting ``detach().cpu().numpy()``
                (e.g. a ``torch.Tensor``).
        """
        # detach() first: numpy() raises RuntimeError on a tensor that still
        # requires grad (e.g. when backward runs with create_graph=True).
        np_grad = grad.detach().cpu().numpy()
        print(f"======== GRAD FOR {self.name} ========")
        print(f"\tGRAD {grad}")
        print(f"\tGRAD NORM {np.linalg.norm(np_grad)}")
        print(f"\tGRAD MEAN {np.mean(np_grad)}")
        print()