File size: 1,748 Bytes
714cf46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os

# ---------------------------------------------------------------------------
# Process-wide environment variables. These must be in place BEFORE the
# libraries that read them (TensorFlow, HuggingFace Hub, tokenizers, cuBLAS)
# are imported, which is why this runs at the very top of the module.
# ---------------------------------------------------------------------------
_ENV_SETTINGS = {
    "TF_CPP_MIN_LOG_LEVEL": "2",              # TensorFlow C++ logging: only error/warning messages
    "DISABLE_PANDERA_IMPORT_WARNING": "true",
    "HF_HUB_ENABLE_HF_TRANSFER": "1",
    "HF_HUB_DISABLE_SYMLINKS_WARNING": "1",
    "TOKENIZERS_PARALLELISM": "true",
    "CUBLAS_WORKSPACE_CONFIG": ":4096:8",
    "TF_ENABLE_ONEDNN_OPTS": "0",
}
os.environ.update(_ENV_SETTINGS)

import warnings

# Keyword arguments describing the one warning we want silenced while
# TensorFlow is imported: a FutureWarning raised from tf_keras' losses module
# about the deprecated tf.losses.sparse_softmax_cross_entropy alias.
_TF_DEPRECATION_FILTER = dict(
    message="The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.",
    category=FutureWarning,
    module=".*tf_keras\\.src\\.losses.*",
)

# catch_warnings() snapshots the global filter list, so the "ignore" rule
# below is active only for the duration of the import and is rolled back
# automatically afterwards.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", **_TF_DEPRECATION_FILTER)
    try:
        import tensorflow as tf
    except ImportError:
        # TensorFlow is optional in this environment; proceed without it.
        pass

import torch
import torch._inductor.config as inductor_config
import torch._dynamo as dynamo

# Enable TensorFloat32 tensor cores for float32 matmul (Ampere+ GPUs)
# Provides significant speedup with minimal precision loss
torch.set_float32_matmul_precision('high')

# Enable TF32 for matrix multiplications and cuDNN operations
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

# Enable cuDNN autotuner - finds fastest algorithms for your hardware
# Best when input sizes are consistent; may slow down first iterations
torch.backends.cudnn.benchmark = True
inductor_config.max_autotune_gemm_backends = "ATEN,CUTLASS,FBGEMM"    

dynamo.config.capture_scalar_outputs = True
torch._dynamo.config.recompile_limit = 16

# Probe for Weights & Biases once and record the outcome in the environment,
# so downstream code can consult os.environ["WANDB_AVAILABLE"] instead of
# re-attempting the import itself.
try:
    import wandb  # noqa: F401  (imported only to test availability)
except ImportError:
    os.environ["WANDB_AVAILABLE"] = 'false'
else:
    os.environ["WANDB_AVAILABLE"] = 'true'