Spaces:
Running
Running
Commit ·
7f31eab
1
Parent(s): 9de43a5
print log about runtime concurrency
Browse files- lilyscript/generator.py +42 -0
lilyscript/generator.py
CHANGED
|
@@ -15,6 +15,9 @@ log) and `pretty = postprocess(raw)` (for the editor, segmented by measure).
|
|
| 15 |
|
| 16 |
import os
|
| 17 |
import json
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
import numpy as np
|
| 20 |
import onnxruntime as ort
|
|
@@ -68,6 +71,44 @@ def _softmax (x):
|
|
| 68 |
return e / e.sum()
|
| 69 |
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
class StreamingLilyletGenerator:
|
| 72 |
'''Loads the int8 KV ONNX sessions + vendored assets and streams generation.'''
|
| 73 |
|
|
@@ -100,6 +141,7 @@ class StreamingLilyletGenerator:
|
|
| 100 |
os.path.join(model_dir, 'token_kv_int8.onnx'), so, providers=['CPUExecutionProvider'])
|
| 101 |
self.patch_out_names = [o.name for o in self.patch_kv_sess.get_outputs()]
|
| 102 |
self.token_out_names = [o.name for o in self.token_kv_sess.get_outputs()]
|
|
|
|
| 103 |
|
| 104 |
# ---- text helpers (mirror LilyletPatchyGenerator.patch_to_text) ----
|
| 105 |
|
|
|
|
| 15 |
|
| 16 |
import os
|
| 17 |
import json
|
| 18 |
+
import logging
|
| 19 |
+
|
| 20 |
+
LOG = logging.getLogger('lilyscript')
|
| 21 |
|
| 22 |
import numpy as np
|
| 23 |
import onnxruntime as ort
|
|
|
|
| 71 |
return e / e.sum()
|
| 72 |
|
| 73 |
|
| 74 |
+
def _physical_cores (path='/proc/cpuinfo'):
    '''Best-effort physical (not logical/HT) core count parsed from a cpuinfo file.

    The file is read as stanzas of "key : value" lines separated by blank lines. A
    physical core is identified by its ("physical id", "core id") pair, so
    hyper-threaded siblings that share both values are counted once. Stanzas that
    lack either key are ignored.

    Args:
        path: cpuinfo-format file to parse; defaults to Linux's /proc/cpuinfo.

    Returns:
        The number of distinct physical cores, or None when the file cannot be
        read or no stanza carries both "physical id" and "core id".
    '''
    cores = set()
    stanza = {}

    def _flush ():
        # Record the finished stanza (if it identifies a core) and start a new one.
        if 'physical id' in stanza and 'core id' in stanza:
            cores.add((stanza['physical id'], stanza['core id']))
        stanza.clear()

    try:
        # `with` guarantees the handle is closed even if reading fails midway.
        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line:
                    _flush()
                elif ':' in line:
                    k, v = line.split(':', 1)
                    stanza[k.strip()] = v.strip()
    except (OSError, ValueError):
        # Missing/unreadable file or undecodable bytes: stay best-effort.
        return None

    # The final stanza is not necessarily followed by a blank line.
    _flush()
    return len(cores) or None
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def _log_thread_info (so, sess):
    '''Emit two INFO lines: host CPU capacity and ONNX Runtime thread settings.

    A value of 0 for intra_op_num_threads / inter_op_num_threads is reported as
    "auto". For intra-op the log also shows an estimate of the resulting pool
    size: the physical core count, falling back to the affinity count and then
    the logical CPU count.

    Args:
        so: session-options object; `intra_op_num_threads` and
            `inter_op_num_threads` are read, and `execution_mode` if present.
        sess: accepted for the caller's convenience; not used here.
    '''
    n_logical = os.cpu_count()
    # sched_getaffinity is not available on every platform; fall back to the logical count.
    if hasattr(os, 'sched_getaffinity'):
        n_affinity = len(os.sched_getaffinity(0))
    else:
        n_affinity = n_logical
    n_physical = _physical_cores()

    intra = so.intra_op_num_threads
    inter = so.inter_op_num_threads

    if intra == 0:
        intra_note = 'auto -> ~%s' % (n_physical or n_affinity or n_logical)
    else:
        intra_note = 'explicit'
    inter_note = 'explicit' if inter != 0 else 'auto'
    physical_shown = '?' if n_physical is None else n_physical

    LOG.info(
        'CPU: %s logical / %s physical cores, %s available (affinity)',
        n_logical, physical_shown, n_affinity)
    LOG.info(
        'ONNX Runtime threads: intra_op=%s (%s), inter_op=%s (%s) | execution_mode=%s',
        intra, intra_note,
        inter, inter_note,
        getattr(so, 'execution_mode', '?'))
|
| 110 |
+
|
| 111 |
+
|
| 112 |
class StreamingLilyletGenerator:
|
| 113 |
'''Loads the int8 KV ONNX sessions + vendored assets and streams generation.'''
|
| 114 |
|
|
|
|
| 141 |
os.path.join(model_dir, 'token_kv_int8.onnx'), so, providers=['CPUExecutionProvider'])
|
| 142 |
self.patch_out_names = [o.name for o in self.patch_kv_sess.get_outputs()]
|
| 143 |
self.token_out_names = [o.name for o in self.token_kv_sess.get_outputs()]
|
| 144 |
+
_log_thread_info(so, self.patch_kv_sess)
|
| 145 |
|
| 146 |
# ---- text helpers (mirror LilyletPatchyGenerator.patch_to_text) ----
|
| 147 |
|