Spaces:
Running
Running
Commit ·
bd633c5
1
Parent(s): 0f05ade
upgraded lilylet version.
Browse files
- app.py +12 -4
- lilyscript/generator.py +34 -13
- web/vendor/lilylet.bundle.js +2 -2
app.py
CHANGED
|
@@ -36,6 +36,7 @@ HERE = os.path.dirname(os.path.abspath(__file__))
|
|
| 36 |
HF_MODEL_REPO = os.environ.get('LILYSCRIPT_MODEL_REPO', 'k-l-lambda/LilyNota')
|
| 37 |
HF_MODEL_SUBDIR = 'onnx' # weights + geometry + tokenizer live here in the repo
|
| 38 |
MODEL_DIR = os.environ.get('LILYSCRIPT_MODEL_DIR') # set -> use this local dir instead of the hub
|
|
|
|
| 39 |
ASSET_DIR = os.path.join(HERE, 'assets')
|
| 40 |
EXAMPLES_DIR = os.path.join(HERE, 'examples')
|
| 41 |
OUTPUT_DIR = os.path.join(HERE, 'outputs')
|
|
@@ -107,12 +108,19 @@ _init_logging()
|
|
| 107 |
|
| 108 |
|
| 109 |
def resolve_model_dir ():
|
| 110 |
-
'''Where the ONNX weights live
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
| 114 |
if MODEL_DIR:
|
| 115 |
return MODEL_DIR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
from huggingface_hub import snapshot_download
|
| 117 |
LOG.info('downloading model weights from hf:%s (%s/) ...', HF_MODEL_REPO, HF_MODEL_SUBDIR)
|
| 118 |
local = snapshot_download(
|
|
|
|
| 36 |
HF_MODEL_REPO = os.environ.get('LILYSCRIPT_MODEL_REPO', 'k-l-lambda/LilyNota')
|
| 37 |
HF_MODEL_SUBDIR = 'onnx' # weights + geometry + tokenizer live here in the repo
|
| 38 |
MODEL_DIR = os.environ.get('LILYSCRIPT_MODEL_DIR') # set -> use this local dir instead of the hub
|
| 39 |
+
LOCAL_MODEL_DIR = os.path.join(HERE, 'models') # repo-local onnx bundle; preferred over the hub when present
|
| 40 |
ASSET_DIR = os.path.join(HERE, 'assets')
|
| 41 |
EXAMPLES_DIR = os.path.join(HERE, 'examples')
|
| 42 |
OUTPUT_DIR = os.path.join(HERE, 'outputs')
|
|
|
|
| 108 |
|
| 109 |
|
| 110 |
def resolve_model_dir ():
|
| 111 |
+
'''Where the ONNX weights live, in priority order:
|
| 112 |
+
1. LILYSCRIPT_MODEL_DIR (explicit override, local dev),
|
| 113 |
+
2. the repo-local `models/` dir IF it holds the full weight bundle,
|
| 114 |
+
3. otherwise pull the `onnx/` bundle from the HF model repo.
|
| 115 |
+
The tokenizer is NOT pulled — it's read from the app's own assets/ dir — so we
|
| 116 |
+
only fetch the weight files.'''
|
| 117 |
if MODEL_DIR:
|
| 118 |
return MODEL_DIR
|
| 119 |
+
required = ('geometry.json', 'patch_kv_int8.onnx', 'token_kv_int8.onnx', 'wte.npy')
|
| 120 |
+
if os.path.isdir(LOCAL_MODEL_DIR) and all(
|
| 121 |
+
os.path.isfile(os.path.join(LOCAL_MODEL_DIR, name)) for name in required):
|
| 122 |
+
LOG.info('using local model weights in %s', LOCAL_MODEL_DIR)
|
| 123 |
+
return LOCAL_MODEL_DIR
|
| 124 |
from huggingface_hub import snapshot_download
|
| 125 |
LOG.info('downloading model weights from hf:%s (%s/) ...', HF_MODEL_REPO, HF_MODEL_SUBDIR)
|
| 126 |
local = snapshot_download(
|
lilyscript/generator.py
CHANGED
|
@@ -159,6 +159,33 @@ class StreamingLilyletGenerator:
|
|
| 159 |
def patches_to_text (self, patches):
|
| 160 |
return ''.join(self.patch_to_text(p) for p in patches)
|
| 161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
# ---- KV plumbing ----
|
| 163 |
|
| 164 |
def _empty_patch_past (self):
|
|
@@ -260,24 +287,18 @@ class StreamingLilyletGenerator:
|
|
| 260 |
'''
|
| 261 |
rng = np.random.default_rng(seed)
|
| 262 |
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
for line in prompt_text.splitlines():
|
| 267 |
-
ids = self.tokenizer.encode(line + '\n')
|
| 268 |
-
for i in range(0, len(ids), self.patch_size):
|
| 269 |
-
chunk = ids[i:i + self.patch_size]
|
| 270 |
-
patches.append(chunk + [self.pad_id] * (self.patch_size - len(chunk)))
|
| 271 |
-
|
| 272 |
-
out_text = self.patches_to_text(patches[1:])
|
| 273 |
# 0-based marker: `y` counts measures remaining AFTER this one (patchifier:
|
| 274 |
# y = total - i - 1), so `[r:0/{measures-1}]` yields exactly `measures` total.
|
| 275 |
prime_ids = self.tokenizer.encode(f'[r:0/{measures - 1}]') if measures is not None and measures >= 1 else None
|
| 276 |
primed = False
|
| 277 |
|
| 278 |
-
# seed the monitor's running context/stream from the
|
| 279 |
-
|
| 280 |
-
|
|
|
|
| 281 |
for tid in p:
|
| 282 |
monitor.commit_forced(tid)
|
| 283 |
|
|
|
|
| 159 |
def patches_to_text (self, patches):
    '''Decode every patch with `patch_to_text` and return the pieces concatenated,
    in order, as a single string.'''
    decoded = [self.patch_to_text(patch) for patch in patches]
    return ''.join(decoded)
|
| 161 |
|
| 162 |
+
def _encode_lines (self, lines):
    '''Tokenize text lines and cut the ids into fixed-width patches.

    A newline is appended to each line before encoding (the docstring this
    replaces notes that this matches the patchifier). The ids of one line are
    sliced into consecutive windows of `patch_size`; a short final window is
    right-padded with `pad_id`. Patches never span two lines.

    Returns a flat list of id lists, each `patch_size` long, in line order.'''
    result = []
    for text in lines:
        token_ids = self.tokenizer.encode(text + '\n')
        # consecutive windows over this line's ids; only the last can be short
        windows = (
            token_ids[start:start + self.patch_size]
            for start in range(0, len(token_ids), self.patch_size)
        )
        result.extend(
            window + [self.pad_id] * (self.patch_size - len(window))
            for window in windows
        )
    return result
|
| 172 |
+
|
| 173 |
+
def _seed_patches (self, prompt_text):
    '''Assemble the seed patch list in the order

        %-prompt patches -> <bos> patch -> [header] patches

    which the docstring this replaces describes as mirroring the training
    patchifier's layout.

    Each line of `prompt_text` is routed by its first non-blank character:
    lines beginning with `%` form the style PROMPT and are encoded ahead of
    the <bos> patch; all remaining lines (the `[field "..."]` headers) form
    the HEADER and are encoded after it. Relative line order is kept within
    each group. Without any `%` line the result is simply `[<bos>] + header`,
    i.e. the legacy layout with <bos> at index 0.

    The <bos> patch is `patch_size - 1` copies of `bos_id` followed by one
    `eos_id`.
    '''
    all_lines = prompt_text.splitlines()
    style_lines = [entry for entry in all_lines if entry.lstrip().startswith('%')]
    field_lines = [entry for entry in all_lines if not entry.lstrip().startswith('%')]

    marker = [self.bos_id] * (self.patch_size - 1)
    marker = marker + [self.eos_id]

    leading = self._encode_lines(style_lines)
    trailing = self._encode_lines(field_lines)
    return leading + [marker] + trailing
|
| 188 |
+
|
| 189 |
# ---- KV plumbing ----
|
| 190 |
|
| 191 |
def _empty_patch_past (self):
|
|
|
|
| 287 |
'''
|
| 288 |
rng = np.random.default_rng(seed)
|
| 289 |
|
| 290 |
+
patches = self._seed_patches(prompt_text)
|
| 291 |
+
|
| 292 |
+
out_text = self.patches_to_text(patches)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
# 0-based marker: `y` counts measures remaining AFTER this one (patchifier:
|
| 294 |
# y = total - i - 1), so `[r:0/{measures-1}]` yields exactly `measures` total.
|
| 295 |
prime_ids = self.tokenizer.encode(f'[r:0/{measures - 1}]') if measures is not None and measures >= 1 else None
|
| 296 |
primed = False
|
| 297 |
|
| 298 |
+
# seed the monitor's running context/stream from the seed patches (commit_forced
|
| 299 |
+
# skips bos/pad/eos, so passing the whole list incl. the <bos> patch is safe).
|
| 300 |
+
if monitor is not None and len(patches) > 0:
|
| 301 |
+
for p in patches:
|
| 302 |
for tid in p:
|
| 303 |
monitor.commit_forced(tid)
|
| 304 |
|
web/vendor/lilylet.bundle.js
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:869c6da801fee553a8c8d6c1ca24b6fae3fbe65993167f9a3c0a01540cd413a8
|
| 3 |
+
size 86557
|