k-l-lambda committed on
Commit
bd633c5
·
1 Parent(s): 0f05ade

upgraded lilylet version.

Browse files
Files changed (3) hide show
  1. app.py +12 -4
  2. lilyscript/generator.py +34 -13
  3. web/vendor/lilylet.bundle.js +2 -2
app.py CHANGED
@@ -36,6 +36,7 @@ HERE = os.path.dirname(os.path.abspath(__file__))
36
  HF_MODEL_REPO = os.environ.get('LILYSCRIPT_MODEL_REPO', 'k-l-lambda/LilyNota')
37
  HF_MODEL_SUBDIR = 'onnx' # weights + geometry + tokenizer live here in the repo
38
  MODEL_DIR = os.environ.get('LILYSCRIPT_MODEL_DIR') # set -> use this local dir instead of the hub
 
39
  ASSET_DIR = os.path.join(HERE, 'assets')
40
  EXAMPLES_DIR = os.path.join(HERE, 'examples')
41
  OUTPUT_DIR = os.path.join(HERE, 'outputs')
@@ -107,12 +108,19 @@ _init_logging()
107
 
108
 
109
  def resolve_model_dir ():
110
- '''Where the ONNX weights live. If LILYSCRIPT_MODEL_DIR is set, use it as-is
111
- (local dev). Otherwise pull the `onnx/` bundle from the HF model repo and
112
- return its local snapshot path. The tokenizer is NOT pulled it's read from
113
- the app's own assets/ dir so we only fetch the weight files.'''
 
 
114
  if MODEL_DIR:
115
  return MODEL_DIR
 
 
 
 
 
116
  from huggingface_hub import snapshot_download
117
  LOG.info('downloading model weights from hf:%s (%s/) ...', HF_MODEL_REPO, HF_MODEL_SUBDIR)
118
  local = snapshot_download(
 
36
  HF_MODEL_REPO = os.environ.get('LILYSCRIPT_MODEL_REPO', 'k-l-lambda/LilyNota')
37
  HF_MODEL_SUBDIR = 'onnx' # weights + geometry + tokenizer live here in the repo
38
  MODEL_DIR = os.environ.get('LILYSCRIPT_MODEL_DIR') # set -> use this local dir instead of the hub
39
+ LOCAL_MODEL_DIR = os.path.join(HERE, 'models') # repo-local onnx bundle; preferred over the hub when present
40
  ASSET_DIR = os.path.join(HERE, 'assets')
41
  EXAMPLES_DIR = os.path.join(HERE, 'examples')
42
  OUTPUT_DIR = os.path.join(HERE, 'outputs')
 
108
 
109
 
110
  def resolve_model_dir ():
111
+ '''Where the ONNX weights live, in priority order:
112
+ 1. LILYSCRIPT_MODEL_DIR (explicit override, local dev),
113
+ 2. the repo-local `models/` dir IF it holds the full weight bundle,
114
+ 3. otherwise pull the `onnx/` bundle from the HF model repo.
115
+ The tokenizer is NOT pulled — it's read from the app's own assets/ dir — so we
116
+ only fetch the weight files.'''
117
  if MODEL_DIR:
118
  return MODEL_DIR
119
+ required = ('geometry.json', 'patch_kv_int8.onnx', 'token_kv_int8.onnx', 'wte.npy')
120
+ if os.path.isdir(LOCAL_MODEL_DIR) and all(
121
+ os.path.isfile(os.path.join(LOCAL_MODEL_DIR, name)) for name in required):
122
+ LOG.info('using local model weights in %s', LOCAL_MODEL_DIR)
123
+ return LOCAL_MODEL_DIR
124
  from huggingface_hub import snapshot_download
125
  LOG.info('downloading model weights from hf:%s (%s/) ...', HF_MODEL_REPO, HF_MODEL_SUBDIR)
126
  local = snapshot_download(
lilyscript/generator.py CHANGED
@@ -159,6 +159,33 @@ class StreamingLilyletGenerator:
159
  def patches_to_text (self, patches):
160
  return ''.join(self.patch_to_text(p) for p in patches)
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  # ---- KV plumbing ----
163
 
164
  def _empty_patch_past (self):
@@ -260,24 +287,18 @@ class StreamingLilyletGenerator:
260
  '''
261
  rng = np.random.default_rng(seed)
262
 
263
- bos_patch = [self.bos_id] * (self.patch_size - 1) + [self.eos_id]
264
- patches = [bos_patch]
265
- if prompt_text:
266
- for line in prompt_text.splitlines():
267
- ids = self.tokenizer.encode(line + '\n')
268
- for i in range(0, len(ids), self.patch_size):
269
- chunk = ids[i:i + self.patch_size]
270
- patches.append(chunk + [self.pad_id] * (self.patch_size - len(chunk)))
271
-
272
- out_text = self.patches_to_text(patches[1:])
273
  # 0-based marker: `y` counts measures remaining AFTER this one (patchifier:
274
  # y = total - i - 1), so `[r:0/{measures-1}]` yields exactly `measures` total.
275
  prime_ids = self.tokenizer.encode(f'[r:0/{measures - 1}]') if measures is not None and measures >= 1 else None
276
  primed = False
277
 
278
- # seed the monitor's running context/stream from the prompt patches (if any)
279
- if monitor is not None and len(patches) > 1:
280
- for p in patches[1:]:
 
281
  for tid in p:
282
  monitor.commit_forced(tid)
283
 
 
159
  def patches_to_text (self, patches):
160
  return ''.join(self.patch_to_text(p) for p in patches)
161
 
162
def _encode_lines (self, lines):
    '''Turn text lines into fixed-width token patches.

    Every line gets a trailing newline appended before it is tokenized (matching
    the patchifier). Its ids are then cut into consecutive runs of patch_size, and
    a short final run is right-padded with pad_id. Each line is chunked on its own,
    so ids from two different lines never share a patch.

    Returns a flat list of patch_size-long id lists, in input order.
    '''
    width = self.patch_size
    out = []
    for text in lines:
        token_ids = self.tokenizer.encode(text + '\n')
        pieces = (token_ids[at:at + width] for at in range(0, len(token_ids), width))
        out.extend(piece + [self.pad_id] * (width - len(piece)) for piece in pieces)
    return out
172
+
173
def _seed_patches (self, prompt_text):
    '''Build the seed patch list, mirroring the training patchifier's layout:

        %-prompt patches -> <bos> patch -> [header] patches

    Lines whose first non-blank character is `%` are the unsupervised style PROMPT
    (placed BEFORE <bos>); every other line (`[field "..."]` headers) is the
    supervised HEADER (placed AFTER <bos>). With no `%` lines this reduces to
    `[<bos>] + header` (the legacy <bos>-at-index-0 layout).

    prompt_text: the seed text, or None / '' for no seed. Both of the latter yield
        just the <bos> patch.

    Returns a list of patch_size-long id lists.
    '''
    prompt_lines, header_lines = [], []
    # `or ''`: the code this helper replaced guarded with `if prompt_text:`, so a
    # missing prompt (None) must keep working instead of failing on .splitlines().
    for line in (prompt_text or '').splitlines():
        bucket = prompt_lines if line.lstrip().startswith('%') else header_lines
        bucket.append(line)

    # the <bos> patch is patch_size long: bos_id repeated, closed by a single eos_id
    bos_patch = [self.bos_id] * (self.patch_size - 1) + [self.eos_id]
    return self._encode_lines(prompt_lines) + [bos_patch] + self._encode_lines(header_lines)
188
+
189
  # ---- KV plumbing ----
190
 
191
  def _empty_patch_past (self):
 
287
  '''
288
  rng = np.random.default_rng(seed)
289
 
290
+ patches = self._seed_patches(prompt_text)
291
+
292
+ out_text = self.patches_to_text(patches)
 
 
 
 
 
 
 
293
  # 0-based marker: `y` counts measures remaining AFTER this one (patchifier:
294
  # y = total - i - 1), so `[r:0/{measures-1}]` yields exactly `measures` total.
295
  prime_ids = self.tokenizer.encode(f'[r:0/{measures - 1}]') if measures is not None and measures >= 1 else None
296
  primed = False
297
 
298
+ # seed the monitor's running context/stream from the seed patches (commit_forced
299
+ # skips bos/pad/eos, so passing the whole list incl. the <bos> patch is safe).
300
+ if monitor is not None and len(patches) > 0:
301
+ for p in patches:
302
  for tid in p:
303
  monitor.commit_forced(tid)
304
 
web/vendor/lilylet.bundle.js CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51d680585d5628989b6d3c145a2b1a0cdd5010549c958fe512fca77d809f617a
3
- size 586799
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:869c6da801fee553a8c8d6c1ca24b6fae3fbe65993167f9a3c0a01540cd413a8
3
+ size 86557