broadfield-dev committed on
Commit
2aef358
·
verified ·
1 Parent(s): 4da358a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -15
app.py CHANGED
@@ -86,32 +86,19 @@ def generate():
86
  for layer in data["past_key_values"]
87
  )
88
 
89
- # Ensure model config has caching enabled
90
- ee_model.config.use_cache = True
91
-
92
  with torch.no_grad():
93
  out = ee_model(
94
  inputs_embeds=inputs_embeds,
95
  attention_mask=attention_mask,
96
- past_key_values=past_key_values,
97
- use_cache=True,
98
  output_hidden_states=True,
99
  )
100
 
101
- # Final hidden state (sigma-space) — client decrypts + runs lm_head
102
  last_hidden = out.hidden_states[-1] # (1, seq_len, hidden)
103
 
104
- # Serialize KV cache — guard against None (some models/configs don't return it)
105
- new_past = None
106
- if out.past_key_values is not None:
107
- new_past = [
108
- [t.cpu().tolist() for t in layer]
109
- for layer in out.past_key_values
110
- ]
111
-
112
  return jsonify({
113
  "last_hidden": last_hidden.cpu().tolist(),
114
- "past_key_values": new_past,
115
  })
116
 
117
  except Exception as e:
 
86
  for layer in data["past_key_values"]
87
  )
88
 
 
 
 
89
  with torch.no_grad():
90
  out = ee_model(
91
  inputs_embeds=inputs_embeds,
92
  attention_mask=attention_mask,
93
+ use_cache=False,
 
94
  output_hidden_states=True,
95
  )
96
 
97
+ # Return final hidden state in sigma-space — client applies sigma_inv + lm_head
98
  last_hidden = out.hidden_states[-1] # (1, seq_len, hidden)
99
 
 
 
 
 
 
 
 
 
100
  return jsonify({
101
  "last_hidden": last_hidden.cpu().tolist(),
 
102
  })
103
 
104
  except Exception as e: