dmozzherin committed on
Commit
dbd6f57
·
1 Parent(s): 4c99959

add Go web tool

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. INSTRUCTIONS.md +6 -6
  3. tool/go.mod +3 -0
  4. tool/main.go +234 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ tool/labels
INSTRUCTIONS.md CHANGED
@@ -13,8 +13,8 @@ Copy one of these files from `output/gguf/` to your computer:
13
 
14
  | File | Size | Use when |
15
  |------|------|----------|
16
- | `insect-parser-q4_k_m.gguf` | 3.2 GB | Your computer has **8 GB RAM** (most laptops) |
17
- | `insect-parser-q5_k_m.gguf` | 3.4 GB | Your computer has **16 GB RAM or more** (slightly better quality) |
18
 
19
  Not sure how much RAM you have?
20
  - **Mac:** Apple menu → About This Mac → look for "Memory"
@@ -40,7 +40,7 @@ but running fully on your own machine.
40
 
41
  1. In LM Studio, click **My Models** in the left sidebar
42
  2. Click **"Load model from file"** (or drag the `.gguf` file into the window)
43
- 3. Navigate to the `insect-parser-q4_k_m.gguf` file you copied in Step 1
44
  4. Wait for the model to load (progress bar at the bottom)
45
 
46
  ### Configure the system prompt
@@ -110,7 +110,7 @@ Go to **ollama.com**, download, and install for your operating system.
110
  Open a terminal, navigate to the project folder, and run:
111
 
112
  ```bash
113
- ollama create insect-parser -f Modelfile
114
  ```
115
 
116
  You only need to do this once.
@@ -118,13 +118,13 @@ You only need to do this once.
118
  ### Parse a label
119
 
120
  ```bash
121
- ollama run insect-parser "U.S.A., Texas: Austin, 15.iv.2021, J. Doe"
122
  ```
123
 
124
  Or pipe a text file:
125
 
126
  ```bash
127
- cat my_label.txt | ollama run insect-parser
128
  ```
129
 
130
  ---
 
13
 
14
  | File | Size | Use when |
15
  |------|------|----------|
16
+ | `ento-label-parser-q4_k_m.gguf` | 3.2 GB | Your computer has **8 GB RAM** (most laptops) |
17
+ | `ento-label-parser-q5_k_m.gguf` | 3.4 GB | Your computer has **16 GB RAM or more** (slightly better quality) |
18
 
19
  Not sure how much RAM you have?
20
  - **Mac:** Apple menu → About This Mac → look for "Memory"
 
40
 
41
  1. In LM Studio, click **My Models** in the left sidebar
42
  2. Click **"Load model from file"** (or drag the `.gguf` file into the window)
43
+ 3. Navigate to the `ento-label-parser-q4_k_m.gguf` file you copied in Step 1
44
  4. Wait for the model to load (progress bar at the bottom)
45
 
46
  ### Configure the system prompt
 
110
  Open a terminal, navigate to the project folder, and run:
111
 
112
  ```bash
113
+ ollama create ento-label-parser -f Modelfile
114
  ```
115
 
116
  You only need to do this once.
 
118
  ### Parse a label
119
 
120
  ```bash
121
+ ollama run ento-label-parser "U.S.A., Texas: Austin, 15.iv.2021, J. Doe"
122
  ```
123
 
124
  Or pipe a text file:
125
 
126
  ```bash
127
+ cat my_label.txt | ollama run ento-label-parser
128
  ```
129
 
130
  ---
tool/go.mod ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ module labels
2
+
3
+ go 1.25.1
tool/main.go ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "bytes"
5
+ "encoding/json"
6
+ "fmt"
7
+ "io"
8
+ "log"
9
+ "net/http"
10
+ "strings"
11
+ )
12
+
13
// Settings for the Ollama instance this tool talks to.
const (
	// ollamaURL is the chat endpoint that parseLabel posts requests to.
	ollamaURL = "http://localhost:11434/api/chat"

	// model is the model name placed in every request.
	model = "ento-label-parser"
)
15
+
16
+ type ollamaRequest struct {
17
+ Model string `json:"model"`
18
+ Stream bool `json:"stream"`
19
+ Messages []ollamaMessage `json:"messages"`
20
+ }
21
+
22
// ollamaMessage is one chat turn: who is speaking (Role) and what was
// said (Content).
type ollamaMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}
26
+
27
// ollamaResponse captures the only part of the chat reply this tool reads:
// the text content of the returned message. Other reply fields are ignored
// during decoding.
type ollamaResponse struct {
	Message struct {
		Content string `json:"content"`
	} `json:"message"`
}
32
+
33
// labelResult is the per-label entry returned to the browser. Verbatim is
// always present; Parsed and Error are each omitted from the JSON when empty.
type labelResult struct {
	Verbatim string          `json:"verbatim"`
	Parsed   json.RawMessage `json:"parsed,omitempty"`
	Error    string          `json:"error,omitempty"`
}
38
+
39
+ func parseLabel(label string) labelResult {
40
+ result := labelResult{Verbatim: label}
41
+
42
+ reqBody, _ := json.Marshal(ollamaRequest{
43
+ Model: model,
44
+ Stream: false,
45
+ Messages: []ollamaMessage{
46
+ {Role: "user", Content: label},
47
+ },
48
+ })
49
+
50
+ resp, err := http.Post(ollamaURL, "application/json", bytes.NewReader(reqBody))
51
+ if err != nil {
52
+ result.Error = fmt.Sprintf("failed to call ollama: %v", err)
53
+ return result
54
+ }
55
+ defer resp.Body.Close()
56
+
57
+ body, err := io.ReadAll(resp.Body)
58
+ if err != nil {
59
+ result.Error = fmt.Sprintf("failed to read response: %v", err)
60
+ return result
61
+ }
62
+
63
+ var ollamaResp ollamaResponse
64
+ if err := json.Unmarshal(body, &ollamaResp); err != nil {
65
+ result.Error = fmt.Sprintf("failed to parse ollama response: %v", err)
66
+ return result
67
+ }
68
+
69
+ content := strings.TrimSpace(ollamaResp.Message.Content)
70
+ var parsed json.RawMessage
71
+ if err := json.Unmarshal([]byte(content), &parsed); err != nil {
72
+ result.Error = fmt.Sprintf("model returned invalid JSON: %v\nraw content: %s", err, content)
73
+ return result
74
+ }
75
+
76
+ result.Parsed = parsed
77
+ return result
78
+ }
79
+
80
+ func handleParse(w http.ResponseWriter, r *http.Request) {
81
+ if r.Method != http.MethodPost {
82
+ http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
83
+ return
84
+ }
85
+
86
+ if err := r.ParseForm(); err != nil {
87
+ http.Error(w, "bad request", http.StatusBadRequest)
88
+ return
89
+ }
90
+
91
+ raw := r.FormValue("labels")
92
+ lines := strings.Split(raw, "\n")
93
+
94
+ var results []labelResult
95
+ for _, line := range lines {
96
+ line = strings.TrimSpace(line)
97
+ if line == "" {
98
+ continue
99
+ }
100
+ results = append(results, parseLabel(line))
101
+ }
102
+
103
+ w.Header().Set("Content-Type", "application/json")
104
+ enc := json.NewEncoder(w)
105
+ enc.SetIndent("", " ")
106
+ enc.Encode(results)
107
+ }
108
+
109
// indexHTML is the single-page UI written out by handleIndex. Its script splits the textarea into non-empty trimmed lines, POSTs each one separately to /parse, and after every reply re-renders the accumulated results as JSON together with success/failure counts and timings.
+ const indexHTML = `<!DOCTYPE html>
110
+ <html lang="en">
111
+ <head>
112
+ <meta charset="UTF-8">
113
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
114
+ <title>Label Parser</title>
115
+ <style>
116
+ body { font-family: sans-serif; max-width: 900px; margin: 2rem auto; padding: 0 1rem; background: #f5f5f5; }
117
+ h1 { color: #333; }
118
+ textarea { width: 100%; height: 180px; font-family: monospace; font-size: 14px; padding: 0.5rem; box-sizing: border-box; border: 1px solid #ccc; border-radius: 4px; }
119
+ button { margin-top: 0.75rem; padding: 0.5rem 1.5rem; font-size: 15px; background: #2563eb; color: white; border: none; border-radius: 4px; cursor: pointer; }
120
+ button:hover { background: #1d4ed8; }
121
+ button:disabled { background: #93c5fd; cursor: default; }
122
+ #progress { margin-top: 0.75rem; font-size: 13px; color: #555; min-height: 1.2em; }
123
+ #meta { margin-top: 0.75rem; display: none; background: #fff; border: 1px solid #ddd; border-radius: 6px; padding: 0.6rem 1rem; font-size: 13px; }
124
+ #meta table { border-collapse: collapse; }
125
+ #meta td { padding: 2px 1.2rem 2px 0; }
126
+ #meta td:first-child { color: #888; }
127
+ #meta td.ok { color: #16a34a; font-weight: bold; }
128
+ #meta td.fail { color: #dc2626; font-weight: bold; }
129
+ #output { margin-top: 1rem; background: #1e1e1e; color: #d4d4d4; padding: 1rem; border-radius: 6px; font-family: monospace; font-size: 13px; white-space: pre-wrap; word-break: break-all; min-height: 3rem; }
130
+ label { font-weight: bold; display: block; margin-bottom: 0.4rem; }
131
+ p.hint { color: #666; font-size: 13px; margin-top: 0.25rem; }
132
+ </style>
133
+ </head>
134
+ <body>
135
+ <h1>Entomology Label Parser</h1>
136
+ <form id="form">
137
+ <label for="labels">Labels (one per line):</label>
138
+ <textarea id="labels" name="labels" placeholder="Kazakhstan, Akmola Region: Kokshetau Mountains near Terisakkan River, 23.VI-12.VIII.1957, Emeljanov"></textarea>
139
+ <p class="hint">Each non-empty line is sent separately to the model.</p>
140
+ <button type="submit" id="btn">Parse</button>
141
+ </form>
142
+ <div id="progress"></div>
143
+ <div id="meta"></div>
144
+ <div id="output">Results will appear here.</div>
145
+ <script>
146
+ function fmt(ms) {
147
+ if (ms < 1000) return ms.toFixed(0) + ' ms';
148
+ return (ms / 1000).toFixed(2) + ' s';
149
+ }
150
+
151
+ function avg(arr) {
152
+ return arr.length ? arr.reduce((a,b) => a+b, 0) / arr.length : null;
153
+ }
154
+
155
+ function renderMeta(total, successes, failures, okTimes, failTimes, totalMs, done) {
156
+ const meta = document.getElementById('meta');
157
+ meta.style.display = 'block';
158
+ const avgOk = avg(okTimes);
159
+ const avgFail = avg(failTimes);
160
+ meta.innerHTML =
161
+ '<table>' +
162
+ '<tr><td>labels</td><td>' + total + '</td></tr>' +
163
+ '<tr><td>successes</td><td class="ok">' + successes + '</td></tr>' +
164
+ '<tr><td>failures</td><td class="' + (failures ? 'fail' : 'ok') + '">' + failures + '</td></tr>' +
165
+ (avgOk !== null ? '<tr><td>avg time (success)</td><td>' + fmt(avgOk) + '</td></tr>' : '') +
166
+ (avgFail !== null ? '<tr><td>avg time (failure)</td><td>' + fmt(avgFail) + '</td></tr>' : '') +
167
+ (done ? '<tr><td>total time</td><td>' + fmt(totalMs) + '</td></tr>' : '') +
168
+ '</table>';
169
+ }
170
+
171
+ document.getElementById('form').addEventListener('submit', async e => {
172
+ e.preventDefault();
173
+ const btn = document.getElementById('btn');
174
+ const out = document.getElementById('output');
175
+ const prog = document.getElementById('progress');
176
+ const meta = document.getElementById('meta');
177
+ btn.disabled = true;
178
+ out.textContent = '';
179
+ prog.textContent = '';
180
+ meta.style.display = 'none';
181
+
182
+ const lines = document.getElementById('labels').value
183
+ .split('\n').map(l => l.trim()).filter(l => l !== '');
184
+ const total = lines.length;
185
+ if (total === 0) {
186
+ prog.textContent = 'No labels entered.';
187
+ btn.disabled = false;
188
+ return;
189
+ }
190
+
191
+ const results = [];
192
+ const okTimes = [], failTimes = [];
193
+ let successes = 0, failures = 0;
194
+ const globalStart = performance.now();
195
+ try {
196
+ for (let i = 0; i < total; i++) {
197
+ prog.textContent = 'Parsing ' + (i + 1) + ' of ' + total + '\u2026';
198
+ const fd = new URLSearchParams();
199
+ fd.append('labels', lines[i]);
200
+ const t0 = performance.now();
201
+ const resp = await fetch('/parse', { method: 'POST', body: fd });
202
+ const json = await resp.json();
203
+ const elapsed = performance.now() - t0;
204
+ for (const r of json) {
205
+ results.push(r);
206
+ if (r.error) { failures++; failTimes.push(elapsed); }
207
+ else { successes++; okTimes.push(elapsed); }
208
+ }
209
+ out.textContent = JSON.stringify(results, null, 2);
210
+ renderMeta(total, successes, failures, okTimes, failTimes, performance.now() - globalStart, false);
211
+ }
212
+ prog.textContent = 'Done \u2014 ' + total + ' label' + (total !== 1 ? 's' : '') + ' parsed.';
213
+ renderMeta(total, successes, failures, okTimes, failTimes, performance.now() - globalStart, true);
214
+ } catch (err) {
215
+ prog.textContent = 'Error: ' + err;
216
+ } finally {
217
+ btn.disabled = false;
218
+ }
219
+ });
220
+ </script>
221
+ </body>
222
+ </html>`
223
+
224
+ func handleIndex(w http.ResponseWriter, r *http.Request) {
225
+ w.Header().Set("Content-Type", "text/html; charset=utf-8")
226
+ fmt.Fprint(w, indexHTML)
227
+ }
228
+
229
+ func main() {
230
+ http.HandleFunc("/", handleIndex)
231
+ http.HandleFunc("/parse", handleParse)
232
+ log.Println("listening on http://localhost:8080")
233
+ log.Fatal(http.ListenAndServe(":8080", nil))
234
+ }