Sam-max1 committed on
Commit
2f1cebb
·
verified ·
1 Parent(s): 6cf3d23

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. NITDAA_HEALTHEXPERT_USER_GUIDE.md +1 -1
  2. app.py +121 -154
  3. update_sync.py +159 -0
NITDAA_HEALTHEXPERT_USER_GUIDE.md CHANGED
@@ -26,7 +26,7 @@ When a question is submitted:
26
  - Your question will instantly appear in the output window.
27
  - The system will display a **"Generating answer.. (1.2s)"** timer.
28
  - Once context is retrieved from the Vector and Graph databases, the LLM will stream the Markdown-formatted answer back to the screen.
29
- - analysis may take upto 300 seconds due to slow shared resources. please keep patience.
30
  - Responses remain preserved in the scrollable window for the duration of your session.
31
 
32
  ## 5. Security & Session Handling
 
26
  - Your question will instantly appear in the output window.
27
  - The system will display a **"Generating answer.. (1.2s)"** timer.
28
  - Once context is retrieved from the Vector and Graph databases, the LLM will stream the Markdown-formatted answer back to the screen.
29
+ - Analysis may take up to 300 seconds due to slow shared resources. Please be patient.
30
  - Responses remain preserved in the scrollable window for the duration of your session.
31
 
32
  ## 5. Security & Session Handling
app.py CHANGED
@@ -692,103 +692,7 @@ def process_document_pipeline(path: str, orig_name: str, tier: str, token: str,
692
  except OSError as e:
693
  log.warning("Failed to delete %s: %s", path, e)
694
 
695
- def sync_he_data():
696
- try:
697
- import os, shutil
698
- from huggingface_hub import snapshot_download
699
- token = os.environ.get("HF_PRIVATE_TOKEN") or os.environ.get("HF_TOKEN")
700
- if not token:
701
- log.error("HF_PRIVATE_TOKEN or HF_TOKEN environment variable is not set. Dataset synchronization will be skipped.")
702
- return
703
-
704
- # --- 2-Way Log Sync on Startup ---
705
- log_dir = Path(__file__).parent / "app" / "logs"
706
- log_dir.mkdir(parents=True, exist_ok=True)
707
- try:
708
- if token:
709
- for log_file in ["nitdaa_sessions.json", "nitdaa_summary.json"]:
710
- local_p = log_dir / log_file
711
- try:
712
- dl_path = hf_hub_download(repo_id="Sam-max1/mat_data", filename=log_file, repo_type="dataset", token=token)
713
- if os.path.exists(dl_path):
714
- # Append any existing local logs not in the remote file to the remote file, then replace local
715
- remote_lines = set(open(dl_path).readlines())
716
- if local_p.exists():
717
- for line in open(local_p).readlines():
718
- if line not in remote_lines:
719
- remote_lines.add(line)
720
- with open(local_p, "w") as f:
721
- for line in sorted(list(remote_lines)):
722
- f.write(line)
723
- log.info(f"Successfully merged {log_file} from mat_data.")
724
- except Exception as e:
725
- log.warning(f"Could not download {log_file} from mat_data (it may not exist yet): {e}")
726
- except Exception as e:
727
- log.warning(f"Log sync failed: {e}")
728
- # ---------------------------------
729
-
730
- kbdocs_dir = Path(__file__).parent / "kbdocs"
731
- kbdocs_dir.mkdir(exist_ok=True)
732
-
733
- tmp_sync_dir = Path("/tmp/he_data_sync")
734
- if tmp_sync_dir.exists():
735
- shutil.rmtree(tmp_sync_dir)
736
- tmp_sync_dir.mkdir(exist_ok=True)
737
-
738
- log.info("Syncing fresh files from Sam-max1/he-data to local /tmp...")
739
- snapshot_download(
740
- repo_id="Sam-max1/he-data",
741
- repo_type="dataset",
742
- local_dir=str(tmp_sync_dir),
743
- token=token,
744
- ignore_patterns=[".git*"]
745
- )
746
-
747
- from pipeline import vector_store, graph_store
748
-
749
- # Determine if there are differences between kbdocs and the downloaded he-data
750
- local_files = {f.name: f.stat().st_size for f in kbdocs_dir.glob("*.*") if f.is_file()}
751
- remote_files = {f.name: f.stat().st_size for f in tmp_sync_dir.glob("*.*") if f.is_file()}
752
-
753
- is_different = False
754
- if set(local_files.keys()) != set(remote_files.keys()):
755
- is_different = True
756
- else:
757
- for k in local_files:
758
- if local_files[k] != remote_files[k]:
759
- is_different = True
760
- break
761
-
762
- if is_different:
763
- log.info("Detected changes in Sam-max1/he-data! Purging databases and re-syncing kbdocs.")
764
- # Clear DBs
765
- vector_store.purge()
766
- if graph_store.is_available():
767
- graph_store.purge()
768
-
769
- # Wipe local kbdocs and replace
770
- shutil.rmtree(kbdocs_dir)
771
- shutil.copytree(tmp_sync_dir, kbdocs_dir)
772
-
773
- ingested_count = 0
774
- for file_path in kbdocs_dir.glob("*.*"):
775
- if file_path.is_file():
776
- log.info(f"Auto-ingesting file: {file_path.name}")
777
- process_document_pipeline(str(file_path), file_path.name, "foundation", "admin", delete_after=False)
778
- ingested_count += 1
779
- log.info("=== Full Data Re-Ingestion Complete ===")
780
- else:
781
- log.info("kbdocs is completely up to date with he-data. No ingestion needed.")
782
 
783
- log.info(f"Vector DB Chunks: {vector_store.count()}")
784
- if graph_store.is_available():
785
- stats = graph_store.get_stats()
786
- log.info(f"Kuzu DB Nodes: {stats.get('nodes', 0)}, Edges: {stats.get('edges', 0)}")
787
-
788
- except Exception as e:
789
- log.error("Failed to sync he-data: %s", e)
790
-
791
- threading.Thread(target=sync_he_data, daemon=True).start()
792
 
793
  @app.route("/api/ingest", methods=["POST"])
794
  @limiter.limit("10 per minute")
@@ -1133,30 +1037,14 @@ def probe_embed():
1133
  def start_auto_ingest_thread():
1134
  def _auto_ingest_worker():
1135
  global _auto_ingest_status
1136
- kbdocs_dir = Path(__file__).parent / "kbdocs"
1137
- kbdocs_dir.mkdir(parents=True, exist_ok=True)
 
1138
 
1139
- hf_token = os.environ.get("HF_PRIVATE_TOKEN")
1140
- if hf_token:
1141
- import logging
1142
- from huggingface_hub import snapshot_download
1143
- try:
1144
- logging.info("HF_PRIVATE_TOKEN found, syncing dataset Sam-max1/he-data to %s...", kbdocs_dir)
1145
- snapshot_download(
1146
- repo_id="Sam-max1/he-data",
1147
- repo_type="dataset",
1148
- local_dir=str(kbdocs_dir),
1149
- token=hf_token
1150
- )
1151
- logging.info("Dataset synced successfully.")
1152
- except Exception as e:
1153
- logging.error("Failed to sync HuggingFace dataset: %s", e)
1154
- elif not kbdocs_dir.exists():
1155
- return
1156
-
1157
- import requests, time
1158
  log.info("Auto-ingest: waiting for LLM services to boot...")
1159
- # Wait up to 60s for models
1160
  for _ in range(30):
1161
  try:
1162
  r1 = requests.get(f"{config.EMBED_BASE_URL}/health", timeout=2)
@@ -1172,46 +1060,125 @@ def start_auto_ingest_thread():
1172
  _auto_ingest_status["done"] = True
1173
  return
1174
 
1175
- # Check existing documents to avoid re-ingesting
1176
- existing = {d["source"] for d in vector_store.list_documents("admin")}
1177
- files_to_ingest = []
1178
- for f in kbdocs_dir.iterdir():
1179
- if f.is_file() and _allowed(f.name) and f.name not in existing:
1180
- files_to_ingest.append(f)
1181
-
1182
- if not files_to_ingest:
1183
- log.info("Auto-ingest: no new files found in kbdocs.")
1184
  _auto_ingest_status["done"] = True
1185
  return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1186
 
1187
- log.info("Auto-ingesting %d files from kbdocs...", len(files_to_ingest))
1188
- config.current_session.set("admin")
1189
-
1190
- _auto_ingest_status["running"] = True
1191
- _auto_ingest_status["total"] = len(files_to_ingest)
1192
- _auto_ingest_status["completed"] = 0
1193
- _auto_ingest_status["results"] = []
1194
- _auto_ingest_status["done"] = False
1195
-
1196
- for path in files_to_ingest:
1197
- _auto_ingest_status["current_file"] = path.name
1198
- res = process_document_pipeline(str(path), path.name, tier="foundation", token="admin", delete_after=False)
1199
- _auto_ingest_status["completed"] += 1
1200
- _auto_ingest_status["results"].append({
1201
- "file": path.name,
1202
- "ok": res["ok"],
1203
- "result": res["result"],
1204
- })
1205
- if res["ok"]:
1206
- log.info("Auto-ingest successful for %s", path.name)
1207
- else:
1208
- log.error("Auto-ingest failed for %s: %s", path.name, res["result"])
1209
-
1210
- _auto_ingest_status["running"] = False
1211
- _auto_ingest_status["done"] = True
1212
- _auto_ingest_status["current_file"] = None
1213
- trigger_kv_cache_update("admin")
1214
 
 
 
 
 
 
1215
  threading.Thread(target=_auto_ingest_worker, daemon=True).start()
1216
 
1217
 
 
692
  except OSError as e:
693
  log.warning("Failed to delete %s: %s", path, e)
694
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695
 
 
 
 
 
 
 
 
 
 
696
 
697
  @app.route("/api/ingest", methods=["POST"])
698
  @limiter.limit("10 per minute")
 
1037
  def start_auto_ingest_thread():
1038
  def _auto_ingest_worker():
1039
  global _auto_ingest_status
1040
+ import requests, time, shutil, os
1041
+ from huggingface_hub import snapshot_download, hf_hub_download
1042
+ from pathlib import Path
1043
 
1044
+ token = os.environ.get("HF_PRIVATE_TOKEN") or os.environ.get("HF_TOKEN")
1045
+
1046
+ # --- Wait for LLM services to boot before doing anything ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1047
  log.info("Auto-ingest: waiting for LLM services to boot...")
 
1048
  for _ in range(30):
1049
  try:
1050
  r1 = requests.get(f"{config.EMBED_BASE_URL}/health", timeout=2)
 
1060
  _auto_ingest_status["done"] = True
1061
  return
1062
 
1063
+ if not token:
1064
+ log.error("HF_PRIVATE_TOKEN or HF_TOKEN environment variable is not set. Dataset synchronization will be skipped.")
1065
+ _auto_ingest_status["error"] = "HF Token missing"
 
 
 
 
 
 
1066
  _auto_ingest_status["done"] = True
1067
  return
1068
+
1069
+ # --- 2-Way Log Sync on Startup ---
1070
+ log_dir = Path(__file__).parent / "app" / "logs"
1071
+ log_dir.mkdir(parents=True, exist_ok=True)
1072
+ try:
1073
+ for log_file in ["nitdaa_sessions.json", "nitdaa_summary.json"]:
1074
+ local_p = log_dir / log_file
1075
+ try:
1076
+ dl_path = hf_hub_download(repo_id="Sam-max1/mat_data", filename=log_file, repo_type="dataset", token=token)
1077
+ if os.path.exists(dl_path):
1078
+ remote_lines = set(open(dl_path).readlines())
1079
+ if local_p.exists():
1080
+ for line in open(local_p).readlines():
1081
+ if line not in remote_lines:
1082
+ remote_lines.add(line)
1083
+ with open(local_p, "w") as f:
1084
+ for line in sorted(list(remote_lines)):
1085
+ f.write(line)
1086
+ log.info(f"Successfully merged {log_file} from mat_data.")
1087
+ except Exception as e:
1088
+ log.warning(f"Could not download {log_file} from mat_data (it may not exist yet): {e}")
1089
+ except Exception as e:
1090
+ log.warning(f"Log sync failed: {e}")
1091
+ # ---------------------------------
1092
+
1093
+ kbdocs_dir = Path(__file__).parent / "kbdocs"
1094
+ kbdocs_dir.mkdir(parents=True, exist_ok=True)
1095
+
1096
+ tmp_sync_dir = Path("/tmp/he_data_sync")
1097
+ if tmp_sync_dir.exists():
1098
+ shutil.rmtree(tmp_sync_dir)
1099
+ tmp_sync_dir.mkdir(exist_ok=True)
1100
+
1101
+ log.info("Syncing fresh files from Sam-max1/he-data to local /tmp...")
1102
+ try:
1103
+ snapshot_download(
1104
+ repo_id="Sam-max1/he-data",
1105
+ repo_type="dataset",
1106
+ local_dir=str(tmp_sync_dir),
1107
+ token=token,
1108
+ ignore_patterns=[".git*"]
1109
+ )
1110
+ except Exception as e:
1111
+ log.error(f"Failed to download he-data dataset: {e}")
1112
+ _auto_ingest_status["error"] = f"Download failed: {e}"
1113
+ _auto_ingest_status["done"] = True
1114
+ return
1115
+
1116
+ from pipeline import vector_store, graph_store
1117
+
1118
+ local_files = {f.name: f.stat().st_size for f in kbdocs_dir.glob("*.*") if f.is_file()}
1119
+ remote_files = {f.name: f.stat().st_size for f in tmp_sync_dir.glob("*.*") if f.is_file()}
1120
+
1121
+ is_different = False
1122
+ if set(local_files.keys()) != set(remote_files.keys()):
1123
+ is_different = True
1124
+ else:
1125
+ for k in local_files:
1126
+ if local_files[k] != remote_files[k]:
1127
+ is_different = True
1128
+ break
1129
+
1130
+ if is_different:
1131
+ log.info("Detected changes in Sam-max1/he-data! Purging databases and re-syncing kbdocs.")
1132
+ vector_store.purge()
1133
+ if graph_store.is_available():
1134
+ graph_store.purge()
1135
+
1136
+ shutil.rmtree(kbdocs_dir)
1137
+ shutil.copytree(tmp_sync_dir, kbdocs_dir)
1138
+
1139
+ files_to_ingest = [f for f in kbdocs_dir.glob("*.*") if f.is_file() and _allowed(f.name)]
1140
+ if not files_to_ingest:
1141
+ log.info("No valid files to ingest in he-data.")
1142
+ _auto_ingest_status["done"] = True
1143
+ return
1144
+
1145
+ config.current_session.set("admin")
1146
+ _auto_ingest_status["running"] = True
1147
+ _auto_ingest_status["total"] = len(files_to_ingest)
1148
+ _auto_ingest_status["completed"] = 0
1149
+ _auto_ingest_status["results"] = []
1150
+ _auto_ingest_status["done"] = False
1151
+
1152
+ for path in files_to_ingest:
1153
+ _auto_ingest_status["current_file"] = path.name
1154
+ log.info(f"Auto-ingesting file: {path.name}")
1155
+ res = process_document_pipeline(str(path), path.name, "foundation", "admin", delete_after=False)
1156
+ _auto_ingest_status["completed"] += 1
1157
+ _auto_ingest_status["results"].append({
1158
+ "file": path.name,
1159
+ "ok": res["ok"],
1160
+ "result": res["result"],
1161
+ })
1162
+ if res["ok"]:
1163
+ log.info("Auto-ingest successful for %s", path.name)
1164
+ else:
1165
+ log.error("Auto-ingest failed for %s: %s", path.name, res["result"])
1166
 
1167
+ _auto_ingest_status["running"] = False
1168
+ _auto_ingest_status["done"] = True
1169
+ _auto_ingest_status["current_file"] = None
1170
+ trigger_kv_cache_update("admin")
1171
+
1172
+ log.info("=== Full Data Re-Ingestion Complete ===")
1173
+ else:
1174
+ log.info("kbdocs is completely up to date with he-data. No ingestion needed.")
1175
+ _auto_ingest_status["done"] = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1176
 
1177
+ log.info(f"Vector DB Chunks: {vector_store.count()}")
1178
+ if graph_store.is_available():
1179
+ stats = graph_store.get_stats()
1180
+ log.info(f"Kuzu DB Nodes: {stats.get('nodes', 0)}, Edges: {stats.get('edges', 0)}")
1181
+
1182
  threading.Thread(target=_auto_ingest_worker, daemon=True).start()
1183
 
1184
 
update_sync.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ with open('app.py', 'r') as f:
4
+ content = f.read()
5
+
6
+ # The unified function
7
+ new_func = """def start_auto_ingest_thread():
8
+ def _auto_ingest_worker():
9
+ global _auto_ingest_status
10
+ import requests, time, shutil, os
11
+ from huggingface_hub import snapshot_download, hf_hub_download
12
+ from pathlib import Path
13
+
14
+ token = os.environ.get("HF_PRIVATE_TOKEN") or os.environ.get("HF_TOKEN")
15
+
16
+ # --- Wait for LLM services to boot before doing anything ---
17
+ log.info("Auto-ingest: waiting for LLM services to boot...")
18
+ for _ in range(30):
19
+ try:
20
+ r1 = requests.get(f"{config.EMBED_BASE_URL}/health", timeout=2)
21
+ r2 = requests.get(f"{config.LLM_BASE_URL}/health", timeout=2)
22
+ if r1.status_code == 200 and r2.status_code == 200:
23
+ break
24
+ except Exception:
25
+ pass
26
+ time.sleep(2)
27
+ else:
28
+ log.warning("Auto-ingest aborted: LLM services not online.")
29
+ _auto_ingest_status["error"] = "LLM services not online within 60s"
30
+ _auto_ingest_status["done"] = True
31
+ return
32
+
33
+ if not token:
34
+ log.error("HF_PRIVATE_TOKEN or HF_TOKEN environment variable is not set. Dataset synchronization will be skipped.")
35
+ _auto_ingest_status["error"] = "HF Token missing"
36
+ _auto_ingest_status["done"] = True
37
+ return
38
+
39
+ # --- 2-Way Log Sync on Startup ---
40
+ log_dir = Path(__file__).parent / "app" / "logs"
41
+ log_dir.mkdir(parents=True, exist_ok=True)
42
+ try:
43
+ for log_file in ["nitdaa_sessions.json", "nitdaa_summary.json"]:
44
+ local_p = log_dir / log_file
45
+ try:
46
+ dl_path = hf_hub_download(repo_id="Sam-max1/mat_data", filename=log_file, repo_type="dataset", token=token)
47
+ if os.path.exists(dl_path):
48
+ remote_lines = set(open(dl_path).readlines())
49
+ if local_p.exists():
50
+ for line in open(local_p).readlines():
51
+ if line not in remote_lines:
52
+ remote_lines.add(line)
53
+ with open(local_p, "w") as f:
54
+ for line in sorted(list(remote_lines)):
55
+ f.write(line)
56
+ log.info(f"Successfully merged {log_file} from mat_data.")
57
+ except Exception as e:
58
+ log.warning(f"Could not download {log_file} from mat_data (it may not exist yet): {e}")
59
+ except Exception as e:
60
+ log.warning(f"Log sync failed: {e}")
61
+ # ---------------------------------
62
+
63
+ kbdocs_dir = Path(__file__).parent / "kbdocs"
64
+ kbdocs_dir.mkdir(parents=True, exist_ok=True)
65
+
66
+ tmp_sync_dir = Path("/tmp/he_data_sync")
67
+ if tmp_sync_dir.exists():
68
+ shutil.rmtree(tmp_sync_dir)
69
+ tmp_sync_dir.mkdir(exist_ok=True)
70
+
71
+ log.info("Syncing fresh files from Sam-max1/he-data to local /tmp...")
72
+ try:
73
+ snapshot_download(
74
+ repo_id="Sam-max1/he-data",
75
+ repo_type="dataset",
76
+ local_dir=str(tmp_sync_dir),
77
+ token=token,
78
+ ignore_patterns=[".git*"]
79
+ )
80
+ except Exception as e:
81
+ log.error(f"Failed to download he-data dataset: {e}")
82
+ _auto_ingest_status["error"] = f"Download failed: {e}"
83
+ _auto_ingest_status["done"] = True
84
+ return
85
+
86
+ from pipeline import vector_store, graph_store
87
+
88
+ local_files = {f.name: f.stat().st_size for f in kbdocs_dir.glob("*.*") if f.is_file()}
89
+ remote_files = {f.name: f.stat().st_size for f in tmp_sync_dir.glob("*.*") if f.is_file()}
90
+
91
+ is_different = False
92
+ if set(local_files.keys()) != set(remote_files.keys()):
93
+ is_different = True
94
+ else:
95
+ for k in local_files:
96
+ if local_files[k] != remote_files[k]:
97
+ is_different = True
98
+ break
99
+
100
+ if is_different:
101
+ log.info("Detected changes in Sam-max1/he-data! Purging databases and re-syncing kbdocs.")
102
+ vector_store.purge()
103
+ if graph_store.is_available():
104
+ graph_store.purge()
105
+
106
+ shutil.rmtree(kbdocs_dir)
107
+ shutil.copytree(tmp_sync_dir, kbdocs_dir)
108
+
109
+ files_to_ingest = [f for f in kbdocs_dir.glob("*.*") if f.is_file() and _allowed(f.name)]
110
+ if not files_to_ingest:
111
+ log.info("No valid files to ingest in he-data.")
112
+ _auto_ingest_status["done"] = True
113
+ return
114
+
115
+ config.current_session.set("admin")
116
+ _auto_ingest_status["running"] = True
117
+ _auto_ingest_status["total"] = len(files_to_ingest)
118
+ _auto_ingest_status["completed"] = 0
119
+ _auto_ingest_status["results"] = []
120
+ _auto_ingest_status["done"] = False
121
+
122
+ for path in files_to_ingest:
123
+ _auto_ingest_status["current_file"] = path.name
124
+ log.info(f"Auto-ingesting file: {path.name}")
125
+ res = process_document_pipeline(str(path), path.name, "foundation", "admin", delete_after=False)
126
+ _auto_ingest_status["completed"] += 1
127
+ _auto_ingest_status["results"].append({
128
+ "file": path.name,
129
+ "ok": res["ok"],
130
+ "result": res["result"],
131
+ })
132
+ if res["ok"]:
133
+ log.info("Auto-ingest successful for %s", path.name)
134
+ else:
135
+ log.error("Auto-ingest failed for %s: %s", path.name, res["result"])
136
+
137
+ _auto_ingest_status["running"] = False
138
+ _auto_ingest_status["done"] = True
139
+ _auto_ingest_status["current_file"] = None
140
+ trigger_kv_cache_update("admin")
141
+
142
+ log.info("=== Full Data Re-Ingestion Complete ===")
143
+ else:
144
+ log.info("kbdocs is completely up to date with he-data. No ingestion needed.")
145
+ _auto_ingest_status["done"] = True
146
+
147
+ log.info(f"Vector DB Chunks: {vector_store.count()}")
148
+ if graph_store.is_available():
149
+ stats = graph_store.get_stats()
150
+ log.info(f"Kuzu DB Nodes: {stats.get('nodes', 0)}, Edges: {stats.get('edges', 0)}")
151
+
152
+ threading.Thread(target=_auto_ingest_worker, daemon=True).start()"""
153
+
154
+ # Replace start_auto_ingest_thread
155
+ content = re.sub(r'def start_auto_ingest_thread\(\):.*? threading\.Thread\(target=_auto_ingest_worker, daemon=True\)\.start\(\)', new_func, content, flags=re.DOTALL)
156
+
157
+ with open('app.py', 'w') as f:
158
+ f.write(content)
159
+