Spaces:

Scribbler310
/

SentimentDetectiontest

Running

App Files Community

3v324v23 committed about 5 hours ago

Commit

177eb47

1 Parent(s): 776cb9b

Keep only one benchmark.py and delete benchmark_b.py

Browse files

Files changed (2) hide show

backend/benchmark.py +13 -46
backend/benchmark_b.py +0 -118

backend/benchmark.py CHANGED Viewed

@@ -21,7 +21,7 @@ samples = [
     ("frustration", "This word problem about two trains leaving different cities is making my head spin. I hate word problems!"),
     ("frustration", "I don't understand how to convert grams to moles. I keep getting the wrong conversion factor and it's so frustrating!"),
     ("frustration", "I've tried balancing this chemical equation five times and the numbers never match up!"),
-    ("frustration", "I'm trying to draw this ray diagram for a concave lens and the lines are crossing in the wrong place. I give up!"),
     ("frustration", "This physics problem about friction has too many variables and I don't even know where to start!"),
     # Boredom
     ("boredom", "Ugh, why do we have to learn about sedimentary rocks? They just sit there. Who cares?"),
@@ -74,16 +74,10 @@ for idx, (cat, q) in enumerate(samples, 1):
             results.append({
                 "category": cat,
                 "query": q,
-                "latency_a": res_data["latency_a"],
-                "tokens_a": res_data["tokens_a"],
-                "latency_b": res_data["latency_b"],
-                "tokens_b": res_data["tokens_b"],
-                "latency_c": res_data["latency_c"],
-                "tokens_c": res_data["tokens_c"],
-                "latency_d": res_data["latency_d"],
-                "tokens_d": res_data["tokens_d"]
             })
-            print(f"    Done: A ({res_data['latency_a']}s, {res_data['tokens_a']}t) | B ({res_data['latency_b']}s, {res_data['tokens_b']}t) | C ({res_data['latency_c']}s, {res_data['tokens_c']}t) | D ({res_data['latency_d']}s, {res_data['tokens_d']}t)")
             # Add a small delay between requests
             time.sleep(1.5)
     except Exception as e:
@@ -95,19 +89,13 @@ with open(csv_file, mode="w", newline="", encoding="utf-8") as f:
     writer = csv.writer(f)
     writer.writerow([
         "Category", "Query",
-        "Latency A (s)", "Tokens A",
-        "Latency B (s)", "Tokens B",
-        "Latency C (s)", "Tokens C",
-        "Latency D (s)", "Tokens D"
     ])
     for r in results:
         writer.writerow([
             r["category"], r["query"],
-            r["latency_a"], r["tokens_a"],
-            r["latency_b"], r["tokens_b"],
-            r["latency_c"], r["tokens_c"],
-            r["latency_d"], r["tokens_d"]
         ])
 print(f"\nResults successfully saved to: {csv_file}")
@@ -115,37 +103,16 @@ print(f"\nResults successfully saved to: {csv_file}")
 # Calculate averages
 num_queries = len(results)
 if num_queries > 0:
-    valid_latencies_a = [r["latency_a"] for r in results if r["latency_a"] is not None]
-    valid_tokens_a = [r["tokens_a"] for r in results if r["tokens_a"] is not None]
-    valid_latencies_b = [r["latency_b"] for r in results if r["latency_b"] is not None]
-    valid_tokens_b = [r["tokens_b"] for r in results if r["tokens_b"] is not None]
-    valid_latencies_c = [r["latency_c"] for r in results if r["latency_c"] is not None]
-    valid_tokens_c = [r["tokens_c"] for r in results if r["tokens_c"] is not None]
-    valid_latencies_d = [r["latency_d"] for r in results if r["latency_d"] is not None]
-    valid_tokens_d = [r["tokens_d"] for r in results if r["tokens_d"] is not None]
-    avg_latency_a = sum(valid_latencies_a) / len(valid_latencies_a) if valid_latencies_a else 0
-    avg_tokens_a = sum(valid_tokens_a) / len(valid_tokens_a) if valid_tokens_a else 0
-    avg_latency_b = sum(valid_latencies_b) / len(valid_latencies_b) if valid_latencies_b else 0
-    avg_tokens_b = sum(valid_tokens_b) / len(valid_tokens_b) if valid_tokens_b else 0
-    avg_latency_c = sum(valid_latencies_c) / len(valid_latencies_c) if valid_latencies_c else 0
-    avg_tokens_c = sum(valid_tokens_c) / len(valid_tokens_c) if valid_tokens_c else 0
-    avg_latency_d = sum(valid_latencies_d) / len(valid_latencies_d) if valid_latencies_d else 0
-    avg_tokens_d = sum(valid_tokens_d) / len(valid_tokens_d) if valid_tokens_d else 0
     print("\n" + "="*50)
     print("BENCHMARK SUMMARY AVERAGES:")
     print("="*50)
-    print(f"Option A (Gemini LLM-Classifier):")
-    print(f"  - Avg Latency: {avg_latency_a:.3f}s")
-    print(f"  - Avg Tokens:  {avg_tokens_a:.1f}")
-    print(f"Option B (Gemini Single-Pass):")
-    print(f"  - Avg Latency: {avg_latency_b:.3f}s")
-    print(f"  - Avg Tokens:  {avg_tokens_b:.1f}")
-    print(f"Option C (Raw Distribution + Gemini):")
-    print(f"  - Avg Latency: {avg_latency_c:.3f}s")
-    print(f"  - Avg Tokens:  {avg_tokens_c:.1f}")
-    print(f"Option D (Local-Classifier + Gemini):")
-    print(f"  - Avg Latency: {avg_latency_d:.3f}s")
-    print(f"  - Avg Tokens:  {avg_tokens_d:.1f}")
     print("="*50)

     ("frustration", "This word problem about two trains leaving different cities is making my head spin. I hate word problems!"),
     ("frustration", "I don't understand how to convert grams to moles. I keep getting the wrong conversion factor and it's so frustrating!"),
     ("frustration", "I've tried balancing this chemical equation five times and the numbers never match up!"),
+    ("frustration", "Im trying to draw this ray diagram for a concave lens and the lines are crossing in the wrong place. I give up!"),
     ("frustration", "This physics problem about friction has too many variables and I don't even know where to start!"),
     # Boredom
     ("boredom", "Ugh, why do we have to learn about sedimentary rocks? They just sit there. Who cares?"),
             results.append({
                 "category": cat,
                 "query": q,
+                "latency": res_data["latency"],
+                "tokens": res_data["tokens"]
             })
+            print(f"    Done: {res_data['latency']}s, {res_data['tokens']}t")
             # Add a small delay between requests
             time.sleep(1.5)
     except Exception as e:
     writer = csv.writer(f)
     writer.writerow([
         "Category", "Query",
+        "Latency (s)", "Tokens"
     ])
     for r in results:
         writer.writerow([
             r["category"], r["query"],
+            r["latency"], r["tokens"]
         ])
 print(f"\nResults successfully saved to: {csv_file}")
 # Calculate averages
 num_queries = len(results)
 if num_queries > 0:
+    valid_latencies = [r["latency"] for r in results if r["latency"] is not None]
+    valid_tokens = [r["tokens"] for r in results if r["tokens"] is not None]
+    avg_latency = sum(valid_latencies) / len(valid_latencies) if valid_latencies else 0
+    avg_tokens = sum(valid_tokens) / len(valid_tokens) if valid_tokens else 0
     print("\n" + "="*50)
     print("BENCHMARK SUMMARY AVERAGES:")
     print("="*50)
+    print(f"Socratic Tutor (Single-Pass):")
+    print(f"  - Avg Latency: {avg_latency:.3f}s")
+    print(f"  - Avg Tokens:  {avg_tokens:.1f}")
     print("="*50)

backend/benchmark_b.py DELETED Viewed

@@ -1,118 +0,0 @@
-import json
-import urllib.request
-import csv
-import time
-import os
-samples = [
-    # Confusion
-    ("confusion", "Wait, why does a negative times a negative make a positive? I don't get it."),
-    ("confusion", "I'm looking at this cell diagram and I can't tell the difference between the cell wall and the cell membrane."),
-    ("confusion", "Our teacher said the Earth is tilted, but how does that make summer and winter? It doesn't make sense."),
-    ("confusion", "Is a virus alive or is it not? My textbook says both and I'm really mixed up."),
-    ("confusion", "What is the difference between a variable and a constant in algebra? I'm lost."),
-    ("confusion", "Why does dividing by a fraction mean multiplying by its reciprocal? It seems arbitrary."),
-    ("confusion", "What is the difference between speed and velocity? They sound like the same thing."),
-    ("confusion", "Why is the mitochondria called the powerhouse of the cell? What does it actually do?"),
-    # Frustration
-    ("frustration", "I've tried to solve this quadratic equation three times using the formula, but I keep getting a negative under the square root!"),
-    ("frustration", "My science experiment failed again! The volcano didn't bubble at all and I did everything exactly right!"),
-    ("frustration", "This long division with decimals is taking forever and I keep getting the wrong remainder! I hate this!"),
-    ("frustration", "This word problem about two trains leaving different cities is making my head spin. I hate word problems!"),
-    ("frustration", "I don't understand how to convert grams to moles. I keep getting the wrong conversion factor and it's so frustrating!"),
-    ("frustration", "I've tried balancing this chemical equation five times and the numbers never match up!"),
-    ("frustration", "Im trying to draw this ray diagram for a concave lens and the lines are crossing in the wrong place. I give up!"),
-    ("frustration", "This physics problem about friction has too many variables and I don't even know where to start!"),
-    # Boredom
-    ("boredom", "Ugh, why do we have to learn about sedimentary rocks? They just sit there. Who cares?"),
-    ("boredom", "This math worksheet is just 50 of the same exact addition problems. This is so boring."),
-    ("boredom", "We are just copying definitions of different math properties from the board. This is so boring."),
-    ("boredom", "Another lecture on the phases of mitosis... we've covered this three years in a row now."),
-    ("boredom", "I finished all my science reading early. There's nothing else to do except stare at the wall."),
-    ("boredom", "We have to measure the temperature of this water every two minutes for an hour. This is so tedious."),
-    ("boredom", "Calculating the area of twenty slightly different rectangles is putting me to sleep."),
-    ("boredom", "This lecture on cell organelles is just slides of definitions. I'm falling asleep."),
-    # Confidence
-    ("confidence", "I totally mastered multiplying fractions! Give me a hard practice problem to try!"),
-    ("confidence", "I just derived the formula for the volume of a sphere all by myself!"),
-    ("confidence", "I know exactly how to balance any redox reaction now. Try me!"),
-    ("confidence", "I got a perfect score on the calculus midterm today! I really understand derivatives now!"),
-    ("confidence", "I can explain the entire water cycle in my sleep! Evaporation, condensation, precipitation, easy!"),
-    ("confidence", "I just solved the hardest logic puzzle in the workbook on my very first try!"),
-    ("confidence", "I can calculate the trajectory of a projectile in my head now, it's so easy!"),
-    ("confidence", "I fully understand how DNA replication works and could draw every step from memory!"),
-    # Neutral
-    ("neutral", "How do I calculate the hypotenuse of a right triangle when the sides are 3 and 4?"),
-    ("neutral", "What are the three main types of rocks found in the Earth's crust?"),
-    ("neutral", "Can you explain how photosynthesis converts sunlight into chemical energy?"),
-    ("neutral", "What is the chemical formula for photosynthesis and cellular respiration?"),
-    ("neutral", "How do you find the slope of a line from two points on a graph?"),
-    ("neutral", "What is the difference between an isotope and an ion in chemistry?"),
-    ("neutral", "Could you list the steps of the scientific method in order?"),
-    ("neutral", "What is the value of the constant pi, and how is it calculated?")
-]
-results = []
-url = "http://127.0.0.1:8000/api/chat"
-print(f"Starting benchmark targeting Option B only for {len(samples)} sample queries...")
-for idx, (cat, q) in enumerate(samples, 1):
-    print(f"[{idx}/{len(samples)}] Query ({cat}): \"{q}\"")
-    req_data = json.dumps({"message": q, "selected_option": "B"}).encode('utf-8')
-    req = urllib.request.Request(
-        url,
-        data=req_data,
-        headers={'Content-Type': 'application/json'}
-    )
-    try:
-        with urllib.request.urlopen(req) as response:
-            res_data = json.loads(response.read().decode('utf-8'))
-            # Record result
-            results.append({
-                "category": cat,
-                "query": q,
-                "latency_b": res_data["latency"],
-                "tokens_b": res_data["tokens"]
-            })
-            print(f"    Done: B ({res_data['latency']}s, {res_data['tokens']}t)")
-            # Add a small delay between requests
-            time.sleep(1.5)
-    except Exception as e:
-        print(f"    Error processing query: {e}")
-# Save to CSV
-csv_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "benchmark_results_b.csv")
-with open(csv_file, mode="w", newline="", encoding="utf-8") as f:
-    writer = csv.writer(f)
-    writer.writerow([
-        "Category", "Query",
-        "Latency B (s)", "Tokens B"
-    ])
-    for r in results:
-        writer.writerow([
-            r["category"], r["query"],
-            r["latency_b"], r["tokens_b"]
-        ])
-print(f"\nResults successfully saved to: {csv_file}")
-# Calculate averages
-num_queries = len(results)
-if num_queries > 0:
-    valid_latencies_b = [r["latency_b"] for r in results if r["latency_b"] is not None]
-    valid_tokens_b = [r["tokens_b"] for r in results if r["tokens_b"] is not None]
-    avg_latency_b = sum(valid_latencies_b) / len(valid_latencies_b) if valid_latencies_b else 0
-    avg_tokens_b = sum(valid_tokens_b) / len(valid_tokens_b) if valid_tokens_b else 0
-    print("\n" + "="*50)
-    print("BENCHMARK SUMMARY AVERAGES (OPTION B ONLY):")
-    print("="*50)
-    print(f"Option B (Gemini Single-Pass):")
-    print(f"  - Avg Latency: {avg_latency_b:.3f}s")
-    print(f"  - Avg Tokens:  {avg_tokens_b:.1f}")
-    print("="*50)