File size: 6,403 Bytes
b3cc6b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("Agg")
import seaborn as sns
import requests
import io
from pathlib import Path
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# ─────────────────────────────────────────
# SECTION 1 — Load the pre-computed data
# ─────────────────────────────────────────
# Best-effort load of the notebook artifacts. The Dashboard tab checks
# ARTIFACTS_OK and degrades to a hint message when the files are absent,
# so a missing artifacts/ folder must not crash the app at import time.
try:
    df_pricing = pd.read_csv("artifacts/pricing_decisions.csv")
    df_sales = pd.read_csv("artifacts/dashboard_data.csv")
except Exception:
    # Any load failure (missing file, parse error) falls back to empty frames.
    df_pricing, df_sales = pd.DataFrame(), pd.DataFrame()
    ARTIFACTS_OK = False
else:
    ARTIFACTS_OK = True

# ─────────────────────────────────────────
# SECTION 2 — Real-time analysis (VADER)
# ─────────────────────────────────────────
# Single shared VADER analyzer; it is stateless, so one module-level
# instance is reused by every get_sentiment_label() call.
analyzer = SentimentIntensityAnalyzer()

def get_sentiment_label(text):
    """Map VADER's compound score for *text* onto a three-way label.

    Uses the conventional VADER cut-offs: a compound score of at least
    0.05 is "positive", at most -0.05 is "negative", and anything in
    between is "neutral".
    """
    compound = analyzer.polarity_scores(text)["compound"]
    if compound >= 0.05:
        return "positive"
    if compound <= -0.05:
        return "negative"
    return "neutral"

def pricing_decision(avg_units, positive_ratio, negative_ratio,
                     *, high_units=120, low_units=60,
                     pos_threshold=0.6, neg_threshold=0.4):
    """Rule-based pricing call combining sales volume and review sentiment.

    Parameters
    ----------
    avg_units : float
        Average monthly units sold.
    positive_ratio, negative_ratio : float
        Fraction (0–1) of reviews labelled positive / negative.
    high_units, low_units : float, keyword-only
        Sales-volume cut-offs for raising / lowering the price.
    pos_threshold, neg_threshold : float, keyword-only
        Sentiment-ratio cut-offs paired with the volume tests.

    Returns
    -------
    str
        One of the three decision strings shown in the UI.

    The keyword-only defaults reproduce the project's original hard-coded
    thresholds, so existing callers are unaffected; new callers can tune
    the rule without changing this function.
    """
    # Strong demand AND mostly happy readers → room to raise the price.
    if avg_units >= high_units and positive_ratio >= pos_threshold:
        return "📈 Increase Price"
    # Weak demand AND notable dissatisfaction → discount to move stock.
    if avg_units <= low_units and negative_ratio >= neg_threshold:
        return "📉 Decrease Price"
    return "➡️ Keep Price"

def analyze_book(title, reviews_text, avg_units_sold):
    """Analyze pasted reviews for one book and recommend a pricing action.

    Parameters
    ----------
    title : str
        Book title (used in the summary text and chart title).
    reviews_text : str
        Multi-line text, one review per line; blank lines are ignored.
    avg_units_sold : float
        Average monthly units sold, fed to pricing_decision().

    Returns
    -------
    tuple
        (markdown summary, per-review "review → label" lines, matplotlib
        Figure) — or (warning message, "", None) when input is unusable.
    """
    if not title or not reviews_text:
        return "⚠️ Please enter a title and at least one review.", "", None

    # One review per non-blank line.
    lines = [r.strip() for r in reviews_text.strip().split("\n") if r.strip()]

    # FIX: whitespace-only input is truthy yet yields zero lines, which
    # previously crashed with ZeroDivisionError in the ratio math below.
    if not lines:
        return "⚠️ Please enter a title and at least one review.", "", None

    labels = [get_sentiment_label(line) for line in lines]

    total          = len(labels)
    positive_ratio = labels.count("positive") / total
    negative_ratio = labels.count("negative") / total
    neutral_ratio  = labels.count("neutral")  / total

    decision = pricing_decision(avg_units_sold, positive_ratio, negative_ratio)

    # Markdown summary shown in the left output pane.
    summary = f"""
📚 **{title}**

🔢 Reviews analysées : {total}
😊 Positive : {positive_ratio:.0%}
😐 Neutral  : {neutral_ratio:.0%}
😞 Negative : {negative_ratio:.0%}
📦 Avg units sold : {avg_units_sold}

💡 **Pricing Decision : {decision}**
"""

    # Pie chart of the sentiment split (Agg backend — no display required).
    fig, ax = plt.subplots(figsize=(4, 4))
    colors = ["#4CAF50", "#FFC107", "#F44336"]  # green / amber / red
    ax.pie(
        [positive_ratio, neutral_ratio, negative_ratio],
        labels=["Positive", "Neutral", "Negative"],
        autopct="%1.0f%%",
        colors=colors,
        startangle=90
    )
    ax.set_title(f"Sentiment — {title}")
    plt.tight_layout()

    # FIX: separate each review from its label; the original f"{l}{s}" glued
    # them together ("Great book!positive").
    details = "\n".join(f"{line} → {label}" for line, label in zip(lines, labels))

    return summary, details, fig

# ─────────────────────────────────────────
# SECTION 3 — Gradio interface
# ─────────────────────────────────────────
# Three-tab UI: (1) a static dashboard rendered from the pre-computed
# artifacts, (2) a live per-book analysis wired to analyze_book(), and
# (3) an "About" page describing the project pipeline.
with gr.Blocks(title="📚 Book Price Decider", theme=gr.themes.Soft()) as app:

    gr.Markdown("# 📚 Book Price Decider — Group A4")
    gr.Markdown("Sentiment analysis + ARIMA-based pricing decisions for books.")

    with gr.Tabs():

        # ── Tab 1 : Pre-computed dashboard ─────────────────
        with gr.Tab("📊 Dashboard"):
            gr.Markdown("### Pre-computed results from the analysis notebooks")

            # ARTIFACTS_OK is set at import time (SECTION 1); when the
            # artifacts/ folder is missing the tab shows a hint instead of
            # broken images and an empty table.
            if ARTIFACTS_OK:
                with gr.Row():
                    gr.Image(value="artifacts/sales_trends.png",
                             label="Sales Trends")
                    gr.Image(value="artifacts/sentiment_distribution.png",
                             label="Sentiment Distribution")
                gr.Dataframe(value=df_pricing, label="Pricing Decisions Table")
            else:
                gr.Markdown(
                    "⚠️ No artifacts found yet. "
                    "Run the notebooks and upload the `artifacts/` folder."
                )

        # ── Tab 2 : Real-time analysis ─────────────────────
        with gr.Tab("🔮 Analyze a New Book"):
            gr.Markdown("### Enter book info to get a live pricing recommendation")

            with gr.Row():
                title_input = gr.Textbox(label="Book Title", placeholder="e.g. The Great Gatsby")
                units_input = gr.Number(label="Avg Monthly Units Sold", value=100)

            # One review per line; analyze_book() splits on newlines.
            reviews_input = gr.Textbox(
                label="Paste reviews here (one per line)",
                lines=6,
                placeholder="This book was amazing!\nNot what I expected.\nDecent read overall."
            )

            analyze_btn = gr.Button("🚀 Analyze & Decide", variant="primary")

            with gr.Row():
                summary_output  = gr.Markdown(label="Summary")
                details_output  = gr.Textbox(label="Review-by-review labels", lines=6)

            chart_output = gr.Plot(label="Sentiment Chart")

            # Wire the button to analyze_book(); input/output order must
            # match that function's signature and 3-tuple return value.
            analyze_btn.click(
                fn=analyze_book,
                inputs=[title_input, reviews_input, units_input],
                outputs=[summary_output, details_output, chart_output]
            )

        # ── Tab 3 : About ──────────────────────────────────
        with gr.Tab("ℹ️ About"):
            gr.Markdown("""
## About this app

This app is part of the **AI for Big Data Management** group project at ESCP Business School.

### Pipeline
1. **Real-world data** scraped from Books to Scrape
2. **Synthetic data** generated to enrich with reviews & sales history
3. **VADER sentiment analysis** on customer reviews
4. **ARIMA forecasting** on sales time series
5. **Rule-based pricing decisions** combining sentiment + sales volume
6. **This Hugging Face app** as the final automation layer

### Team — Group A4
- Project Manager
- Data Analyst
- UX Designer(s)
- Content Specialist
""")

# Launch the web server (blocking call; default host/port).
app.launch()