Keshav0308 committed on
Commit
2bf082b
·
verified ·
1 Parent(s): e5f6c81

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -0
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from langdetect import detect, LangDetectException
4
+
5
# Text-classification pipeline backed by the fine-tuned checkpoint on the
# Hugging Face Hub. Built once at import time and reused for every request.
classifier = pipeline(
    task="text-classification",
    model="Keshav0308/multilingual-topic-classifier",
)
10
+
11
# Emoji shown next to each topic label in the UI. Labels that are not listed
# here are handled by the caller's fallback.
TOPIC_EMOJIS = dict(
    (
        ("geography", "🌍"),
        ("science/technology", "🔬"),
        ("entertainment", "🎬"),
        ("politics", "🏛️"),
        ("health", "🏥"),
        ("travel", "✈️"),
        ("sports", "⚽"),
    )
)
20
+
21
# Display names for the language codes reported by the language detector.
# Codes that are not listed here are shown to the user as "Unknown (<code>)".
LANGUAGE_NAMES = {
    "en": "English", "fr": "French", "de": "German", "es": "Spanish",
    "it": "Italian", "pt": "Portuguese", "ru": "Russian", "zh-cn": "Chinese",
    "ja": "Japanese", "ko": "Korean", "ar": "Arabic", "hi": "Hindi",
    "bn": "Bengali", "ur": "Urdu", "tr": "Turkish", "pl": "Polish",
    "nl": "Dutch", "sv": "Swedish", "fi": "Finnish", "da": "Danish",
    "uk": "Ukrainian", "cs": "Czech", "ro": "Romanian", "hu": "Hungarian",
    "th": "Thai", "vi": "Vietnamese", "id": "Indonesian", "ms": "Malay",
    "fa": "Persian", "he": "Hebrew", "pa": "Punjabi", "ta": "Tamil",
    "te": "Telugu", "mr": "Marathi", "gu": "Gujarati", "kn": "Kannada",
    "ml": "Malayalam", "si": "Sinhala", "ne": "Nepali", "am": "Amharic",
    "sw": "Swahili", "yo": "Yoruba", "ig": "Igbo", "ha": "Hausa",
    "zu": "Zulu", "af": "Afrikaans", "sq": "Albanian", "hy": "Armenian",
    "az": "Azerbaijani", "eu": "Basque", "be": "Belarusian", "bs": "Bosnian",
    "bg": "Bulgarian", "ca": "Catalan", "hr": "Croatian", "et": "Estonian",
    "gl": "Galician", "ka": "Georgian", "el": "Greek", "is": "Icelandic",
    "lv": "Latvian", "lt": "Lithuanian", "mk": "Macedonian", "mt": "Maltese",
    "sr": "Serbian", "sk": "Slovak", "sl": "Slovenian", "cy": "Welsh",
    # Codes langdetect can return that were previously missing from this
    # table and therefore rendered as "Unknown (...)".
    "zh-tw": "Chinese (Traditional)", "no": "Norwegian", "so": "Somali",
    "tl": "Tagalog",
}
40
+
41
def detect_language(text):
    """Return a display name for the language detected in ``text``.

    Args:
        text: The text whose language should be identified.

    Returns:
        The matching name from ``LANGUAGE_NAMES``; ``"Unknown (<code>)"``
        when the detected code has no entry there; or
        ``"Could not detect"`` when the detector raises
        ``LangDetectException``.
    """
    # Function-scope import keeps this block self-contained; langdetect is
    # already a dependency of this file.
    from langdetect import DetectorFactory

    # langdetect's algorithm is randomized, so short or ambiguous text can
    # yield a different language on each call. Pinning the seed makes the
    # same input always produce the same answer. The assignment is
    # idempotent, so repeating it per call is harmless.
    DetectorFactory.seed = 0

    try:
        code = detect(text)
    except LangDetectException:
        return "Could not detect"
    return LANGUAGE_NAMES.get(code, f"Unknown ({code})")
47
+
48
def classify_topic(text):
    """Classify ``text`` and format the result for the three output boxes.

    Args:
        text: Raw user input from the textbox. May be ``None``, empty, or
            whitespace-only.

    Returns:
        A ``(topic, confidence, language)`` tuple of display strings. All
        three are empty strings when there is nothing to classify.
    """
    if not text or not text.strip():
        return "", "", ""

    # truncation=True asks the tokenizer to cut over-long input down to the
    # model's maximum sequence length. Without it, a long paste can exceed
    # that limit and make the model call fail instead of returning a label.
    result = classifier(text, truncation=True)[0]
    topic = result["label"]
    confidence = result["score"] * 100  # score is a fraction; show percent
    language = detect_language(text)

    emoji = TOPIC_EMOJIS.get(topic, "📌")  # generic pin for unmapped labels
    topic_display = f"{emoji} {topic.upper()}"
    confidence_display = f"{confidence:.2f}%"
    language_display = f"🌐 {language}"

    return topic_display, confidence_display, language_display
63
+
64
# Sample sentences offered in the UI. Each row is a one-element list because
# the examples feed a single input component.
examples = [
    [sentence]
    for sentence in (
        "The patient was diagnosed with pneumonia and prescribed antibiotics.",
        "El equipo ganó el campeonato mundial de fútbol.",
        "Le parlement a voté une nouvelle loi sur l'environnement.",
        "scientists discovered a new exoplanet orbiting a distant star.",
        "ਕ੍ਰਿਕੇਟ ਟੀਮ ਨੇ ਵਿਸ਼ਵ ਕੱਪ ਜਿੱਤਿਆ।",
        "東京オリンピックで日本が金メダルを獲得した。",
        "Der Bundestag hat ein neues Klimaschutzgesetz verabschiedet.",
    )
]
74
+
75
# Assemble the Gradio interface: input column on the left, three read-only
# result boxes on the right, clickable examples underneath.
with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Topic Classifier") as demo:
    gr.Markdown("""
    # 🌍 Multilingual Topic Classifier
    ### Classify text into topics across 205 languages
    Built with `xlm-roberta-base` fine-tuned on the SIB-200 dataset.
    """)

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Enter text in any language",
                placeholder="Type or paste text here...",
                lines=4,
            )
            submit_btn = gr.Button("🔍 Classify", variant="primary", size="lg")

        with gr.Column(scale=1):
            topic_output = gr.Textbox(label="📌 Topic", interactive=False)
            confidence_output = gr.Textbox(label="📊 Confidence", interactive=False)
            language_output = gr.Textbox(label="🌐 Detected Language", interactive=False)

    gr.Examples(
        examples=examples,
        inputs=text_input,
        label="Try these examples",
    )

    # Clicking the button and submitting from the textbox run the same
    # handler with the same inputs and outputs, so register both in one place.
    result_boxes = [topic_output, confidence_output, language_output]
    for register_event in (submit_btn.click, text_input.submit):
        register_event(
            fn=classify_topic,
            inputs=text_input,
            outputs=result_boxes,
        )

demo.launch()