| |
| |
|
|
| import streamlit as st |
| import spacy |
| import networkx as nx |
| import matplotlib.pyplot as plt |
| import pandas as pd |
| import numpy as np |
| import logging |
| import io |
| import base64 |
| from collections import Counter, defaultdict |
| import logging |
|
|
|
|
# Module-level logging setup: request INFO-level root configuration and create
# the logger shared by every function in this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|
|
|
| from .semantic_analysis import ( |
| create_concept_graph, |
| visualize_concept_graph, |
| identify_key_concepts |
| ) |
|
|
|
|
| from .stopwords import ( |
| get_custom_stopwords, |
| process_text, |
| get_stopwords_for_spacy |
| ) |
|
|
|
|
| |
# Human-readable names for part-of-speech tags, keyed first by language code
# ('es', 'en', 'uk') and then by tag.
POS_TRANSLATIONS = {
    'es': {
        'ADJ': 'Adjetivo', 'ADP': 'Preposición', 'ADV': 'Adverbio', 'AUX': 'Auxiliar',
        'CCONJ': 'Conjunción Coordinante', 'DET': 'Determinante', 'INTJ': 'Interjección',
        'NOUN': 'Sustantivo', 'NUM': 'Número', 'PART': 'Partícula', 'PRON': 'Pronombre',
        'PROPN': 'Nombre Propio', 'SCONJ': 'Conjunción Subordinante', 'SYM': 'Símbolo',
        'VERB': 'Verbo', 'X': 'Otro',
    },
    'en': {
        'ADJ': 'Adjective', 'ADP': 'Preposition', 'ADV': 'Adverb', 'AUX': 'Auxiliary',
        'CCONJ': 'Coordinating Conjunction', 'DET': 'Determiner', 'INTJ': 'Interjection',
        'NOUN': 'Noun', 'NUM': 'Number', 'PART': 'Particle', 'PRON': 'Pronoun',
        'PROPN': 'Proper Noun', 'SCONJ': 'Subordinating Conjunction', 'SYM': 'Symbol',
        'VERB': 'Verb', 'X': 'Other',
    },
    'uk': {
        'ADJ': 'Прикметник', 'ADP': 'Прийменник', 'ADV': 'Прислівник', 'AUX': 'Допоміжне дієслово',
        'CCONJ': 'Сурядний сполучник', 'DET': 'Означник', 'INTJ': 'Вигук',
        'NOUN': 'Іменник', 'NUM': 'Число', 'PART': 'Частка', 'PRON': 'Займенник',
        'PROPN': 'Власна назва', 'SCONJ': 'Підрядний сполучник', 'SYM': 'Символ',
        'VERB': 'Дієслово', 'X': 'Інше',
    },
}
|
|
# Entity category label -> colour name, keyed first by language code.
# The three languages list the same five colours in the same order; only the
# category labels differ.
ENTITY_LABELS = {
    'es': {
        "Personas": "lightblue",
        "Lugares": "lightcoral",
        "Inventos": "lightgreen",
        "Fechas": "lightyellow",
        "Conceptos": "lightpink",
    },
    'en': {
        "People": "lightblue",
        "Places": "lightcoral",
        "Inventions": "lightgreen",
        "Dates": "lightyellow",
        "Concepts": "lightpink",
    },
    'uk': {
        "Люди": "lightblue",
        "Місця": "lightcoral",
        "Винаходи": "lightgreen",
        "Дати": "lightyellow",
        "Концепції": "lightpink",
    },
}
| |
|
|
def fig_to_bytes(fig, dpi=100):
    """Render a figure to PNG and return the encoded bytes.

    Args:
        fig: Object exposing ``savefig`` (e.g. a matplotlib figure).
        dpi: Resolution forwarded to ``savefig``; defaults to 100.

    Returns:
        The PNG data as ``bytes``, or ``None`` if rendering raised.
    """
    try:
        # The context manager releases the buffer even when savefig raises.
        with io.BytesIO() as buf:
            fig.savefig(buf, format='png', dpi=dpi, bbox_inches='tight')
            # getvalue() returns the full contents regardless of the stream
            # position, so no seek(0) is required first.
            return buf.getvalue()
    except Exception as e:
        # Best-effort conversion: callers treat None as "conversion failed".
        logger.error("Error en fig_to_bytes: %s", e)
        return None
| |
| |
def _titled_concept_figure(graph, lang, title):
    """Open a 12x8 pyplot figure, draw ``graph`` and apply ``title`` via pyplot."""
    plt.figure(figsize=(12, 8))
    fig = visualize_concept_graph(graph, lang)
    # NOTE(review): plt.title/tight_layout act on pyplot's *current* figure,
    # which is the one opened above only if visualize_concept_graph does not
    # create its own -- confirm against that helper.
    plt.title(title, pad=20)
    plt.tight_layout()
    return fig


def compare_semantic_analysis(text1, text2, nlp, lang):
    """Build a concept-graph figure and a key-concept list for each of two texts.

    Args:
        text1: First text to analyse.
        text2: Second text to analyse.
        nlp: Callable applied to each text to obtain a parsed document
            (presumably a loaded spaCy pipeline -- confirm with callers).
        lang: Language code forwarded to ``get_custom_stopwords`` and
            ``visualize_concept_graph``.

    Returns:
        tuple: ``(fig1, fig2, key_concepts1, key_concepts2)``. The figures are
        whatever ``visualize_concept_graph`` returns; the concept lists are
        whatever ``identify_key_concepts`` returns.

    Raises:
        ValueError: If no key concepts are identified for either text.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    try:
        logger.info("Iniciando análisis comparativo para idioma: %s", lang)

        stopwords = get_custom_stopwords(lang)
        logger.info(
            "Obtenidas %s stopwords para el idioma %s", len(stopwords), lang
        )

        doc1 = nlp(text1)
        doc2 = nlp(text2)

        logger.info("Identificando conceptos clave del primer texto...")
        key_concepts1 = identify_key_concepts(
            doc1, stopwords=stopwords, min_freq=2, min_length=3
        )

        logger.info("Identificando conceptos clave del segundo texto...")
        key_concepts2 = identify_key_concepts(
            doc2, stopwords=stopwords, min_freq=2, min_length=3
        )

        if not key_concepts1 or not key_concepts2:
            raise ValueError(
                "No se pudieron identificar conceptos clave en uno o ambos textos"
            )

        logger.info("Creando grafos de conceptos...")
        G1 = create_concept_graph(doc1, key_concepts1)
        G2 = create_concept_graph(doc2, key_concepts2)

        logger.info("Visualizando grafos...")
        fig1 = _titled_concept_figure(G1, lang, "Análisis del primer texto")
        fig2 = _titled_concept_figure(G2, lang, "Análisis del segundo texto")

        logger.info("Análisis comparativo completado exitosamente")
        return fig1, fig2, key_concepts1, key_concepts2

    except Exception as e:
        logger.error("Error en compare_semantic_analysis: %s", e)
        raise
    finally:
        # Runs on success and on failure, so a second close in the except
        # branch is unnecessary.
        # NOTE(review): this also closes the figures being returned before the
        # caller receives them; perform_discourse_analysis still calls savefig
        # on them afterwards -- confirm that is intended.
        plt.close('all')
|
|
|
|
| |
def create_concept_table(key_concepts):
    """Build a two-column table of key concepts and their frequencies.

    Args:
        key_concepts: List of ``(concept, frequency)`` tuples.

    Returns:
        pandas.DataFrame: Columns ``'Concepto'`` and ``'Frecuencia'``, with the
        frequency rounded to two decimals. An empty frame with the same columns
        is returned when the input is empty or cannot be tabulated.
    """
    columns = ['Concepto', 'Frecuencia']
    try:
        if not key_concepts:
            logger.warning("Lista de conceptos vacía")
            return pd.DataFrame(columns=columns)

        df = pd.DataFrame(key_concepts, columns=columns)
        df['Frecuencia'] = df['Frecuencia'].round(2)
        return df
    except Exception as e:
        # Best-effort: a malformed input yields an empty table, not a crash.
        logger.error("Error en create_concept_table: %s", e)
        return pd.DataFrame(columns=columns)
|
|
|
|
| |
|
|
def perform_discourse_analysis(text1, text2, nlp, lang):
    """Run the full two-text discourse analysis and package the results.

    Args:
        text1: First text to analyse.
        text2: Second text to analyse.
        nlp: Loaded language model, forwarded to ``compare_semantic_analysis``.
        lang: Language code, forwarded to ``compare_semantic_analysis``.

    Returns:
        dict: On success, a mapping with ``'graph1'``/``'graph2'`` (PNG bytes),
        ``'combined_graph'`` (always ``None``), ``'key_concepts1'``/
        ``'key_concepts2'``, ``'table1'``/``'table2'`` (DataFrames) and
        ``'success': True``. On any failure, ``{'success': False, 'error': <message>}``;
        no exception escapes this function.
    """
    try:
        logger.info("Iniciando análisis del discurso...")

        # Whitespace-only strings carry no analysable content, so reject them
        # up front together with empty/None inputs.
        if any(
            not text or (isinstance(text, str) and not text.strip())
            for text in (text1, text2)
        ):
            raise ValueError("Los textos de entrada no pueden estar vacíos")

        if not nlp:
            raise ValueError("Modelo de lenguaje no inicializado")

        fig1, fig2, key_concepts1, key_concepts2 = compare_semantic_analysis(
            text1, text2, nlp, lang
        )

        logger.info("Análisis comparativo completado, convirtiendo figuras a bytes...")

        graph1_bytes = fig_to_bytes(fig1)
        graph2_bytes = fig_to_bytes(fig2)

        logger.info(
            "Figura 1 convertida a %s bytes",
            len(graph1_bytes) if graph1_bytes else 0,
        )
        logger.info(
            "Figura 2 convertida a %s bytes",
            len(graph2_bytes) if graph2_bytes else 0,
        )

        # fig_to_bytes signals failure with None (empty output is treated the
        # same way).
        if not graph1_bytes or not graph2_bytes:
            logger.error("Error al convertir figuras a bytes - obteniendo 0 bytes")
            raise ValueError("No se pudieron convertir las figuras a bytes")

        table1 = create_concept_table(key_concepts1)
        table2 = create_concept_table(key_concepts2)

        result = {
            'graph1': graph1_bytes,
            'graph2': graph2_bytes,
            'combined_graph': None,
            'key_concepts1': key_concepts1,
            'key_concepts2': key_concepts2,
            'table1': table1,
            'table2': table2,
            'success': True,
        }

        logger.info("Análisis del discurso completado y listo para almacenamiento")
        return result

    except Exception as e:
        logger.error("Error en perform_discourse_analysis: %s", e)
        return {
            'success': False,
            'error': str(e),
        }
    finally:
        # Single cleanup point for both the success and the failure path.
        plt.close('all')
|
|
| |
|
|