| | import gradio as gr |
| | import pandas as pd |
| |
|
def compare_protein_repeat_frequencies(file1, file2):
    """Compare per-protein repeat frequencies between two Excel workbooks.

    Both workbooks are read with pandas, restricted to the columns they
    share, and inner-joined on "Entry ID" and "Protein Name". For every
    other shared column the absolute difference between the two files is
    stored in a "<column>_diff" column, the per-row sum of those differences
    is stored in "Total Difference", and the rows are written to an Excel
    file sorted by that total, largest first.

    Args:
        file1: First upload; either a filesystem path or an object whose
            ``name`` attribute holds the path.
        file2: Second upload, in the same forms as ``file1``.

    Returns:
        Path of the written ``comparison_output.xlsx`` file, located in a
        freshly created temporary directory.

    Raises:
        KeyError: If "Entry ID" or "Protein Name" is not present in both
            workbooks.
    """
    import os
    import tempfile

    key_cols = ["Entry ID", "Protein Name"]

    # Accept both path strings and file-like wrappers exposing ``.name``.
    df1 = pd.read_excel(getattr(file1, "name", file1))
    df2 = pd.read_excel(getattr(file2, "name", file2))

    # Only columns present in both workbooks can be compared.
    common_cols = df1.columns.intersection(df2.columns)
    missing = [key for key in key_cols if key not in common_cols]
    if missing:
        raise KeyError(f"Both files must contain the column(s): {missing}")

    merged = pd.merge(
        df1[common_cols],
        df2[common_cols],
        on=key_cols,
        suffixes=("_file1", "_file2"),
    )

    # Select repeat columns by name rather than by position, so the key
    # columns do not have to be the first two columns of the sheet.
    repeat_cols = [col for col in common_cols if col not in key_cols]

    diff_data = {key: merged[key] for key in key_cols}
    for col in repeat_cols:
        diff_data[f"{col}_diff"] = (
            merged[f"{col}_file1"] - merged[f"{col}_file2"]
        ).abs()

    diff_df = pd.DataFrame(diff_data)
    diff_cols = [f"{col}_diff" for col in repeat_cols]
    diff_df["Total Difference"] = diff_df[diff_cols].sum(axis=1)
    diff_df_sorted = diff_df.sort_values(by="Total Difference", ascending=False)

    # A per-call temporary directory keeps the download file name stable
    # while preventing concurrent requests from overwriting each other.
    output_path = os.path.join(tempfile.mkdtemp(), "comparison_output.xlsx")
    diff_df_sorted.to_excel(output_path, index=False)
    return output_path
| |
|
| | |
# Gradio front end: two Excel uploads go in, one downloadable Excel report
# comes out. Components are created in the same order as before (inputs
# first, then the output).
_first_upload = gr.File(label="Upload First Excel File")
_second_upload = gr.File(label="Upload Second Excel File")
_report_download = gr.File(label="Download Comparison Excel")

interface = gr.Interface(
    fn=compare_protein_repeat_frequencies,
    inputs=[_first_upload, _second_upload],
    outputs=_report_download,
    title="Protein Repeat Comparator",
    description=(
        "Upload two Excel files containing protein repeat data. "
        "The app will compare frequencies and return a sorted Excel file "
        "showing differences."
    ),
)

# Start the web server as soon as the module is executed.
interface.launch()