Spaces:

Jayesh13
/

try

Sleeping

try / app.py

Create app.py

d15c119 verified 11 months ago

1.64 kB

	import gradio as gr
	import pandas as pd

	def compare_protein_repeat_frequencies(file1, file2):
	    """Compare per-protein repeat counts between two Excel workbooks.

	    Both workbooks are reduced to the columns they share and inner-joined
	    on "Entry ID" and "Protein Name". Every other shared column is treated
	    as a repeat-count column: its absolute difference between the two
	    files is stored as "<column>_diff", and the row-wise sum of those
	    differences is stored as "Total Difference". The result is sorted by
	    "Total Difference" (largest first) and written to
	    ``comparison_output.xlsx`` relative to the current working directory.

	    Args:
	        file1: First workbook, either a path (``str`` / ``os.PathLike``)
	            or an upload object that exposes its path as ``.name``.
	        file2: Second workbook, same accepted forms as ``file1``.

	    Returns:
	        The path of the written workbook, ``"comparison_output.xlsx"``.

	    Raises:
	        ValueError: If either input is ``None`` (no file supplied).
	        KeyError: If "Entry ID" or "Protein Name" is missing from the
	            columns shared by both workbooks.
	    """
	    import os

	    key_cols = ["Entry ID", "Protein Name"]

	    paths = []
	    for position, upload in enumerate((file1, file2), start=1):
	        if upload is None:
	            raise ValueError(f"Excel file {position} was not provided.")
	        # Plain paths must be recognised before falling back to ``.name``:
	        # pathlib paths also have a ``.name``, but it is only the basename.
	        if isinstance(upload, (str, os.PathLike)):
	            paths.append(upload)
	        else:
	            paths.append(upload.name)

	    # Load both Excel files
	    df1 = pd.read_excel(paths[0])
	    df2 = pd.read_excel(paths[1])

	    # Keep only the columns present in both files
	    common_cols = df1.columns.intersection(df2.columns)
	    missing = [key for key in key_cols if key not in common_cols]
	    if missing:
	        raise KeyError(
	            "Both files must contain the column(s): " + ", ".join(missing)
	        )

	    # Select repeat columns by name rather than by position, so the key
	    # columns may appear anywhere in the sheet.
	    repeat_cols = [col for col in common_cols if col not in key_cols]

	    # Merge on Entry ID and Protein Name; every repeat column is shared, so
	    # each one comes back suffixed with its file of origin.
	    merged = pd.merge(
	        df1[common_cols],
	        df2[common_cols],
	        on=key_cols,
	        suffixes=("_file1", "_file2"),
	    )

	    # Build all columns first and create the frame once.
	    diff_data = {key: merged[key] for key in key_cols}
	    for col in repeat_cols:
	        delta = merged[f"{col}_file1"] - merged[f"{col}_file2"]
	        diff_data[f"{col}_diff"] = delta.abs()

	    diff_df = pd.DataFrame(diff_data)
	    # Everything after the key columns is a "_diff" column.
	    diff_df["Total Difference"] = diff_df.iloc[:, len(key_cols):].sum(axis=1)
	    diff_df_sorted = diff_df.sort_values(by="Total Difference", ascending=False)

	    # Save and return
	    output_path = "comparison_output.xlsx"
	    diff_df_sorted.to_excel(output_path, index=False)
	    return output_path

	# Gradio UI: two Excel uploads in, one downloadable comparison workbook out.
	upload_inputs = [
	    gr.File(label="Upload First Excel File"),
	    gr.File(label="Upload Second Excel File"),
	]
	download_output = gr.File(label="Download Comparison Excel")

	interface = gr.Interface(
	    fn=compare_protein_repeat_frequencies,
	    inputs=upload_inputs,
	    outputs=download_output,
	    title="Protein Repeat Comparator",
	    description="Upload two Excel files containing protein repeat data. The app will compare frequencies and return a sorted Excel file showing differences.",
	)

	# Start the web server for the app.
	interface.launch()