Spaces:
Build error
Build error
| import os | |
| import streamlit as st | |
| import arxiv | |
| import random | |
| import datetime | |
| import requests | |
| from scholarly import scholarly | |
| # ------------------------------- | |
| # Helper Functions | |
| # ------------------------------- | |
def _fetch_json(url, timeout=10):
    """Return the decoded JSON body of a GET to *url*, or ``None`` on failure.

    Failure covers connection errors and timeouts, any non-200 status, and a
    body that is not valid JSON. The metadata lookups are best-effort, so a
    flaky third-party service must not abort the caller.
    """
    try:
        # Always bound the wait: without a timeout a stalled server would
        # block the request indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # A 200 response with a non-JSON body (e.g. an HTML error page).
        return None


def get_paper_metadata(arxiv_id):
    """Fetch citation and connected-paper counts used for trust scoring.

    Args:
        arxiv_id: arXiv identifier interpolated into both lookup URLs.

    Returns:
        A dict with keys ``citations``, ``institution``, ``authors`` and
        ``connected_papers``. Only ``citations`` and ``connected_papers`` are
        ever filled in here; the other two keep their placeholder values.
        Any lookup that fails leaves its default (0) in place.
    """
    metadata = {
        "citations": 0,
        "institution": "Unknown",
        "authors": [],
        "connected_papers": 0,
    }

    # Citation count from scite.ai.
    scite_data = _fetch_json(f"https://api.scite.ai/v1/papers/arxiv:{arxiv_id}")
    if isinstance(scite_data, dict):
        # `or 0` also covers an explicit JSON null, which would otherwise
        # break the numeric comparisons done when scoring.
        metadata["citations"] = scite_data.get("citation_count") or 0

    # Number of graph nodes reported by Connected Papers.
    graph_data = _fetch_json(
        f"https://www.connectedpapers.com/api/graph/{arxiv_id}"
    )
    if isinstance(graph_data, dict):
        metadata["connected_papers"] = len(graph_data.get("nodes") or [])

    return metadata
def calculate_trust_score(metadata):
    """Compute a 0-100 trust score from citation and connected-paper counts.

    The score starts at 50. Citations add up to 30 points (>100: 30, >50: 20,
    >10: 10) and connected papers add up to 20 (>20: 20, >10: 10).

    Args:
        metadata: Mapping with optional ``citations`` and ``connected_papers``
            counts. A missing key or a ``None`` value counts as 0, so an
            incomplete metadata lookup cannot raise here.

    Returns:
        The score as an int, capped at 100.
    """
    citations = metadata.get("citations") or 0
    connected_papers = metadata.get("connected_papers") or 0

    trust_score = 50  # Base score

    # Citations factor (max boost 30 points)
    if citations > 100:
        trust_score += 30
    elif citations > 50:
        trust_score += 20
    elif citations > 10:
        trust_score += 10

    # Connected Papers factor (max boost 20 points)
    if connected_papers > 20:
        trust_score += 20
    elif connected_papers > 10:
        trust_score += 10

    return min(trust_score, 100)
def _word_set(text):
    """Return the set of lowercase alphanumeric words in *text*.

    Every non-alphanumeric character is treated as a separator, so a title
    word such as "Computing:" still matches the query word "computing".
    """
    cleaned = "".join(ch if ch.isalnum() else " " for ch in text.lower())
    return set(cleaned.split())


def calculate_relevance_score(paper, query):
    """Compute a 0-100 relevance score from keyword overlap and recency.

    The score starts at 50, gains 5 points for each distinct query word that
    also appears in the title, and gains a recency bonus based on the
    difference in calendar years (same year: 15, under 3: 10, under 5: 5).

    Args:
        paper: Mapping with a ``title`` string and an optional ``published``
            value. The recency bonus applies only when ``published`` is a
            ``datetime.datetime``.
        query: The user's search string.

    Returns:
        The score as an int, capped at 100.
    """
    relevance_score = 50  # Base score

    # Keyword match factor
    match_count = len(_word_set(query) & _word_set(paper["title"]))
    relevance_score += match_count * 5

    # Publication date factor
    published = paper.get("published")
    if isinstance(published, datetime.datetime):
        # Take "now" in the paper's own timezone (naive stays naive) so both
        # years are read on the same clock.
        current_year = datetime.datetime.now(tz=published.tzinfo).year
        years_old = current_year - published.year
        if years_old < 1:
            relevance_score += 15
        elif years_old < 3:
            relevance_score += 10
        elif years_old < 5:
            relevance_score += 5

    return min(relevance_score, 100)
def _split_arxiv_id(entry_id):
    """Return ``(short_id, base_id)`` for an arXiv entry URL.

    ``short_id`` is everything after ``/abs/`` and keeps any version suffix
    (``2101.00001v2``, ``hep-th/9901001v1``). ``base_id`` is the same id with
    the trailing ``vN`` removed. If the URL has no ``/abs/`` segment, the last
    path component is used.
    """
    if "/abs/" in entry_id:
        # Keeps the category of old-style ids such as "hep-th/9901001".
        short_id = entry_id.split("/abs/")[-1]
    else:
        short_id = entry_id.split("/")[-1]

    head, sep, version = short_id.rpartition("v")
    base_id = head if sep and head and version.isdigit() else short_id
    return short_id, base_id


def retrieve_papers(query, max_results=5):
    """Retrieve academic papers from arXiv and attach scores and links.

    Args:
        query: Search string passed to ``arxiv.Search``; also used for the
            relevance score.
        max_results: Maximum number of results requested from arXiv.

    Returns:
        A list of dicts, one per result, with keys ``title``, ``summary``,
        ``url``, ``doi``, ``bib_explorer``, ``connected_papers``, ``litmaps``,
        ``scite``, ``authors``, ``published``, ``trust_score`` and
        ``relevance_score``.
    """
    search = arxiv.Search(query=query, max_results=max_results)
    papers = []
    # NOTE(review): newer releases of the arxiv package deprecate
    # Search.results() in favour of arxiv.Client().results(search) - confirm
    # against the pinned version before switching.
    for result in search.results():
        arxiv_id, base_id = _split_arxiv_id(result.entry_id)
        metadata = get_paper_metadata(arxiv_id)
        trust_score = calculate_trust_score(metadata)
        relevance_score = calculate_relevance_score(
            {"title": result.title, "published": result.published}, query
        )
        papers.append(
            {
                "title": result.title,
                "summary": result.summary,
                "url": result.pdf_url,
                # arXiv DOIs carry no version, so a "vN" suffix would give a
                # DOI that does not resolve.
                "doi": f"https://doi.org/10.48550/arXiv.{base_id}",
                "bib_explorer": f"https://arxiv.org/abs/{arxiv_id}",
                # NOTE(review): this is the same API endpoint used for the
                # metadata lookup, not a human-facing page - confirm intent.
                "connected_papers": f"https://www.connectedpapers.com/api/graph/{arxiv_id}",
                "litmaps": f"https://app.litmaps.com/preview/{arxiv_id}",
                "scite": f"https://scite.ai/reports/arxiv:{arxiv_id}",
                "authors": [author.name for author in result.authors],
                "published": result.published,
                "trust_score": trust_score,
                "relevance_score": relevance_score,
            }
        )
    return papers
def random_paper_search():
    """Fetch papers for a randomly chosen topic, with no user input.

    One topic is drawn from a fixed list and a result count between 5 and 15
    (inclusive) is drawn at random; both are passed to ``retrieve_papers``.
    """
    topics = [
        "artificial intelligence",
        "quantum computing",
        "neuroscience",
        "climate change",
        "robotics",
    ]
    chosen_topic = random.choice(topics)
    result_count = random.randint(5, 15)
    return retrieve_papers(chosen_topic, max_results=result_count)
| # ------------------------------- | |
| # Streamlit UI | |
| # ------------------------------- | |
def _load_papers(fetch, spinner_text, empty_message):
    """Run *fetch* under a spinner and publish its result to session state.

    A non-empty result replaces ``st.session_state.papers``. An empty result
    removes any previously stored papers and shows *empty_message*. If *fetch*
    raises, the error is shown and previously stored papers are left alone.
    """
    try:
        with st.spinner(spinner_text):
            found = fetch()
    except Exception as exc:  # top-level UI boundary: report, don't crash the page
        st.error(f"Could not retrieve papers: {exc}")
        return

    if found:
        st.session_state.papers = found
    else:
        # Drop stale results so the "not found" message is not followed by
        # the papers from an earlier search.
        if "papers" in st.session_state:
            del st.session_state["papers"]
        st.error(empty_message)


# NOTE(review): the emoji and dash in the UI strings below were mis-encoded in
# the source (shown as "๐" / "โ"); the characters used here are a best-guess
# restoration - confirm against the intended design.
st.title("📚 PaperPilot – Intelligent Academic Navigator")

with st.sidebar:
    st.header("🔍 Search Parameters")
    query = st.text_input("Research topic or question:")
    col1, col2 = st.columns([3, 1])
    with col1:
        search_button = st.button("🔍 Find Articles")
    with col2:
        random_button = st.button("🎲 Random Papers")

if search_button and query.strip():
    _load_papers(
        lambda: retrieve_papers(query),
        "Searching arXiv...",
        "No papers found. Try different keywords.",
    )
elif search_button:
    # The search button was pressed with an empty or whitespace-only query.
    st.warning("Please enter a research topic or question first.")
elif random_button:
    _load_papers(
        random_paper_search,
        "Fetching random papers...",
        "No papers found. Please try again.",
    )

if 'papers' in st.session_state:
    for idx, paper in enumerate(st.session_state.papers, 1):
        with st.expander(f"{idx}. {paper['title']}"):
            published = paper['published']
            if isinstance(published, datetime.datetime):
                published_text = published.strftime('%Y-%m-%d')
            else:
                published_text = 'Unknown'

            st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
            st.markdown(f"**Published:** {published_text}")
            st.markdown(f"**Trust Score:** {paper['trust_score']} / 100")
            st.markdown(f"**Relevance Score:** {paper['relevance_score']} / 100")
            st.markdown(f"**DOI:** [Link]({paper['doi']})")
            st.markdown(f"**Bibliographic Explorer:** [Link]({paper['bib_explorer']})")
            st.markdown(f"**Connected Papers:** [Link]({paper['connected_papers']})")
            st.markdown(f"**Litmaps:** [Link]({paper['litmaps']})")
            st.markdown(f"**Scite:** [Link]({paper['scite']})")
            st.markdown("**Abstract:**")
            st.write(paper['summary'])