Spaces:
Running
Running
File size: 4,810 Bytes
d82d9bc 2bf3be5 d82d9bc 672a01e d82d9bc 353e6b2 d82d9bc f50b29d d82d9bc f50b29d 98434dd f50b29d c072563 98434dd f50b29d 4b08d28 f50b29d 4b08d28 f50b29d d82d9bc f50b29d d82d9bc f50b29d ea7c078 f50b29d ea7c078 f50b29d 64e2bcf f50b29d adbfd57 f50b29d 2bf2b98 f50b29d adbfd57 f50b29d adbfd57 f50b29d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import hmac
import html
import json
import os
import time

import dotenv

from summarize_paper import summarize_paper
from fetch_data import fetch_paper_data_with_category
from post_blog import post_blog
from send_mail import send_email
# Load variables via python-dotenv before the key is looked up below.
dotenv.load_dotenv()

# Shared secret that callers must present; None when the variable is not set.
ACCESS_KEY = os.environ.get("ACCESS_KEY")
def fix_text(text: str) -> str:
    """Return *text* HTML-escaped, with per-word mojibake repair applied.

    Steps, in order:

    1. The literal sequence "â¦" is replaced with "..., " (presumably a
       mangled ellipsis -- confirm against real input).
    2. The result is passed through ``html.escape``.
    3. The text is split on whitespace. Each word is re-encoded as Latin-1
       and decoded as UTF-8; when that round trip fails the word is kept
       unchanged.

    Every word in the result is followed by a single space, so runs of
    whitespace collapse to one space and any non-empty result ends with a
    trailing space. An empty or whitespace-only input yields "".
    """
    escaped = html.escape(text.replace("â¦", "..., "))

    repaired_words = []
    for word in escaped.split():
        try:
            repaired_words.append(word.encode("latin1").decode("utf-8"))
        except UnicodeError:
            # Either the word has characters outside Latin-1 or its bytes are
            # not valid UTF-8: it was not double-encoded, so leave it alone.
            repaired_words.append(word)

    # Trailing space after every word is kept for backward compatibility.
    return "".join(f"{word} " for word in repaired_words)
def paper_data(paper_data_json: str, wait_time: int = 5) -> str:
    """Summarize and post every paper described in a JSON document.

    ``paper_data_json`` is parsed as ``{category: {paper_id: details}}``.
    For each paper, ``doi``, ``pdf_url``, ``title`` and ``citation`` are read
    from ``details``; a paper missing any of them (or with an empty id) is
    skipped. ``summarize_paper`` is attempted up to three times with a
    three-minute pause between attempts, and the paper is skipped if no
    attempt yields both a summary and a mindmap. Successful papers are handed
    to ``post_blog`` (which also receives ``wait_time``); a paper whose
    posting raises is skipped.

    Returns:
        A JSON string (4-space indent, non-ASCII preserved) shaped as
        ``{"status": "success", "data": {category: {paper_id: record}}}``.
        Every category seen appears in ``data``, even if none of its papers
        were posted.
    """
    max_retries = 3
    posted_by_category = {}

    for category, papers in json.loads(paper_data_json).items():
        print(f"Processing category: {category}")
        category_records = {}
        posted_by_category[category] = category_records

        for paper_id, details in papers.items():
            doi = details.get("doi")
            pdf_url = details.get("pdf_url")
            raw_title = details.get("title")
            # NOTE(review): the title is escaped here, again inside fix_text,
            # and once more before posting; the citation is escaped twice.
            # Confirm that post_blog expects repeatedly-escaped text.
            title = html.escape(raw_title) if raw_title else ""
            citation = details.get("citation")

            required_fields = (paper_id, doi, pdf_url, title, citation)
            if not all(required_fields):
                print(f"Skipping paper with ID: {paper_id} (missing details)")
                continue

            summary = mindmap = None
            for attempt in range(1, max_retries + 1):
                try:
                    summary, mindmap = summarize_paper(pdf_url, paper_id, ACCESS_KEY)
                    if summary and mindmap:
                        break
                except Exception as e:
                    print(f"Error summarizing paper {paper_id}: {e}")
                # No pause after the final attempt.
                if attempt < max_retries:
                    print(f"Retrying paper {paper_id} in 3 minutes")
                    time.sleep(3 * 60)

            if not (summary and mindmap):
                print(f"Failed to summarize paper {paper_id} after {max_retries} attempts")
                continue

            try:
                safe_title = html.escape(fix_text(title).strip())
                safe_citation = html.escape(fix_text(citation).strip())
                safe_summary = html.escape(str(summary).strip())
                safe_mindmap = html.escape(str(mindmap).strip())
                posted = post_blog(
                    doi,
                    safe_title,
                    category,
                    safe_summary,
                    safe_mindmap,
                    safe_citation,
                    ACCESS_KEY,
                    wait_time,
                )
            except Exception as e:
                print(f"Error posting blog '{title}': {e}")
                continue

            category_records[paper_id] = {
                "id": paper_id,
                "doi": doi,
                "title": safe_title,
                "category": category,
                "posted": posted,
                "citation": safe_citation,
                "summary": safe_summary,
                "mindmap": safe_mindmap,
            }

    report = {"status": "success", "data": posted_by_category}
    return json.dumps(report, indent=4, ensure_ascii=False)
def post_blogpost(uaccess_key: str, wait_time: int = 5) -> str:
    """Fetch papers, summarize and post them, then email the report.

    Args:
        uaccess_key: Key supplied by the caller. It must match ``ACCESS_KEY``
            and is also forwarded to ``fetch_paper_data_with_category``.
        wait_time: Forwarded unchanged to ``paper_data``.

    Returns:
        The JSON string produced by ``paper_data``. When the key check fails
        the function returns ``False`` instead (despite the ``str``
        annotation); callers relying on that sentinel keep working.

    A failure while sending the email is printed and otherwise ignored, so
    the processed data is still returned.
    """
    # Compare in constant time so response timing does not reveal how much of
    # the key matched. Anything that is not an encodable string -- including
    # an unset ACCESS_KEY (None) -- can never authorize.
    try:
        authorized = hmac.compare_digest(
            uaccess_key.encode("utf-8"), ACCESS_KEY.encode("utf-8")
        )
    except (AttributeError, UnicodeError):
        authorized = False
    if not authorized:
        return False

    data = fetch_paper_data_with_category(uaccess_key)
    processed_data = paper_data(data, wait_time)

    try:
        send_email(processed_data)
    except Exception as e:
        # Best effort: a mail failure must not discard the processed data.
        print(f"\n-------------------------------------------------------\nError sending mail: {e}\n-------------------------------------------------------\n")
    else:
        print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
    finally:
        print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")
    return processed_data
def test(uaccess_key: str) -> str:
    """Run ``paper_data`` on one hard-coded sample paper.

    Args:
        uaccess_key: Key supplied by the caller; must match ``ACCESS_KEY``.

    Returns:
        The JSON string produced by ``paper_data`` for the sample (using its
        default ``wait_time``), or ``False`` when the key check fails
        (despite the ``str`` annotation).
    """
    # Constant-time comparison; a non-string or unencodable key, or an unset
    # ACCESS_KEY (None), never authorizes.
    try:
        authorized = hmac.compare_digest(
            uaccess_key.encode("utf-8"), ACCESS_KEY.encode("utf-8")
        )
    except (AttributeError, UnicodeError):
        authorized = False
    if not authorized:
        return False

    test_data = {
        "Economics": {
            "2501.00578": {
                "paper_id": "2501.00578",
                "doi": "https://doi.org/10.1002/alz.14328",
                "title": "Bound-State Beta Decay of $\\mathbf{\\mathrm{^{205}{Tl}^{81+}}}$ Ions and the LOREX Project",
                "category": "Economics",
                "pdf_url": "https://arxiv.org/pdf/2501.00578",
                "citation": "Miller, A. D. (2025). The limits of tolerance (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00578",
            }
        }
    }
    return paper_data(json.dumps(test_data, ensure_ascii=False, indent=4))
if __name__ == '__main__':
    # Script entry point: run the built-in sample with the configured key and
    # print whatever test() returns.
    print(test(ACCESS_KEY))
|