File size: 4,810 Bytes
d82d9bc
 
2bf3be5
d82d9bc
672a01e
d82d9bc
 
 
353e6b2
d82d9bc
 
f50b29d
d82d9bc
f50b29d
98434dd
 
 
 
f50b29d
 
 
c072563
98434dd
f50b29d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b08d28
f50b29d
 
 
4b08d28
f50b29d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d82d9bc
f50b29d
 
d82d9bc
 
f50b29d
ea7c078
f50b29d
ea7c078
 
 
 
 
f50b29d
 
64e2bcf
f50b29d
 
adbfd57
f50b29d
2bf2b98
f50b29d
 
 
 
 
 
 
 
 
adbfd57
f50b29d
 
adbfd57
f50b29d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import json
import os
import time
import dotenv
import html
from summarize_paper import summarize_paper
from fetch_data import fetch_paper_data_with_category
from post_blog import post_blog
from send_mail import send_email

# Load environment configuration via python-dotenv before ACCESS_KEY is read
# on the next line (presumably from a local .env file — confirm deployment).
dotenv.load_dotenv()
# Shared secret taken from the ACCESS_KEY environment variable; os.getenv
# yields None when it is unset. It is compared against caller-supplied keys
# and forwarded to summarize_paper and post_blog.
ACCESS_KEY = os.getenv("ACCESS_KEY")

def fix_text(text: str) -> str:
    """Escape *text* for HTML and try to undo Latin-1/UTF-8 mojibake per word.

    One known garbled sequence (presumably a mis-decoded ellipsis) is
    replaced by ``"..., "`` and the result is HTML-escaped. Each
    whitespace-separated word is then encoded as Latin-1 and decoded as
    UTF-8; a word for which that round trip fails is kept unchanged.

    Returns:
        The words re-joined with single spaces. Every word is followed by a
        space, so a non-empty result carries a trailing space and runs of
        whitespace in the input collapse to one space.
    """
    normalized = text.encode('utf-8').decode('utf-8')
    escaped = html.escape(normalized.replace("â¦", "..., "))

    def repair(word: str) -> str:
        # UTF-8 text that was wrongly decoded as Latin-1 comes back repaired;
        # anything else raises during encode/decode and is left as it was.
        try:
            return word.encode('latin1').decode('utf-8')
        except Exception:
            return word

    rebuilt = "".join(repair(word) + " " for word in escaped.split())
    return rebuilt.encode('utf-8').decode()

def _summarize_with_retries(pdf_url, paper_id, max_retries):
    """Call summarize_paper up to *max_retries* times, pausing between tries.

    Returns the last ``(summary, mindmap)`` pair obtained; either element may
    still be falsy (or ``None``) when every attempt failed.
    """
    summary, mindmap = None, None
    for attempt in range(1, max_retries + 1):
        try:
            summary, mindmap = summarize_paper(pdf_url, paper_id, ACCESS_KEY)
            if summary and mindmap:
                break
        except Exception as e:
            print(f"Error summarizing paper {paper_id}: {e}")
        # Only wait when another attempt is still going to happen.
        if attempt < max_retries:
            print(f"Retrying paper {paper_id} in 3 minutes")
            time.sleep(3 * 60)
    return summary, mindmap


def paper_data(paper_data_json: str, wait_time: int = 5) -> str:
    """Summarize and post every paper described in *paper_data_json*.

    Args:
        paper_data_json: JSON object mapping category -> {paper_id: details},
            where details provides "doi", "pdf_url", "title" and "citation".
        wait_time: Forwarded unchanged to ``post_blog``.

    Returns:
        A JSON string ``{"status": "success", "data": {category: {...}}}``
        holding one record per paper that was summarized and posted. Papers
        with missing details, failed summaries or posting errors are reported
        on stdout and left out.
    """
    max_retries = 3
    report = {"status": "success", "data": {}}
    for category, papers in json.loads(paper_data_json).items():
        print(f"Processing category: {category}")
        category_records = {}
        report["data"][category] = category_records
        for paper_id, details in papers.items():
            doi = details.get("doi")
            pdf_url = details.get("pdf_url")
            title = details.get("title")
            # NOTE(review): the title is escaped here, again inside fix_text
            # and once more below; the citation is escaped twice. Kept as is
            # because what post_blog expects is not visible from this file.
            title = html.escape(title) if title else ""
            citation = details.get("citation")
            if not all([paper_id, doi, pdf_url, title, citation]):
                print(f"Skipping paper with ID: {paper_id} (missing details)")
                continue

            summary, mindmap = _summarize_with_retries(pdf_url, paper_id, max_retries)
            if not (summary and mindmap):
                print(f"Failed to summarize paper {paper_id} after {max_retries} attempts")
                continue

            try:
                safe_title = html.escape(fix_text(title).strip())
                safe_citation = html.escape(fix_text(citation).strip())
                safe_summary = html.escape(str(summary).strip())
                safe_mindmap = html.escape(str(mindmap).strip())
                posted = post_blog(
                    doi,
                    safe_title,
                    category,
                    safe_summary,
                    safe_mindmap,
                    safe_citation,
                    ACCESS_KEY,
                    wait_time,
                )
            except Exception as e:
                print(f"Error posting blog '{title}': {e}")
                continue

            category_records[paper_id] = {
                "id": paper_id,
                "doi": doi,
                "title": safe_title,
                "category": category,
                "posted": posted,
                "citation": safe_citation,
                "summary": safe_summary,
                "mindmap": safe_mindmap,
            }
    return json.dumps(report, indent=4, ensure_ascii=False)

def post_blogpost(uaccess_key: str, wait_time: int = 5) -> str:
    """Fetch papers, summarize and post them, then email the JSON report.

    Args:
        uaccess_key: Key supplied by the caller; must match ``ACCESS_KEY``.
        wait_time: Forwarded to ``paper_data``.

    Returns:
        The JSON string produced by ``paper_data``. ``False`` is returned
        instead when the key is rejected (the ``str`` annotation is kept for
        interface compatibility). A failure while sending the email is
        printed, not raised, and the report is still returned.
    """
    import hmac  # function-scope import keeps this change self-contained

    # Fail closed: with ACCESS_KEY unset, a plain equality check would accept
    # a caller passing None. compare_digest avoids leaking, through timing,
    # how much of the key matched; bytes are used because compare_digest
    # rejects non-ASCII str arguments.
    key_ok = (
        bool(ACCESS_KEY)
        and isinstance(uaccess_key, str)
        and hmac.compare_digest(
            uaccess_key.encode("utf-8", "surrogatepass"),
            ACCESS_KEY.encode("utf-8", "surrogatepass"),
        )
    )
    if not key_ok:
        return False

    data = fetch_paper_data_with_category(uaccess_key)
    processed_data = paper_data(data, wait_time)
    try:
        send_email(processed_data)
        print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
    except Exception as e:
        # Best effort: the report is still returned when mailing fails.
        print(f"\n-------------------------------------------------------\nError sending mail: {e}\n-------------------------------------------------------\n")
    finally:
        print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")

    return processed_data

def test(uaccess_key: str) -> str:
    """Run ``paper_data`` on one hard-coded sample paper.

    Args:
        uaccess_key: Key supplied by the caller; must match ``ACCESS_KEY``.

    Returns:
        The JSON string produced by ``paper_data`` for the sample, or
        ``False`` when the key is rejected (the ``str`` annotation is kept
        for interface compatibility).
    """
    import hmac  # function-scope import keeps this change self-contained

    # Fail closed: with ACCESS_KEY unset, a plain equality check would accept
    # a caller passing None. compare_digest gives a constant-time comparison;
    # bytes are used because it rejects non-ASCII str arguments.
    key_ok = (
        bool(ACCESS_KEY)
        and isinstance(uaccess_key, str)
        and hmac.compare_digest(
            uaccess_key.encode("utf-8", "surrogatepass"),
            ACCESS_KEY.encode("utf-8", "surrogatepass"),
        )
    )
    if not key_ok:
        return False

    # The inner "paper_id" and "category" fields are not read by paper_data,
    # which uses the surrounding dictionary keys instead.
    test_data = {
        "Economics": {
            "2501.00578": {
                "paper_id": "2501.00578",
                "doi": "https://doi.org/10.1002/alz.14328",
                "title": "Bound-State Beta Decay of $\\mathbf{\\mathrm{^{205}{Tl}^{81+}}}$ Ions and the LOREX Project",
                "category": "Economics",
                "pdf_url": "https://arxiv.org/pdf/2501.00578",
                "citation": "Miller, A. D. (2025). The limits of tolerance (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00578",
            }
        }
    }
    return paper_data(json.dumps(test_data, ensure_ascii=False, indent=4))

if __name__ == "__main__":
    # Script entry point: run the sample pipeline with the configured key and
    # print whatever it returns.
    print(test(ACCESS_KEY))