"""Summarize fetched papers, hand each one to ``post_blog``, and save the results."""

import json
import os

import dotenv

from summarize_paper import summarize_paper
from fetch_data import fetch_paper_data_with_category
from post_blog import post_blog

# Load environment variables
dotenv.load_dotenv()

cat_ids_api_key = os.getenv("DATA_API_ACCESS_KEY")
summarizer_api_key = os.getenv("SUMMARIZER_API_KEY")


def _as_mapping(payload):
    """Return *payload* decoded from JSON if it is text, otherwise unchanged."""
    if isinstance(payload, (str, bytes, bytearray)):
        return json.loads(payload)
    return payload


def paper_data(paper_data):
    """Summarize every paper in *paper_data* and persist the combined result.

    For each paper that has an id, DOI, PDF URL, title and citation, this
    calls ``summarize_paper`` and passes the outcome to ``post_blog``. Papers
    missing any of those fields are skipped with a printed notice.

    Args:
        paper_data: Either JSON text or an already-decoded mapping of
            ``{category: {paper_id: details}}``, where ``details`` provides
            the keys ``doi``, ``pdf_url``, ``title`` and ``citation``.
            (The parameter keeps its original name, which shadows the
            function inside this body, so keyword callers keep working.)

    Returns:
        The processed data serialized as a JSON string (4-space indent,
        non-ASCII characters preserved). The same text is written to
        ``paper_data_with_summary.json``.
    """
    papers_by_category = _as_mapping(paper_data)
    # Loop-invariant: read the key handed to post_blog once per call.
    blog_access_key = os.getenv('ACCESS_KEY')

    data = {"status": "success", "data": {}}

    for category, papers in papers_by_category.items():
        print(f"Processing category: {category}")
        processed = data['data'][category] = {}

        for paper_id, details in papers.items():
            doi = details.get("doi")
            pdf_url = details.get("pdf_url")
            title = details.get("title")
            citation = details.get("citation")

            if not all([paper_id, doi, pdf_url, title, citation]):
                print(f"Skipping paper with ID: {paper_id} (missing details)")
                continue

            summary, mindmap = summarize_paper(pdf_url, paper_id, summarizer_api_key)
            post_blog(title, category, summary, mindmap, citation, blog_access_key)

            processed[paper_id] = {
                "id": paper_id,
                "doi": doi,
                "title": title,
                "category": category,
                "citation": citation,
                "summary": summary,
                "mindmap": mindmap,
            }

    output_file = "paper_data_with_summary.json"
    serialized = json.dumps(data, indent=4, ensure_ascii=False)
    # Write the serialized text directly: passing this string through
    # json.dump again would store a quoted, escaped JSON *string* in the file
    # rather than a JSON object.
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(serialized)

    print(f"Processed data saved to {output_file}")
    return serialized


def main():
    """Fetch paper data, save the raw payload, then summarize and print it."""
    # The fetcher's return type is not visible here; normalize once so the
    # raw dump is a real JSON document whether it returned text or a mapping.
    fetched = _as_mapping(fetch_paper_data_with_category(cat_ids_api_key))

    with open("paper_data.json", "w", encoding="utf-8") as file:
        json.dump(fetched, file, indent=4, ensure_ascii=False)

    pdata = paper_data(fetched)
    print(pdata)


if __name__ == "__main__":
    main()