ScientryBackend / main.py
raannakasturi's picture
Add initial implementation for paper summarization and data fetching
3ec5aa6
raw
history blame
2.12 kB
import json
import os
import dotenv
from summarize_paper import summarize_paper
from fetch_data import fetch_paper_data_with_category
from post_blog import post_blog
# Run dotenv's loader first, then read the two API keys from the process
# environment. A key that is not set resolves to None.
dotenv.load_dotenv()
cat_ids_api_key = os.environ.get("DATA_API_ACCESS_KEY")  # handed to fetch_paper_data_with_category
summarizer_api_key = os.environ.get("SUMMARIZER_API_KEY")  # handed to summarize_paper
def paper_data(paper_data):
    """Summarize each paper in a JSON payload and save the combined result.

    The payload is decoded with ``json.loads`` and must be a JSON object that
    maps a category name to an object mapping paper ids to detail objects.
    Each detail object is read for the keys ``doi``, ``pdf_url``, ``title``
    and ``citation``; a paper with any of these missing or empty (or with an
    empty id) is skipped with a console message.

    For every remaining paper, ``summarize_paper`` is called to obtain a
    ``(summary, mindmap)`` pair, the result is handed to ``post_blog``, and a
    record is stored under ``data[category][paper_id]``.

    Args:
        paper_data: JSON text describing the papers, grouped by category.

    Returns:
        The result document serialized as JSON text (indented by 4 spaces,
        non-ASCII characters kept as-is). The same text is written to
        ``paper_data_with_summary.json`` in the current working directory.
    """
    categories = json.loads(paper_data)
    # The access key does not change between papers, so read it once.
    blog_access_key = os.getenv('ACCESS_KEY')

    data = {"status": "success", "data": {}}
    for category, papers in categories.items():
        print(f"Processing category: {category}")
        processed = {}
        data['data'][category] = processed
        for paper_id, details in papers.items():
            doi = details.get("doi")
            pdf_url = details.get("pdf_url")
            title = details.get("title")
            citation = details.get("citation")
            if not all([paper_id, doi, pdf_url, title, citation]):
                print(f"Skipping paper with ID: {paper_id} (missing details)")
                continue
            summary, mindmap = summarize_paper(pdf_url, paper_id, summarizer_api_key)
            post_blog(title, category, summary, mindmap, citation, blog_access_key)
            processed[paper_id] = {
                "id": paper_id,
                "doi": doi,
                "title": title,
                "category": category,
                "citation": citation,
                "summary": summary,
                "mindmap": mindmap,
            }

    output_file = "paper_data_with_summary.json"
    serialized = json.dumps(data, indent=4, ensure_ascii=False)
    # Write the already-serialized text verbatim. Passing it through
    # json.dump again would store one big escaped string literal instead of
    # a JSON object.
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(serialized)
    print(f"Processed data saved to {output_file}")
    return serialized
if __name__ == "__main__":
    # Fetch the per-category paper listing using the data API key.
    data = fetch_paper_data_with_category(cat_ids_api_key)

    # paper_data() decodes this payload with json.loads, so it is JSON text.
    # Dumping that text directly would store a single quoted, escaped string,
    # so decode it first and keep a readable JSON snapshot on disk.
    if isinstance(data, (str, bytes, bytearray)):
        snapshot = json.loads(data)
    else:
        snapshot = data
    with open("paper_data.json", "w", encoding="utf-8") as file:
        json.dump(snapshot, file, indent=4, ensure_ascii=False)

    # Process the original payload and echo the resulting JSON text.
    pdata = paper_data(data)
    print(pdata)