ScientryBackend

Running

App Files Community

raannakasturi committed on Feb 20

Commit

f50b29d

1 Parent(s): 319999c

Refactor summarization and email sending logic; improve error handling and environment variable checks

Browse files

Files changed (6) hide show

fetch_data.py +30 -26
image.py +61 -89
main.py +77 -82
post_blog.py +99 -89
send_mail.py +49 -31
summarize_paper.py +25 -16

fetch_data.py CHANGED Viewed

@@ -2,23 +2,23 @@ from gradio_client import Client
 import json
 from concurrent.futures import ThreadPoolExecutor, as_completed
-def fetch_category_ids(cat_ids_api_key):
-    """Fetch category IDs using the category API."""
-    if not cat_ids_api_key:
         raise ValueError("API access key not found. Please check your environment variables.")
     cat_ids_api_client = Client("raannakasturi/ReXploreIDFetchingAPI")
     try:
         result = cat_ids_api_client.predict(
-            user_access_key=cat_ids_api_key,
             api_name="/fetch_paper_ids"
         )
         cat_ids = json.loads(result)
-        if cat_ids['status'] == 'success':
-            return cat_ids['data']
         else:
             return None
     except Exception as e:
-        print(f"Exception while fetching category IDs: {str(e)}")
         return None
 def fetch_single_paper_data(paper_id):
@@ -29,44 +29,48 @@ def fetch_single_paper_data(paper_id):
             api_name="/fetch_paper_data"
         )
         paper_data = json.loads(result)
-        if paper_data['status'] == 'success':
-            return paper_id, paper_data['data']
         else:
             print(f"Failed to fetch data for paper ID {paper_id}: {paper_data.get('message', 'Unknown error')}")
             return paper_id, None
     except Exception as e:
-        print(f"Exception while fetching data for paper ID {paper_id}: {str(e)}")
         return paper_id, None
 def fetch_paper_data_concurrently(paper_ids, max_threads=12):
     paper_id_data = {}
     with ThreadPoolExecutor(max_workers=max_threads) as executor:
-        future_to_paper_id = {executor.submit(fetch_single_paper_data, paper_id): paper_id for paper_id in paper_ids}
-        for future in as_completed(future_to_paper_id):
-            paper_id = future_to_paper_id[future]
             try:
-                paper_id, data = future.result()
                 if data:
-                    paper_id_data[paper_id] = data
             except Exception as e:
-                print(f"Error fetching data for paper ID {paper_id}: {str(e)}")
     return paper_id_data
-def fetch_paper_data_with_category(cat_ids_api_key):
-    data = {}
     try:
-        cat_ids = fetch_category_ids(cat_ids_api_key)
         if cat_ids:
-            for category, ids in cat_ids.items():
                 print(f"Fetching data for category: {category}")
                 try:
-                    paper_data = fetch_paper_data_concurrently(ids['ids'])
-                    if paper_data:
-                        data[category] = paper_data
                 except Exception as e:
-                    print(f"Error fetching data for category {category}: {str(e)}")
                     continue
-        return json.dumps(data, indent=4, ensure_ascii=False)
     except Exception as e:
-        print(f"Exception while fetching paper data by category: {str(e)}")
         return None

 import json
 from concurrent.futures import ThreadPoolExecutor, as_completed
+def fetch_category_ids(api_key):
+    if not api_key:
         raise ValueError("API access key not found. Please check your environment variables.")
     cat_ids_api_client = Client("raannakasturi/ReXploreIDFetchingAPI")
     try:
         result = cat_ids_api_client.predict(
+            user_access_key=api_key,
             api_name="/fetch_paper_ids"
         )
         cat_ids = json.loads(result)
+        if cat_ids.get('status') == 'success':
+            return cat_ids.get('data')
         else:
+            print(f"Failed to fetch category IDs: {cat_ids.get('message', 'No message provided')}")
             return None
     except Exception as e:
+        print(f"Exception while fetching category IDs: {e}")
         return None
 def fetch_single_paper_data(paper_id):
             api_name="/fetch_paper_data"
         )
         paper_data = json.loads(result)
+        if paper_data.get('status') == 'success':
+            return paper_id, paper_data.get('data')
         else:
             print(f"Failed to fetch data for paper ID {paper_id}: {paper_data.get('message', 'Unknown error')}")
             return paper_id, None
     except Exception as e:
+        print(f"Exception while fetching data for paper ID {paper_id}: {e}")
         return paper_id, None
 def fetch_paper_data_concurrently(paper_ids, max_threads=12):
     paper_id_data = {}
     with ThreadPoolExecutor(max_workers=max_threads) as executor:
+        future_to_paper = {executor.submit(fetch_single_paper_data, pid): pid for pid in paper_ids}
+        for future in as_completed(future_to_paper):
+            paper_id = future_to_paper[future]
             try:
+                pid, data = future.result()
                 if data:
+                    paper_id_data[pid] = data
             except Exception as e:
+                print(f"Error fetching data for paper ID {paper_id}: {e}")
     return paper_id_data
+def fetch_paper_data_with_category(api_key):
+    all_data = {}
     try:
+        cat_ids = fetch_category_ids(api_key)
         if cat_ids:
+            for category, info in cat_ids.items():
                 print(f"Fetching data for category: {category}")
                 try:
+                    paper_ids = info.get('ids', [])
+                    if paper_ids:
+                        paper_data = fetch_paper_data_concurrently(paper_ids)
+                        if paper_data:
+                            all_data[category] = paper_data
+                    else:
+                        print(f"No paper IDs found for category: {category}")
                 except Exception as e:
+                    print(f"Error fetching data for category {category}: {e}")
                     continue
+        return json.dumps(all_data, indent=4, ensure_ascii=False)
     except Exception as e:
+        print(f"Exception while fetching paper data by category: {e}")
         return None

image.py CHANGED Viewed

@@ -2,22 +2,38 @@ import base64
 import io
 import os
 import re
-import requests
-from urllib.parse import quote
 from PIL import Image
 from g4f.client import Client
 from g4f.Provider import RetryProvider, PollinationsAI, ImageLabs, Blackbox, HuggingSpace, Airforce
 from g4f.Provider.hf_space.BlackForestLabsFlux1Schnell import BlackForestLabsFlux1Schnell
 from g4f.Provider.hf_space.VoodoohopFlux1Schnell import VoodoohopFlux1Schnell
 def extract_summary(text):
     text = text.replace("#", "").strip().lower()
     match = re.search(r"summary(.*?)highlights", text, re.DOTALL)
-    if match:
-        return match.group(1).strip()
-    return text
 def fix_base64_padding(data):
     missing_padding = len(data) % 4
     if missing_padding:
         data += "=" * (4 - missing_padding)
@@ -25,84 +41,41 @@ def fix_base64_padding(data):
 def generate_image(title, category, summary):
     print("Generating image...")
-    import time
     start = time.time()
-    try:
         try:
-            try:
-                negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, unsuitable, inappropriate, offensive, revealing, sexual, explicit",
-                prompt = f"Generate accurate image representing the {category} concept: ```{title.strip()}: {summary.strip()}```"
-                client = Client()
-                time.sleep(20)
-                img_data = client.images.generate(
-                    provider=RetryProvider(
-                        providers=[ImageLabs, PollinationsAI],
-                        shuffle=True,
-                        single_provider_retry=True,
-                        max_retries=5,
-                    ),
-                    model="sdxl-turbo",
-                    prompt=prompt,
-                    negative_prompt=negative,
-                    response_format="b64_json",
-                    width=1024,
-                    height=576,
-                ).data[0].b64_json
-                print(f"Image generated in {time.time() - start:.2f} seconds")
-                if img_data:
-                    return f"data:image/png;base64,{img_data}"
-                return None
-            except Exception as e:
-                print(f"Error generating image: {e}")
-                negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, unsuitable, inappropriate, offensive, revealing, sexual, explicit",
-                prompt = f"Generate accurate image representing the {category} concept: ```{title.strip()}: {summary.strip()}```"
-                client = Client()
-                time.sleep(20)
-                img_data = client.images.generate(
-                    provider=RetryProvider(
-                        providers=[Airforce, PollinationsAI, Blackbox],
-                        shuffle=True,
-                        single_provider_retry=True,
-                        max_retries=5,
-                    ),
-                    model="flux",
-                    prompt=prompt,
-                    negative_prompt=negative,
-                    response_format="b64_json",
-                    width=1024,
-                    height=576,
-                ).data[0].b64_json
-                print(f"Image generated in {time.time() - start:.2f} seconds")
-                if img_data:
-                    return f"data:image/png;base64,{img_data}"
-                return None
-        except Exception as e:
-            print(f"Error generating image: {e}")
-            negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, unsuitable, inappropriate, offensive, revealing, sexual, explicit",
-            prompt = f"Generate accurate image representing the {category} concept: ```{title.strip()}: {summary.strip()}```"
-            client = Client()
-            time.sleep(20)
-            img_data = client.images.generate(
-                provider=RetryProvider(
-                    providers=[BlackForestLabsFlux1Schnell, VoodoohopFlux1Schnell, HuggingSpace],
-                    shuffle=True,
-                    single_provider_retry=True,
-                    max_retries=5,
-                ),
-                model="flux-schnell",
                 prompt=prompt,
-                negative_prompt=negative,
                 response_format="b64_json",
                 width=1024,
                 height=576,
-            ).data[0].b64_json
-            print(f"Image generated in {time.time() - start:.2f} seconds")
             if img_data:
                 return f"data:image/png;base64,{img_data}"
-            return None
-    except Exception as e:
-        print(f"Error generating image: {e}")
-        return None
 def verify_image(image_data):
     try:
@@ -115,31 +88,30 @@ def verify_image(image_data):
         return False
 def fetch_image(title, category, summary):
-    title = r"{}".format(title)
-    category = r"{}".format(category)
     summary = extract_summary(summary)
-    image_url = "https://i.ibb.co/TBJqggw/Image-Not-Found.jpg"
     try:
         data_uri = generate_image(title, category, summary)
         if data_uri:
-            base64_image = fix_base64_padding(data_uri.split(",")[1])
-            if base64_image:
-                if verify_image(base64.b64decode(base64_image, validate=True)):
-                    image_url = f"data:image/png;base64,{base64_image}"
-        else:
-            image_url = "https://i.ibb.co/TBJqggw/Image-Not-Found.jpg"
     except Exception as e:
         print(f"Error fetching image: {e}")
-        image_url = "https://i.ibb.co/TBJqggw/Image-Not-Found.jpg"
     finally:
         if os.path.exists("image.png"):
             os.remove("image.png")
-        return image_url
 if __name__ == "__main__":
     title = "Exposition: Enumerative Geometry and Tree-Level Gromov-Witten Invariants"
     category = "Mathematics"
-    summary = "The text discusses the Kontsevich-Manin formula for enumerating degree d rational curves via Gromov-Witten invariants. It details the calculation of these invariants using moduli spaces of stable maps and explores their implications in enumerative geometry."
     image_url = fetch_image(title, category, summary)
-    print(image_url)

 import io
 import os
 import re
+import time
 from PIL import Image
 from g4f.client import Client
 from g4f.Provider import RetryProvider, PollinationsAI, ImageLabs, Blackbox, HuggingSpace, Airforce
 from g4f.Provider.hf_space.BlackForestLabsFlux1Schnell import BlackForestLabsFlux1Schnell
 from g4f.Provider.hf_space.VoodoohopFlux1Schnell import VoodoohopFlux1Schnell
+NEGATIVE_PROMPT = (
+    "low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, "
+    "missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, "
+    "three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, "
+    "worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, "
+    "2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, "
+    "3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, "
+    "bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, "
+    "harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, "
+    "twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, "
+    "unsuitable, inappropriate, offensive, revealing, sexual, explicit"
+)
 def extract_summary(text):
+    """
+    Clean and extract the summary portion from the text.
+    """
     text = text.replace("#", "").strip().lower()
     match = re.search(r"summary(.*?)highlights", text, re.DOTALL)
+    return match.group(1).strip() if match else text
 def fix_base64_padding(data):
+    """
+    Ensure that the base64 string has the proper padding.
+    """
     missing_padding = len(data) % 4
     if missing_padding:
         data += "=" * (4 - missing_padding)
 def generate_image(title, category, summary):
     print("Generating image...")
     start = time.time()
+    prompt = f"Generate accurate image representing the {category} concept: ```{title.strip()}: {summary.strip()}```"
+    client = Client()
+    attempts = [
+        ([ImageLabs, PollinationsAI], "sdxl-turbo"),
+        ([Airforce, PollinationsAI, Blackbox], "flux"),
+        ([BlackForestLabsFlux1Schnell, VoodoohopFlux1Schnell, HuggingSpace], "flux-schnell")
+    ]
+    for providers, model in attempts:
         try:
+            provider = RetryProvider(
+                providers=providers,
+                shuffle=True,
+                single_provider_retry=True,
+                max_retries=3,
+            )
+            response = client.images.generate(
+                provider=provider,
+                model=model,
                 prompt=prompt,
+                negative_prompt=NEGATIVE_PROMPT,
                 response_format="b64_json",
                 width=1024,
                 height=576,
+            )
+            img_data = response.data[0].b64_json
+            elapsed = time.time() - start
+            print(f"Image generated in {elapsed:.2f} seconds using model {model}")
             if img_data:
                 return f"data:image/png;base64,{img_data}"
+        except Exception as e:
+            print(f"Attempt with model {model} failed: {e}")
+    return None
 def verify_image(image_data):
     try:
         return False
 def fetch_image(title, category, summary):
     summary = extract_summary(summary)
+    fallback_url = "https://i.ibb.co/TBJqggw/Image-Not-Found.jpg"
     try:
         data_uri = generate_image(title, category, summary)
         if data_uri:
+            base64_str = data_uri.split(",")[1]
+            base64_str = fix_base64_padding(base64_str)
+            decoded = base64.b64decode(base64_str, validate=True)
+            if verify_image(decoded):
+                return f"data:image/png;base64,{base64_str}"
+        return fallback_url
     except Exception as e:
         print(f"Error fetching image: {e}")
+        return fallback_url
     finally:
         if os.path.exists("image.png"):
             os.remove("image.png")
 if __name__ == "__main__":
     title = "Exposition: Enumerative Geometry and Tree-Level Gromov-Witten Invariants"
     category = "Mathematics"
+    summary = (
+        "The text discusses the Kontsevich-Manin formula for enumerating degree d rational curves via Gromov-Witten invariants. "
+        "It details the calculation of these invariants using moduli spaces of stable maps and explores their implications in enumerative geometry."
+    )
     image_url = fetch_image(title, category, summary)
+    print(image_url)

main.py CHANGED Viewed

@@ -9,108 +9,103 @@ from post_blog import post_blog
 from send_mail import send_email
 dotenv.load_dotenv()
-access_key = os.getenv("ACCESS_KEY")
-def fix_text(text):
     text = html.escape(text.encode('utf-8').decode('utf-8').replace("â¦", "..., "))
     fixed_text = ""
     for word in text.split():
         try:
-            fixed_text += word.encode('latin1').decode('utf-8')+" "
-        except:
-            fixed_text += word+" "
     return fixed_text.encode('utf-8').decode()
-def paper_data(paper_data, wait_time=5):
-    data = {"status": "success"}
-    data['data'] = {}
-    paper_data = json.loads(paper_data)
-    for category, papers in paper_data.items():
-            print(f"Processing category: {category}")
-            data['data'][category] = {}
-            for paper_id, details in papers.items():
-                doi = details.get("doi")
-                pdf_url = details.get("pdf_url")
-                title = details.get("title")
-                title = html.escape(title)
-                citation = details.get("citation")
-                if not all([paper_id, doi, pdf_url, title, citation]):
-                    print(f"Skipping paper with ID: {paper_id} (missing details)")
-                    continue
-                summary = None
-                mindmap = None
-                max_retries = 3
-                retry_count = 0
-                while (not summary or not mindmap) and retry_count < max_retries:
-                    try:
-                        summary, mindmap = summarize_paper(pdf_url, paper_id, access_key)
-                        if summary and mindmap:
-                            break
-                    except Exception as e:
-                        print(f"Error summarizing paper {paper_id}: {e}")
-                    retry_count += 1
-                    if retry_count < max_retries:
-                        print(f"Retrying paper {paper_id} in 3 minutes")
-                        time.sleep(3*60)
-                if not summary or not mindmap:
-                    print(f"Failed to summarize paper {paper_id} after {max_retries} attempts")
-                    continue
                 try:
-                    title = fix_text(title)
-                    citation = fix_text(citation)
-                    title = html.escape(str(title).strip())
-                    citation = html.escape(str(citation).strip())
-                    summary = html.escape(str(summary).strip())
-                    mindmap = html.escape(str(mindmap).strip())
-                    status = post_blog(doi, title, category, summary, mindmap, citation, access_key, wait_time)
                 except Exception as e:
-                    print(f"Error posting blog '{title}': {e}")
-                    continue
-                data['data'][category][paper_id] = {
-                    "id": paper_id,
-                    "doi": doi,
-                    "title": title,
-                    "category": category,
-                    "posted": status,
-                    "citation": citation,
-                    "summary": summary,
-                    "mindmap": mindmap,
-                }
-    data = json.dumps(data, indent=4, ensure_ascii=False)
-    return data
-def post_blogpost(uaccess_key, wait_time=5):
-    if uaccess_key != access_key:
         return False
     data = fetch_paper_data_with_category(uaccess_key)
-    pdata = paper_data(data, wait_time)
     try:
-        send_email(pdata)
         print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
     except Exception as e:
         print(f"\n-------------------------------------------------------\nError sending mail: {e}\n-------------------------------------------------------\n")
     finally:
         print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")
-    return pdata
-def test(uaccess_key):
-    if uaccess_key != access_key:
         return False
-    data = {
         "Economics": {
-            "2501.00578":{
-                "paper_id":"2501.00578",
-                "doi":"https://doi.org/10.1002/alz.14328",
-                "title":"Bound-State Beta Decay of $\\mathbf{\\mathrm{^{205}{Tl}^{81+}}}$ Ions and the LOREX Project",
-                "category":"Economics",
-                "pdf_url":"https://arxiv.org/pdf/2501.00578",
-                "citation":"Miller, A. D. (2025). The limits of tolerance (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00578",
-            },
-        },
     }
-    status = paper_data(json.dumps(data, ensure_ascii=False, indent=4))
-    return status
 if __name__ == '__main__':
-    data = test(access_key)
-    print(data)

 from send_mail import send_email
 dotenv.load_dotenv()
+ACCESS_KEY = os.getenv("ACCESS_KEY")
+def fix_text(text: str) -> str:
     text = html.escape(text.encode('utf-8').decode('utf-8').replace("â¦", "..., "))
     fixed_text = ""
     for word in text.split():
         try:
+            fixed_text += word.encode('latin1').decode('utf-8') + " "
+        except Exception:
+            fixed_text += word + " "
     return fixed_text.encode('utf-8').decode()
+def paper_data(paper_data_json: str, wait_time: int = 5) -> str:
+    result_data = {"status": "success", "data": {}}
+    papers_by_category = json.loads(paper_data_json)
+    for category, papers in papers_by_category.items():
+        print(f"Processing category: {category}")
+        result_data["data"][category] = {}
+        for paper_id, details in papers.items():
+            doi = details.get("doi")
+            pdf_url = details.get("pdf_url")
+            title = details.get("title")
+            title = html.escape(title) if title else ""
+            citation = details.get("citation")
+            if not all([paper_id, doi, pdf_url, title, citation]):
+                print(f"Skipping paper with ID: {paper_id} (missing details)")
+                continue
+            summary, mindmap = None, None
+            max_retries = 3
+            retry_count = 0
+            while (not summary or not mindmap) and retry_count < max_retries:
                 try:
+                    summary, mindmap = summarize_paper(pdf_url, paper_id, ACCESS_KEY)
+                    if summary and mindmap:
+                        break
                 except Exception as e:
+                    print(f"Error summarizing paper {paper_id}: {e}")
+                retry_count += 1
+                if retry_count < max_retries:
+                    print(f"Retrying paper {paper_id} in 3 minutes")
+                    time.sleep(3 * 60)
+            if not summary or not mindmap:
+                print(f"Failed to summarize paper {paper_id} after {max_retries} attempts")
+                continue
+            try:
+                fixed_title = html.escape(fix_text(title).strip())
+                fixed_citation = html.escape(fix_text(citation).strip())
+                fixed_summary = html.escape(str(summary).strip())
+                fixed_mindmap = html.escape(str(mindmap).strip())
+                post_status = post_blog(doi, fixed_title, category, fixed_summary, fixed_mindmap, fixed_citation, ACCESS_KEY, wait_time)
+            except Exception as e:
+                print(f"Error posting blog '{title}': {e}")
+                continue
+            result_data["data"][category][paper_id] = {
+                "id": paper_id,
+                "doi": doi,
+                "title": fixed_title,
+                "category": category,
+                "posted": post_status,
+                "citation": fixed_citation,
+                "summary": fixed_summary,
+                "mindmap": fixed_mindmap,
+            }
+    return json.dumps(result_data, indent=4, ensure_ascii=False)
+def post_blogpost(uaccess_key: str, wait_time: int = 5) -> str:
+    if uaccess_key != ACCESS_KEY:
         return False
     data = fetch_paper_data_with_category(uaccess_key)
+    processed_data = paper_data(data, wait_time)
     try:
+        send_email(processed_data)
         print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
     except Exception as e:
         print(f"\n-------------------------------------------------------\nError sending mail: {e}\n-------------------------------------------------------\n")
     finally:
         print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")
+    return processed_data
+def test(uaccess_key: str) -> str:
+    if uaccess_key != ACCESS_KEY:
         return False
+    test_data = {
         "Economics": {
+            "2501.00578": {
+                "paper_id": "2501.00578",
+                "doi": "https://doi.org/10.1002/alz.14328",
+                "title": "Bound-State Beta Decay of $\\mathbf{\\mathrm{^{205}{Tl}^{81+}}}$ Ions and the LOREX Project",
+                "category": "Economics",
+                "pdf_url": "https://arxiv.org/pdf/2501.00578",
+                "citation": "Miller, A. D. (2025). The limits of tolerance (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00578",
+            }
+        }
     }
+    return paper_data(json.dumps(test_data, ensure_ascii=False, indent=4))
 if __name__ == '__main__':
+    result = test(ACCESS_KEY)
+    print(result)

post_blog.py CHANGED Viewed

@@ -6,54 +6,53 @@ import mistune
 from image import fetch_image
 dotenv.load_dotenv()
-access_key = os.getenv('ACCESS_KEY')
-client_id = os.getenv('CLIENT_ID')
-client_secret = os.getenv('CLIENT_SECRET')
-refresh_token = os.getenv('REFRESH_TOKEN')
-blog_id = os.getenv('BLOG_ID')
 def generate_post_html(doi, title, category, summary, mindmap, citation):
     doi = doi.split("https://")[-1]
     mindmap = mindmap.replace("{", r'{').replace("}", r'}')
-    citation = mistune.html(repr(citation.replace("&amp;", "&").replace("```plaintext\n", "").replace("\n```", "").strip())[1:-1])
     image = fetch_image(title, category, summary)
-    html_summary = mistune.html(summary)
     post = f"""
     <div id="paper_post">
         <img style="display:block; width:100%; height:100%;" id="paper_image"
-            src="{image.strip()}"
-            alt="{title.strip()}">
-        <br>
-        <br>
         <div id="paper_summary">
-            {html_summary.replace("&amp;", "&").strip()}
         </div>
         <br>
         <h2>Mindmap</h2>
         <p><small><em>If MindMap doesn't load, please try refreshing the page.</em></small></p>
         <div class="markmap" id="paper_mindmap">
             <script type="text/template">
-                    {mindmap.replace("&amp;", "&").replace(":", "=>").strip()}
-                </script>
         </div>
         <br>
         <h2>Citation</h2>
         <div id="paper_citation">
-            {citation}
         </div>
         <script>
-          const paperImage = document.querySelector(
-            'img[style="display:block; width:100%; height:100%;"][id="paper_image"]'
-          );
           if (paperImage) {{
-            const tablOfContents = document.createElement("div");
-            tablOfContents.innerHTML = "<b>{{getToc}} $title={{Table of Contents}}</b>";
-            const brElement = paperImage.nextElementSibling;
-            if (brElement && brElement.tagName === "BR") {{
-              brElement.insertAdjacentElement("afterend", tablOfContents);
             }}
           }}
-		</script>
         <script>
             window.markmap = {{
                 autoLoader: {{
@@ -65,10 +64,9 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
         <script>
             window.addEventListener('load', function() {{
                 setTimeout(function() {{
-                    const element = document.querySelector('div.mm-toolbar-item[title="Fit window size"]');
-                    console.log(element);
-                    if (element) {{
-                        element.click();
                     }} else {{
                         console.log('Element not found');
                     }}
@@ -79,7 +77,6 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
             .markmap {{
                 position: relative;
             }}
             .markmap > svg {{
                 width: 100%;
                 border: 2px solid #000;
@@ -109,83 +106,96 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
     """
     return post, image
 def create_post(doi, title, category, summary, mindmap, citation):
-    post_title = title
-    post_category = f"{category}"
     try:
         post_body, post_image = generate_post_html(doi, title, category, summary, mindmap, citation)
-        # print("_____________________\n\n",title,"\n\n_____________________")
-        # with open('index.html', 'w', encoding='utf-8') as f:
-        #     f.write(post_body)
-        # exit()
     except Exception as e:
         print(f"Error generating post: {e}")
         return None, None, None, None
     return post_title, post_category, post_body, post_image
 def post_post(title, category, body, image):
-    response = None
     try:
-        data = requests.post(
-            url='https://oauth2.googleapis.com/token',
-            data={
-                'grant_type': 'refresh_token',
-                'client_secret': client_secret,
-                'refresh_token': refresh_token,
-                'client_id': client_id,
-            },
-            ).json()
-        url = f"https://blogger.googleapis.com/v3/blogs/{blog_id}/posts"
-        headers = {
-            'Authorization': f"Bearer {data['access_token']}",
-            "content-type": "application/json"
-        }
-        post_data = {
-            "kind": "blogger#post",
-            "blog": {
-                "id": blog_id
-            },
-            "images": [{
-                "url": image
-            }],
-            "title": r"{}".format(title),
-            "content": body,
-            "labels": [category, "ZZZZZZZZZ"]
-        }
-        response = requests.post(url, headers=headers, json=post_data).json()
-        if response['status'] != 'LIVE':
-            print(response)
-        if response['status'] == 'LIVE':
-            print(f"The post '{title}' is {response['status']}")
             return True
         else:
-            print(response)
-            print(f"Error posting {title}: {response}")
             return False
     except Exception as e:
-        print(response)
         print(f"Error posting {title}: {e}")
         return False
 def post_blog(doi, title, category, summary, mindmap, citation, uaccess_key, wait_time=5):
-    if uaccess_key != access_key:
         return False
     else:
-        status = True
-        post_title, post_category, post_body, post_image = create_post(doi, title, category, summary, mindmap, citation)
-        if not all([post_title, post_category, post_body, post_image]):
-            print(f'Failed to create post {post_title}')
-            return False
-        post_title = post_title.replace("&amp;", "&")
-        if "&amp;" in post_title:
-            return False
-        status = post_post(post_title, post_category, post_body, post_image)
-        print(f"Waiting for {wait_time*60} seconds...")
-        time.sleep(wait_time*60)
-        if status:
-            print('Post created successfully')
-            return True
-        else:
-            print('Failed to create post')
-            return False

 from image import fetch_image
 dotenv.load_dotenv()
+ACCESS_KEY = os.getenv('ACCESS_KEY')
+CLIENT_ID = os.getenv('CLIENT_ID')
+CLIENT_SECRET = os.getenv('CLIENT_SECRET')
+REFRESH_TOKEN = os.getenv('REFRESH_TOKEN')
+BLOG_ID = os.getenv('BLOG_ID')
 def generate_post_html(doi, title, category, summary, mindmap, citation):
     doi = doi.split("https://")[-1]
     mindmap = mindmap.replace("{", r'{').replace("}", r'}')
+    citation_clean = citation.replace("&amp;", "&").replace("```plaintext\n", "").replace("\n```", "").strip()
+    citation_html = mistune.html(repr(citation_clean)[1:-1])
     image = fetch_image(title, category, summary)
+    html_summary = mistune.html(summary).replace("&amp;", "&").strip()
     post = f"""
     <div id="paper_post">
         <img style="display:block; width:100%; height:100%;" id="paper_image"
+             src="{image.strip()}"
+             alt="{title.strip()}">
+        <br><br>
         <div id="paper_summary">
+            {html_summary}
         </div>
         <br>
         <h2>Mindmap</h2>
         <p><small><em>If MindMap doesn't load, please try refreshing the page.</em></small></p>
         <div class="markmap" id="paper_mindmap">
             <script type="text/template">
+                {mindmap.replace("&amp;", "&").replace(":", "=>").strip()}
+            </script>
         </div>
         <br>
         <h2>Citation</h2>
         <div id="paper_citation">
+            {citation_html}
         </div>
         <script>
+          const paperImage = document.querySelector('img[style="display:block; width:100%; height:100%;"][id="paper_image"]');
           if (paperImage) {{
+            const toc = document.createElement("div");
+            toc.innerHTML = "<b>{{getToc}} $title={{Table of Contents}}</b>";
+            const brElem = paperImage.nextElementSibling;
+            if (brElem && brElem.tagName === "BR") {{
+              brElem.insertAdjacentElement("afterend", toc);
             }}
           }}
+        </script>
         <script>
             window.markmap = {{
                 autoLoader: {{
         <script>
             window.addEventListener('load', function() {{
                 setTimeout(function() {{
+                    const elem = document.querySelector('div.mm-toolbar-item[title="Fit window size"]');
+                    if (elem) {{
+                        elem.click();
                     }} else {{
                         console.log('Element not found');
                     }}
             .markmap {{
                 position: relative;
             }}
             .markmap > svg {{
                 width: 100%;
                 border: 2px solid #000;
     """
     return post, image
 def create_post(doi, title, category, summary, mindmap, citation):
     try:
         post_body, post_image = generate_post_html(doi, title, category, summary, mindmap, citation)
     except Exception as e:
         print(f"Error generating post: {e}")
         return None, None, None, None
+    post_title = title.replace("&amp;", "&")
+    if "&amp;" in post_title:
+        return None, None, None, None
+    post_category = f"{category}"
     return post_title, post_category, post_body, post_image
+def fetch_oauth_token():
+    token_data = {
+        'grant_type': 'refresh_token',
+        'client_secret': CLIENT_SECRET,
+        'refresh_token': REFRESH_TOKEN,
+        'client_id': CLIENT_ID,
+    }
+    try:
+        response = requests.post('https://oauth2.googleapis.com/token', data=token_data)
+        response.raise_for_status()
+        token_info = response.json()
+        return token_info.get('access_token')
+    except Exception as e:
+        print(f"Error fetching OAuth token: {e}")
+        return None
 def post_post(title, category, body, image):
+    access_token = fetch_oauth_token()
+    if not access_token:
+        return False
+    url = f"https://blogger.googleapis.com/v3/blogs/{BLOG_ID}/posts"
+    headers = {
+        'Authorization': f"Bearer {access_token}",
+        "Content-Type": "application/json"
+    }
+    post_data = {
+        "kind": "blogger#post",
+        "blog": {"id": BLOG_ID},
+        "images": [{"url": image}],
+        "title": title,
+        "content": body,
+        "labels": [category, "ZZZZZZZZZ"]
+    }
     try:
+        response = requests.post(url, headers=headers, json=post_data)
+        response.raise_for_status()
+        result = response.json()
+        if result.get('status') == 'LIVE':
+            print(f"The post '{title}' is LIVE")
             return True
         else:
+            print(f"Error posting {title}: {result}")
             return False
     except Exception as e:
         print(f"Error posting {title}: {e}")
         return False
 def post_blog(doi, title, category, summary, mindmap, citation, uaccess_key, wait_time=5):
+    if uaccess_key != ACCESS_KEY:
         return False
+    post_title, post_category, post_body, post_image = create_post(doi, title, category, summary, mindmap, citation)
+    if not all([post_title, post_category, post_body, post_image]):
+        print(f"Failed to create post {post_title}")
+        return False
+    status = post_post(post_title, post_category, post_body, post_image)
+    print(f"Waiting for {wait_time * 60} seconds...")
+    time.sleep(wait_time * 60)
+    if status:
+        print("Post created successfully")
+        return True
     else:
+        print("Failed to create post")
+        return False
+if __name__ == "__main__":
+    doi = "https://doi.org/10.1234/example"
+    title = "Example Title"
+    category = "Science"
+    summary = "This is an example summary in markdown format."
+    mindmap = "{example: mindmap content}"
+    citation = "Example citation text"
+    uaccess_key = ACCESS_KEY
+    post_success = post_blog(doi, title, category, summary, mindmap, citation, uaccess_key)
+    print("Post success:", post_success)

send_mail.py CHANGED Viewed

@@ -1,39 +1,38 @@
-from email import encoders
-from email.mime.base import MIMEBase
 import os
 from datetime import datetime
 from pytz import timezone
-import pytz
 import sib_api_v3_sdk
 from sib_api_v3_sdk.rest import ApiException
 from dotenv import load_dotenv
 load_dotenv()
-mail_api = os.getenv("MAIL_API")
 def mail_body(generation_details):
     body = f"""
     Hello,
     These are the details of the Blogs Posted at ReXplore: Science @ Fingertips.
     Date & Time: {get_current_time()}
     {generation_details}
     Regards,
     Nayan Kasturi (Raanna),
     Developer & Maintainer,
     ReXplore.
     """
-    return body
-def get_current_time():
-    fmt = "%d-%m-%Y %H:%M:%S %Z%z"
-    now_utc = datetime.now(timezone('UTC'))
-    now_asia = now_utc.astimezone(timezone('Asia/Kolkata'))
-    return now_asia.strftime(fmt)
 def create_attachment(content, filename):
     attachment = MIMEBase('application', 'octet-stream')
@@ -44,26 +43,45 @@ def create_attachment(content, filename):
 def send_email(generation_details):
     configuration = sib_api_v3_sdk.Configuration()
-    configuration.api_key['api-key'] = mail_api
-    api_instance = sib_api_v3_sdk.TransactionalEmailsApi(sib_api_v3_sdk.ApiClient(configuration))
-    data = mail_body(generation_details)
-    data_attchment = create_attachment(data.encode('utf-8'), "data.txt")
-    subject = "New Blog Batch Published to ReXplore at " + get_current_time()
-    sender = {"name": "Project Gatekeeper", "email": "projectgatekeeper@silerudaagartha.eu.org"}
-    reply_to = {"name": "Project Gatekeeper", "email": "gatekeeper@raannakasturi.eu.org"}
-    text_content = data
-    attachments = [
-        {"content": data_attchment.get_payload(), "name": data_attchment.get_filename()},
-    ]
-    to = [{"email": "raannakasturi@proton.me"}]
-    send_smtp_email = sib_api_v3_sdk.SendSmtpEmail(to=to, reply_to=reply_to, attachment=attachments, text_content=text_content, sender=sender, subject=subject)
     try:
-        api_instance.send_transac_email(send_smtp_email)
         print("Email Sent")
         return True
     except ApiException as e:
-        print("Can't send email")
-        print("Exception when calling SMTPApi->send_transac_email: %s\n" % e)
         return False

 import os
 from datetime import datetime
+from email import encoders
+from email.mime.base import MIMEBase
 from pytz import timezone
 import sib_api_v3_sdk
 from sib_api_v3_sdk.rest import ApiException
 from dotenv import load_dotenv
 load_dotenv()
+MAIL_API_KEY = os.getenv("MAIL_API")
+def get_current_time():
+    fmt = "%d-%m-%Y %H:%M:%S %Z%z"
+    now_utc = datetime.now(timezone('UTC'))
+    now_kolkata = now_utc.astimezone(timezone('Asia/Kolkata'))
+    return now_kolkata.strftime(fmt)
 def mail_body(generation_details):
     body = f"""
     Hello,
     These are the details of the Blogs Posted at ReXplore: Science @ Fingertips.
     Date & Time: {get_current_time()}
     {generation_details}
     Regards,
     Nayan Kasturi (Raanna),
     Developer & Maintainer,
     ReXplore.
     """
+    return body.strip()
 def create_attachment(content, filename):
     attachment = MIMEBase('application', 'octet-stream')
 def send_email(generation_details):
     configuration = sib_api_v3_sdk.Configuration()
+    configuration.api_key['api-key'] = MAIL_API_KEY
+    api_client = sib_api_v3_sdk.ApiClient(configuration)
+    transactional_api = sib_api_v3_sdk.TransactionalEmailsApi(api_client)
+    email_content = mail_body(generation_details)
+    attachment_obj = create_attachment(email_content.encode('utf-8'), "data.txt")
+    subject = f"New Blog Batch Published to ReXplore at {get_current_time()}"
+    sender = {
+        "name": "Project Gatekeeper",
+        "email": "projectgatekeeper@silerudaagartha.eu.org"
+    }
+    reply_to = {
+        "name": "Project Gatekeeper",
+        "email": "gatekeeper@raannakasturi.eu.org"
+    }
+    recipients = [{"email": "raannakasturi@proton.me"}]
+    attachments = [{
+        "content": attachment_obj.get_payload(),
+        "name": attachment_obj.get_filename()
+    }]
+    email = sib_api_v3_sdk.SendSmtpEmail(
+        to=recipients,
+        reply_to=reply_to,
+        attachment=attachments,
+        text_content=email_content,
+        sender=sender,
+        subject=subject
+    )
     try:
+        transactional_api.send_transac_email(email)
         print("Email Sent")
         return True
     except ApiException as e:
+        print("Failed to send email:")
+        print(f"Exception when calling SMTPApi->send_transac_email: {e}")
         return False
+if __name__ == "__main__":
+    generation_details = "Example: 5 blogs generated and posted successfully."
+    if send_email(generation_details):
+        print("Email sent successfully.")
+    else:
+        print("Email sending failed.")

summarize_paper.py CHANGED Viewed

@@ -1,31 +1,40 @@
 import os
-import dotenv
 import json
 from gradio_client import Client
 dotenv.load_dotenv()
-def summarize_paper(pdf_url, paper_id, access_key):
-    mindmap = None
     summary = None
     try:
-        summarizer_client = Client(
-            "raannakasturi/ReXploreAPI",
-            hf_token=os.getenv("HF_API_TOKEN"),
-        )
         result = summarizer_client.predict(
-                url=pdf_url,
-                id=paper_id,
-                access_key=access_key,
-                api_name="/rexplore_summarizer"
         )
         if result:
             data = json.loads(result[0])
-            print
-            if data["mindmap_status"] == "success":
-                mindmap = data["mindmap"]
-            if data["summary_status"] == "success":
-                summary = data["summary"]
     except Exception as e:
         print(f"Error summarizing paper: {e}")
     return summary, mindmap

 import os
 import json
 from gradio_client import Client
+import dotenv
 dotenv.load_dotenv()
+def summarize_paper(pdf_url: str, paper_id: str, access_key: str):
     summary = None
+    mindmap = None
     try:
+        hf_api_token = os.getenv("HF_API_TOKEN")
+        if not hf_api_token:
+            raise ValueError("HF_API_TOKEN not found in environment variables.")
+        summarizer_client = Client("raannakasturi/ReXploreAPI", hf_token=hf_api_token)
         result = summarizer_client.predict(
+            url=pdf_url,
+            id=paper_id,
+            access_key=access_key,
+            api_name="/rexplore_summarizer"
         )
         if result:
             data = json.loads(result[0])
+            if data.get("mindmap_status") == "success":
+                mindmap = data.get("mindmap")
+            if data.get("summary_status") == "success":
+                summary = data.get("summary")
     except Exception as e:
         print(f"Error summarizing paper: {e}")
     return summary, mindmap
+if __name__ == "__main__":
+    test_pdf_url = "https://example.com/paper.pdf"
+    test_paper_id = "12345"
+    test_access_key = "your_access_key_here"
+    paper_summary, paper_mindmap = summarize_paper(test_pdf_url, test_paper_id, test_access_key)
+    print("Summary:", paper_summary)
+    print("Mindmap:", paper_mindmap)