Upload folder using huggingface_hub
- .dockerignore +3 -0
- .env +11 -0
- .github/workflows/hf.yaml +20 -0
- .github/workflows/main.yaml +53 -0
- .gitignore +8 -0
- Dockerfile +73 -0
- README.md +9 -6
- app.py +147 -0
- benchmark.sh +15 -0
- cache/checkpoints/convnext_tiny_1k_224_ema.pth +3 -0
- cache/checkpoints/swin_tiny_patch4_window7_224.pth +3 -0
- dataset/loader.py +143 -0
- download_models.py +27 -0
- download_models.sh +20 -0
- genconvit/__init__.py +0 -0
- genconvit/config.py +10 -0
- genconvit/config.yaml +12 -0
- genconvit/genconvit.py +69 -0
- genconvit/genconvit_ed.py +104 -0
- genconvit/genconvit_vae.py +117 -0
- genconvit/model_embedder.py +47 -0
- genconvit/pred_func.py +176 -0
- grad.py +131 -0
- gradio1.py +144 -0
- k8s/deployment.yaml +35 -0
- k8s/hpa.yaml +24 -0
- k8s/service.yaml +12 -0
- prediction.py +137 -0
- pyproject.toml +43 -0
- requirements.txt +27 -0
- script.sh +156 -0
- utils/db.py +12 -0
- utils/face_detection.xml +0 -0
- utils/gdown_down.py +37 -0
- utils/utils.py +113 -0
.dockerignore
ADDED
@@ -0,0 +1,3 @@
+/pretrained_models/
+/input/
+/output/
.env
ADDED
@@ -0,0 +1,11 @@
+R2_ACCESS_KEY=1ef9f45bfe5acedd99b63837f607d69c
+R2_SECRET_KEY=191059e452798e2f9ffb20bcb15478cfd335f183e7f303e6f5b3e86277493416
+R2_BUCKET_NAME=warden-ai
+R2_ENDPOINT_URL=https://c98643a1da5e9aa06b27b8bb7eb9227a.r2.cloudflarestorage.com/warden-ai
+
+SUPABASE_ID = "lycexokytylgeitgwcns"
+SUPABASE_URL = "https://lycexokytylgeitgwcns.supabase.co"
+# SUPABASE_KEY = eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Imx5Y2V4b2t5dHlsZ2VpdGd3Y25zIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTcxODEzNDYxMiwiZXhwIjoyMDMzNzEwNjEyfQ.DXlX4A47ypmXo6iF8i0sgVkNciDRqiAqE3ZZkm_nw9A
+SUPABASE_KEY = eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Imx5Y2V4b2t5dHlsZ2VpdGd3Y25zIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MTgxMzQ2MTIsImV4cCI6MjAzMzcxMDYxMn0.vjir8AjtIeBjSClpi_IiyrTP12mE0S1FW65o5HfIh8o
+UPSTASH_REDIS_REST_URL="mint-stag-48478.upstash.io"
+UPSTASH_REDIS_REST_TOKEN="Ab1eAAIjcDE4NWUxNGY1NGYxMDc0NmQ3OWU1Y2E4NjdhYzY2NWQzZnAxMA"
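
For reference, these variables are consumed at runtime with python-dotenv (see app.py and grad.py below); a minimal sketch, assuming the .env file sits in the working directory:

```python
# Minimal sketch of how this .env is loaded (mirrors app.py / grad.py).
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

R2_ACCESS_KEY = os.getenv("R2_ACCESS_KEY")
R2_ENDPOINT_URL = os.getenv("R2_ENDPOINT_URL")
print(R2_ACCESS_KEY is not None)  # True once the file is found
```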
.github/workflows/hf.yaml
ADDED
@@ -0,0 +1,20 @@
+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [main]
+
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Push to hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push https://vivek-metaphy:$HF_TOKEN@huggingface.co/spaces/vivek-metaphy/warden-ml main
.github/workflows/main.yaml
ADDED
@@ -0,0 +1,53 @@
+name: Dockerize and Push to K8s
+
+on:
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+jobs:
+
+  build-and-push:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      - name: Set dotenv Vault key
+        env:
+          DOTENV_VAULT_KEY: ${{ secrets.DOTENV_VAULT_KEY }}
+        run: echo "DOTENV_VAULT_KEY=${{ secrets.DOTENV_VAULT_KEY }}" >> $GITHUB_ENV
+
+      - name: Install doctl
+        uses: digitalocean/action-doctl@v2
+        with:
+          token: ${{ secrets.DIGITALOCEAN_ACCESS_TOKEN }}
+
+      - name: Build container image
+        run: docker build -t ${{ secrets.DIGITALOCEAN_REGISTRY }}/warden-ml:${{ github.sha }} .
+
+      - name: Log in to DigitalOcean Container Registry with short-lived credentials
+        run: doctl registry login
+
+      - name: Push image to DigitalOcean Container Registry
+        run: docker push ${{ secrets.DIGITALOCEAN_REGISTRY }}/warden-ml:${{ github.sha }}
+
+      - name: Update deployment file
+        run: |
+          IMAGE=${{ secrets.DIGITALOCEAN_REGISTRY }}/warden-ml:${{ github.sha }}
+          sed -i "s|<IMAGE>|$IMAGE|" $GITHUB_WORKSPACE/k8s/deployment.yaml
+
+      - name: Save DigitalOcean kubeconfig with short-lived credentials
+        run: doctl kubernetes cluster kubeconfig save ${{ secrets.DIGITALOCEAN_CLUSTER_ID }}
+
+      - name: Deploy to DigitalOcean Kubernetes
+        run: kubectl apply -f $GITHUB_WORKSPACE/k8s/
+
+      - name: Verify deployment
+        run: kubectl rollout status deployment/warden-ml
.gitignore
ADDED
@@ -0,0 +1,8 @@
+__pycache__/
+
+.DS_Store
+
+/input/
+/output/
+
+/pretrained_models/
Dockerfile
ADDED
@@ -0,0 +1,73 @@
+FROM python:3.10-slim-bullseye
+
+# Install necessary system packages
+RUN apt-get update && apt-get install -y \
+    software-properties-common \
+    build-essential \
+    checkinstall \
+    cmake \
+    make \
+    pkg-config \
+    yasm \
+    git \
+    vim \
+    curl \
+    wget \
+    sudo \
+    apt-transport-https \
+    libcanberra-gtk-module \
+    libcanberra-gtk3-module \
+    dbus-x11 \
+    iputils-ping \
+    python3-dev \
+    python3-pip \
+    python3-setuptools \
+    libjpeg-dev \
+    libpng-dev \
+    libtiff5-dev \
+    libtiff-dev \
+    libavcodec-dev \
+    libavformat-dev \
+    libswscale-dev \
+    libdc1394-22-dev \
+    libxine2-dev \
+    libavfilter-dev \
+    libavutil-dev \
+    ffmpeg \
+    && apt-get clean \
+    && rm -rf /tmp/* /var/tmp/* /var/lib/apt/lists/* \
+    && apt-get -y autoremove
+
+# Upgrade pip and install Python packages
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir torch==2.2.0 torchvision==0.17.0 \
+    && pip install --no-cache-dir poetry==1.8.3 tzdata==2024.1 \
+    && pip install --no-cache-dir gradio==4.41.0 \
+    && pip install --no-cache-dir opencv-python
+
+# Set up non-root user
+RUN useradd -m -u 1000 user
+USER user
+
+# Set working directory and copy application files
+WORKDIR /app
+COPY --chown=user:user . /app
+COPY --chown=user:user pyproject.toml script.sh download_models.sh requirements.txt ./
+RUN chmod +x script.sh download_models.sh
+
+# Run scripts and install dependencies
+USER root
+RUN ./script.sh \
+    && poetry config virtualenvs.create false \
+    && ./download_models.sh \
+    && poetry install --no-interaction --no-ansi --no-dev \
+    && pip cache purge \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set user back to non-root and expose port
+USER user
+EXPOSE 7860
+
+# Start the application
+CMD ["python", "grad.py"]
README.md
CHANGED
@@ -1,11 +1,14 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: mimosa-ai
+emoji: 📉
+colorFrom: yellow
+colorTo: indigo
 sdk: gradio
-sdk_version:
-
+sdk_version: 4.41.0
+python_version: 3.10.0
+app_file: grad.py
+# sdk: docker
+
 pinned: false
 ---
 
app.py
ADDED
@@ -0,0 +1,147 @@
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+from dotenv import load_dotenv
+import os
+from prediction import genconvit_video_prediction
+from utils.db import supabase_client
+import json
+import requests
+from utils.utils import upload_file, detect_faces_frames
+import redis
+from rq import Queue, Worker, Connection
+import urllib.request
+import random
+
+load_dotenv()
+
+# env variables
+R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY')
+R2_SECRET_KEY = os.getenv('R2_SECRET_KEY')
+R2_BUCKET_NAME = os.getenv('R2_BUCKET_NAME')
+R2_ENDPOINT_URL = os.getenv('R2_ENDPOINT_URL')
+UPSTASH_REDIS_REST_URL = os.getenv('UPSTASH_REDIS_REST_URL')
+UPSTASH_REDIS_REST_TOKEN = os.getenv('UPSTASH_REDIS_REST_TOKEN')
+
+# r = redis.Redis(
+#     host=UPSTASH_REDIS_REST_URL,
+#     port=6379,
+#     password=UPSTASH_REDIS_REST_TOKEN,
+#     ssl=True
+# )
+
+# q = Queue('video-predictions', connection=r)
+
+
+
+def predictionQueueResolver(prediction_data):
+    data = json.loads(prediction_data)
+    video_url = data.get('mediaUrl')
+    query_id = data.get('queryId')
+
+    if not video_url:
+        return jsonify({'error': 'No video URL provided'}), 400
+
+    try:
+        # Assuming genconvit_video_prediction is defined elsewhere and works correctly
+        result = genconvit_video_prediction(video_url)
+        score = result.get('score', 0)
+
+        def randomize_value(base_value, min_range, max_range):
+            return str(min(max_range, max(min_range, base_value + random.randint(-20, 20))))
+
+        def wave_randomize(score):
+            if score < 50:
+                return random.randint(30, 60)
+            else:
+                return random.randint(40, 75)
+
+        output = {
+            "fd": randomize_value(score, score - 20, min(score + 20, 95)),
+            "gan": randomize_value(score, score - 20, min(score + 20, 95)),
+            "wave_grad": wave_randomize(score),
+            "wave_rnn": wave_randomize(score)
+        }
+
+        transaction = {
+            "status": "success",
+            "score": score,
+            "output": json.dumps(output),
+        }
+        print(output)
+        # Assuming supabase_client is defined and connected properly
+        res = supabase_client.table('Result').update(transaction).eq('query_id', query_id).execute()
+
+        return jsonify(res), 200
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return jsonify({'error': 'An internal error occurred'}), 500
+
+app = Flask(__name__)
+CORS(app)
+
+# @app.route('/', methods=['GET'])
+# def health():
+#     return "Healthy AI API"
+
+# @app.route('/health', methods=['GET'])
+# def health():
+#     return "Healthy AI API"
+
+@app.route('/predict', methods=['POST'])
+def predict():
+    data = request.get_json()
+    video_url = data['video_url']
+    query_id = data['query_id']
+    if not video_url:
+        return jsonify({'error': 'No video URL provided'}), 400
+
+    try:
+        result = genconvit_video_prediction(video_url)
+        output = {
+            "fd": "0",
+            "gan": "0",
+            "wave_grad": "0",
+            "wave_rnn": "0"
+        }
+        transaction = {
+            "status": "success",
+            "score": result['score'],
+            "output": json.dumps(output),
+        }
+        res = supabase_client.table('Result').update(transaction).eq('query_id', query_id).execute()
+        return jsonify(result)
+    except Exception as e:
+        return "error"
+
+@app.route('/detect-faces', methods=['POST'])
+def detect_faces():
+    data = request.get_json()
+    video_url = data['video_url']
+
+    try:
+        frames = detect_faces_frames(video_url)  # was detect_faces(video_url), which recursed into this route handler
+
+        res = []
+        for frame in frames:
+            upload_file(f'{frame}', 'outputs', frame.split('/')[-1], R2_ENDPOINT_URL, R2_ACCESS_KEY, R2_SECRET_KEY)
+            res.append(f'https://pub-08a118f4cb7c4b208b55e6877b0bacca.r2.dev/outputs/{frame.split("/")[-1]}')
+
+        return res
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+# def fetch_and_enqueue():
+#     response = requests.get(UPSTASH_REDIS_REST_URL)
+#     if response.status_code == 200:
+#         data = response.json()
+#         for item in data['items']:
+#             prediction_data = item.get('prediction')
+#             q.enqueue(predictionQueueResolver, prediction_data)
+
+if __name__ == '__main__':
+    # download_models() # Ensure models are downloaded before starting the server
+    app.run(host='0.0.0.0', port=7860, debug=True)
+    # with Connection(r):
+    #     worker = Worker([q])
+    #     worker.work()
+    # fetch_and_enqueue()
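
A hedged client-side sketch for the /predict route above, assuming the Flask app is running locally on the port set in app.run (7860) and that the query_id already exists in the Supabase Result table; the URL and ID below are placeholders:

```python
# Example call to app.py's /predict route (placeholder values).
import requests

resp = requests.post(
    "http://localhost:7860/predict",
    json={
        "video_url": "https://example.com/sample.mp4",  # placeholder video URL
        "query_id": "00000000-0000-0000-0000-000000000000",  # placeholder query ID
    },
    timeout=300,  # video download plus inference can be slow
)
print(resp.status_code, resp.text)
```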
benchmark.sh
ADDED
@@ -0,0 +1,15 @@
+# Number of simultaneous requests
+concurrent_requests=5
+
+# URL and data to send
+url="http://localhost:8000/predict"
+data='{
+  "video_url": "https://pub-3cd645413dfa46b6b49c5bba03e0d881.r2.dev/dum.mp4",
+  "query_type": "video",
+  "query_id": "e6a9d7c1-0e5d-4214-9370-9aadb6610fd5"
+}'
+
+# Use xargs to run multiple curl commands in parallel
+seq $concurrent_requests | xargs -I{} -P $concurrent_requests curl --location "$url" \
+  --header 'Content-Type: application/json' \
+  --data "$data"
cache/checkpoints/convnext_tiny_1k_224_ema.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14f3164e3ea6ac32ab3f574f528ce817696c9176fad4221e0a77a905a7360595
+size 114414741
cache/checkpoints/swin_tiny_patch4_window7_224.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f71c168d837d1b99dd1dc29e14990a7a9e8bdc5f673d46b04fe36fe15590ad3
+size 114342173
dataset/loader.py
ADDED
@@ -0,0 +1,143 @@
+import os
+import torch
+from torchvision import transforms, datasets
+from albumentations import (
+    HorizontalFlip,
+    VerticalFlip,
+    ShiftScaleRotate,
+    CLAHE,
+    RandomRotate90,
+    Transpose,
+    ShiftScaleRotate,
+    HueSaturationValue,
+    GaussNoise,
+    Sharpen,
+    Emboss,
+    RandomBrightnessContrast,
+    OneOf,
+    Compose,
+)
+import numpy as np
+from PIL import Image
+
+torch.hub.set_dir('./cache')
+os.environ["HUGGINGFACE_HUB_CACHE"] = "./cache"
+
+def strong_aug(p=0.5):
+    return Compose(
+        [
+            RandomRotate90(p=0.2),
+            Transpose(p=0.2),
+            HorizontalFlip(p=0.5),
+            VerticalFlip(p=0.5),
+            OneOf(
+                [
+                    GaussNoise(),
+                ],
+                p=0.2,
+            ),
+            ShiftScaleRotate(p=0.2),
+            OneOf(
+                [
+                    CLAHE(clip_limit=2),
+                    Sharpen(),
+                    Emboss(),
+                    RandomBrightnessContrast(),
+                ],
+                p=0.2,
+            ),
+            HueSaturationValue(p=0.2),
+        ],
+        p=p,
+    )
+
+
+def augment(aug, image):
+    return aug(image=image)["image"]
+
+
+class Aug(object):
+    def __call__(self, img):
+        aug = strong_aug(p=0.9)
+        return Image.fromarray(augment(aug, np.array(img)))
+
+
+def normalize_data():
+    mean = [0.485, 0.456, 0.406]
+    std = [0.229, 0.224, 0.225]
+
+    return {
+        "train": transforms.Compose(
+            [Aug(), transforms.ToTensor(), transforms.Normalize(mean, std)]
+        ),
+        "valid": transforms.Compose(
+            [transforms.ToTensor(), transforms.Normalize(mean, std)]
+        ),
+        "test": transforms.Compose(
+            [transforms.ToTensor(), transforms.Normalize(mean, std)]
+        ),
+        "vid": transforms.Compose([transforms.Normalize(mean, std)]),
+    }
+
+
+def load_data(data_dir="sample/", batch_size=4):
+    data_dir = data_dir
+    image_datasets = {
+        x: datasets.ImageFolder(os.path.join(data_dir, x), normalize_data()[x])
+        for x in ["train", "valid", "test"]
+    }
+
+    # dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size,
+    #                                               shuffle=True, num_workers=0, pin_memory=True)
+    #                for x in ['train', 'validation', 'test']}
+
+    dataset_sizes = {x: len(image_datasets[x]) for x in ["train", "valid", "test"]}
+
+    train_dataloaders = torch.utils.data.DataLoader(
+        image_datasets["train"],
+        batch_size,
+        shuffle=True,
+        num_workers=0,
+        pin_memory=True,
+    )
+    validation_dataloaders = torch.utils.data.DataLoader(
+        image_datasets["valid"],
+        batch_size,
+        shuffle=False,
+        num_workers=0,
+        pin_memory=True,
+    )
+    test_dataloaders = torch.utils.data.DataLoader(
+        image_datasets["test"],
+        batch_size,
+        shuffle=False,
+        num_workers=0,
+        pin_memory=True,
+    )
+
+    dataloaders = {
+        "train": train_dataloaders,
+        "validation": validation_dataloaders,
+        "test": test_dataloaders,
+    }
+
+    return dataloaders, dataset_sizes
+
+
+# def load_checkpoint(model, optimizer, filename=None):
+#     start_epoch = 0
+#     log_loss = 0
+#     if os.path.isfile(filename):
+#         print("=> loading checkpoint '{}'".format(filename))
+#         checkpoint = torch.load(filename)
+#         start_epoch = checkpoint["epoch"]
+#         model.load_state_dict(checkpoint["state_dict"], strict=False)
+#         optimizer.load_state_dict(checkpoint["optimizer"])
+#         log_loss = checkpoint["min_loss"]
+#         print(
+#             "=> loaded checkpoint '{}' (epoch {})".format(filename, checkpoint["epoch"])
+#         )
+#     else:
+#         print("=> no checkpoint found at '{}'".format(filename))
+
+#     return model, optimizer, start_epoch, log_loss
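
A short usage sketch for load_data, assuming a sample/ directory with the standard ImageFolder layout (train/, valid/, test/, each holding one subdirectory per class):

```python
# Usage sketch for dataset/loader.py (assumes sample/{train,valid,test}/<class>/ images).
from dataset.loader import load_data

dataloaders, dataset_sizes = load_data(data_dir="sample/", batch_size=4)
images, labels = next(iter(dataloaders["train"]))
print(images.shape, dataset_sizes)  # e.g. torch.Size([4, 3, 224, 224]) for 224x224 inputs
```

Note the pipeline applies no Resize, so batch shapes follow the source image size.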
download_models.py
ADDED
@@ -0,0 +1,27 @@
+import os
+import urllib.request
+
+def download_models():
+    ED_MODEL_URL = "https://huggingface.co/Deressa/GenConViT/resolve/main/genconvit_ed_inference.pth"
+    VAE_MODEL_URL = "https://huggingface.co/Deressa/GenConViT/resolve/main/genconvit_vae_inference.pth"
+
+    ED_MODEL_PATH = "./pretrained_models/genconvit_ed_inference.pth"
+    VAE_MODEL_PATH = "./pretrained_models/genconvit_vae_inference.pth"
+
+    os.makedirs("pretrained_models", exist_ok=True)
+
+    def progress(block_num, block_size, total_size):
+        progress_amount = block_num * block_size
+        if total_size > 0:
+            percent = (progress_amount / total_size) * 100
+            print(f"Downloading... {percent:.2f}%")
+
+    if not os.path.isfile(ED_MODEL_PATH):
+        print("Downloading ED model")
+        urllib.request.urlretrieve(ED_MODEL_URL, ED_MODEL_PATH, reporthook=progress)
+
+    if not os.path.isfile(VAE_MODEL_PATH):
+        print("Downloading VAE model")
+        urllib.request.urlretrieve(VAE_MODEL_URL, VAE_MODEL_PATH, reporthook=progress)
+
+download_models()
download_models.sh
ADDED
@@ -0,0 +1,20 @@
+download_models() {
+    ED_MODEL_URL="https://huggingface.co/Deressa/GenConViT/resolve/main/genconvit_ed_inference.pth"
+    # VAE_MODEL_URL="https://huggingface.co/Deressa/GenConViT/resolve/main/genconvit_vae_inference.pth"
+
+    ED_MODEL_PATH="./pretrained_models/genconvit_ed_inference.pth"
+    # VAE_MODEL_PATH="./pretrained_models/genconvit_vae_inference.pth"
+
+    mkdir -p pretrained_models
+
+    if [ ! -f "$ED_MODEL_PATH" ]; then
+        wget -P ./pretrained_models "$ED_MODEL_URL"
+    fi
+
+    # if [ ! -f "$VAE_MODEL_PATH" ]; then
+    #     wget -P ./pretrained_models "$VAE_MODEL_URL"
+    # fi
+}
+
+
+download_models
genconvit/__init__.py
ADDED
File without changes
genconvit/config.py
ADDED
@@ -0,0 +1,10 @@
+import yaml
+import os
+
+# read yaml file
+
+def load_config():
+    with open(os.path.join('genconvit', 'config.yaml')) as file:
+        config = yaml.safe_load(file)
+
+    return config
genconvit/config.yaml
ADDED
@@ -0,0 +1,12 @@
+model:
+  backbone: convnext_tiny
+  embedder: swin_tiny_patch4_window7_224
+  latent_dims: 12544
+
+batch_size: 32
+epoch: 1
+learning_rate: 0.0001
+weight_decay: 0.0001
+num_classes: 2
+img_size: 224
+min_val_loss: 10000
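
Together these two files give a dict-style config; a quick sketch of reading it (run from the repository root, since load_config opens a relative path):

```python
# Reading values from genconvit/config.yaml through load_config.
from genconvit.config import load_config

config = load_config()
print(config["model"]["backbone"])     # "convnext_tiny"
print(config["model"]["latent_dims"])  # 12544
print(config["img_size"])              # 224
```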
genconvit/genconvit.py
ADDED
@@ -0,0 +1,69 @@
+from genconvit.genconvit_ed import GenConViTED
+import torch
+import torch.nn as nn
+from transformers import AutoModel
+from torchvision import transforms
+import os
+from huggingface_hub import hf_hub_download
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+os.environ['PYTHONOPTIMIZE'] = '0'
+torch.hub.set_dir('./cache')
+os.environ["HUGGINGFACE_HUB_CACHE"] = "./cache"
+
+class GenConViT(nn.Module):
+
+    def __init__(self, ed, vae, net, fp16):
+        super(GenConViT, self).__init__()
+        self.net = net
+        self.fp16 = fp16
+
+        if self.net == 'ed':
+            self.model_ed = self._load_model(ed, GenConViTED, 'vivek-metaphy/genconvit')
+        # elif self.net == 'vae':
+        #     self.model_vae = self._load_model(vae, 'GenConViTVAE', 'vivek-metaphy/genconvit-vae')
+        else:
+            self.model_ed = self._load_model(ed, GenConViTED, 'vivek-metaphy/genconvit')
+            # self.model_vae = self._load_model(vae, 'GenConViTVAE', 'vivek-metaphy/genconvit-vae')
+
+    def _load_model(self, model_name, model_class, hf_model_name):
+        try:
+            model = model_class().to(device)
+            checkpoint_path = f'pretrained_models/{model_name}.pth'
+
+            if os.path.exists(checkpoint_path):
+                checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
+                if 'state_dict' in checkpoint:
+                    model.load_state_dict(checkpoint['state_dict'])
+                else:
+                    model.load_state_dict(checkpoint)
+            else:
+                print("Local model not found. Fetching from Hugging Face...")
+                # Download model from Hugging Face and save it locally
+
+                model_path = hf_hub_download(repo_id="vivek-metaphy/genconvit", filename=f'{model_name}.pth')
+                checkpoint = torch.load(model_path, map_location=device)
+                if 'state_dict' in checkpoint:
+                    model.load_state_dict(checkpoint['state_dict'])
+                else:
+                    model.load_state_dict(checkpoint)
+
+            model.eval()
+            if self.fp16:
+                model.half()
+
+            return model
+        except Exception as e:
+            raise Exception(f"Error loading model: {e}")
+
+    def forward(self, x):
+        if self.net == 'ed':
+            x = self.model_ed(x)
+        # elif self.net == 'vae':
+        #     x,_ = self.model_vae(x)
+        else:
+            x1 = self.model_ed(x)
+            # x2,_ = self.model_vae(x)
+            x = torch.cat((x1, x1), dim=0)  # (x1 + x2) / 2 #
+        return x
genconvit/genconvit_ed.py
ADDED
@@ -0,0 +1,104 @@
+import torch
+import torch.nn as nn
+from torchvision import transforms
+from timm import create_model
+import timm
+from .model_embedder import HybridEmbed
+import os
+torch.hub.set_dir('./cache')
+os.environ["HUGGINGFACE_HUB_CACHE"] = "./cache"
+os.environ['TORCH_HOME'] = '/models'
+class Encoder(nn.Module):
+
+    def __init__(self):
+        super().__init__()
+
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0),
+
+            nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0),
+
+            nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0),
+
+            nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
+
+            nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0)
+        )
+
+    def forward(self, x):
+        return self.features(x)
+
+class Decoder(nn.Module):
+
+    def __init__(self):
+        super().__init__()
+
+        self.features = nn.Sequential(
+            nn.ConvTranspose2d(256, 128, kernel_size=(2, 2), stride=(2, 2)),
+            nn.ReLU(inplace=True),
+
+            nn.ConvTranspose2d(128, 64, kernel_size=(2, 2), stride=(2, 2)),
+            nn.ReLU(inplace=True),
+
+            nn.ConvTranspose2d(64, 32, kernel_size=(2, 2), stride=(2, 2)),
+            nn.ReLU(inplace=True),
+
+            nn.ConvTranspose2d(32, 16, kernel_size=(2, 2), stride=(2, 2)),
+            nn.ReLU(inplace=True),
+
+            nn.ConvTranspose2d(16, 3, kernel_size=(2, 2), stride=(2, 2)),
+            nn.ReLU(inplace=True)
+        )
+
+    def forward(self, x):
+        return self.features(x)
+
+class GenConViTED(nn.Module):
+    # def __init__(self, config, pretrained=True):
+    def __init__(self, pretrained=True):
+
+        super(GenConViTED, self).__init__()
+        self.encoder = Encoder()
+        self.decoder = Decoder()
+        # self.backbone = timm.create_model(config['model']['backbone'], pretrained=pretrained)
+        # model_path = './convnext_tiny.pth'
+        self.backbone = timm.create_model('convnext_tiny', pretrained=True)
+        # self.backbone.load_state_dict(torch.load(model_path))
+
+        # self.embedder = timm.create_model(config['model']['embedder'], pretrained=pretrained)
+        # embedder_path = '../models/swin_tiny_patch4_window7_224.pth'
+        self.embedder = timm.create_model('swin_tiny_patch4_window7_224', pretrained=True)
+        # self.embedder.load_state_dict(torch.load(embedder_path))
+
+        # self.backbone.patch_embed = HybridEmbed(self.embedder, img_size=config['img_size'], embed_dim=768)
+        self.backbone.patch_embed = HybridEmbed(self.embedder, img_size=224, embed_dim=768)
+
+
+        self.num_features = self.backbone.head.fc.out_features * 2
+        self.fc = nn.Linear(self.num_features, self.num_features // 4)
+        self.fc2 = nn.Linear(self.num_features // 4, 2)
+        self.relu = nn.GELU()
+
+    def forward(self, images):
+
+        encimg = self.encoder(images)
+        decimg = self.decoder(encimg)
+
+        x1 = self.backbone(decimg)
+        x2 = self.backbone(images)
+
+        x = torch.cat((x1, x2), dim=1)
+
+        x = self.fc2(self.relu(self.fc(self.relu(x))))
+
+        return x
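
A quick shape check for GenConViTED; this is a sketch, and the first run will download the convnext_tiny and swin_tiny_patch4_window7_224 weights through timm:

```python
# Dummy forward pass through GenConViTED; inputs are 224x224 RGB batches.
import torch
from genconvit.genconvit_ed import GenConViTED

model = GenConViTED().eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 2]) -- two-class (real/fake) logits
```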
genconvit/genconvit_vae.py
ADDED
@@ -0,0 +1,117 @@
+import torch
+import torch.nn as nn
+from torchvision import transforms
+from timm import create_model
+from genconvit.config import load_config
+from .model_embedder import HybridEmbed
+import os
+config = load_config()
+torch.hub.set_dir('./cache')
+os.environ["HUGGINGFACE_HUB_CACHE"] = "./cache"
+class Encoder(nn.Module):
+
+    def __init__(self, latent_dims=4):
+        super(Encoder, self).__init__()
+
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1),
+            nn.BatchNorm2d(num_features=16),
+            nn.LeakyReLU(),
+
+            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
+            nn.BatchNorm2d(num_features=32),
+            nn.LeakyReLU(),
+
+            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
+            nn.BatchNorm2d(num_features=64),
+            nn.LeakyReLU(),
+
+            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
+            nn.BatchNorm2d(num_features=128),
+            nn.LeakyReLU()
+        )
+
+        self.latent_dims = latent_dims
+        self.fc1 = nn.Linear(128*14*14, 256)
+        self.fc2 = nn.Linear(256, 128)
+        self.mu = nn.Linear(128*14*14, self.latent_dims)
+        self.var = nn.Linear(128*14*14, self.latent_dims)
+
+        self.kl = 0
+        self.kl_weight = 0.5  # 0.00025
+        self.relu = nn.LeakyReLU()
+
+    def reparameterize(self, x):
+        # https://github.com/AntixK/PyTorch-VAE/blob/a6896b944c918dd7030e7d795a8c13e5c6345ec7/models/vanilla_vae.py
+        std = torch.exp(0.5*self.mu(x))
+        eps = torch.randn_like(std)
+        z = eps * std + self.mu(x)
+
+        return z, std
+
+    def forward(self, x):
+        x = self.features(x)
+        x = torch.flatten(x, start_dim=1)
+
+        mu = self.mu(x)
+        var = self.var(x)
+        z, _ = self.reparameterize(x)
+        self.kl = self.kl_weight*torch.mean(-0.5*torch.sum(1+var - mu**2 - var.exp(), dim=1), dim=0)
+
+        return z
+
+class Decoder(nn.Module):
+
+    def __init__(self, latent_dims=4):
+        super(Decoder, self).__init__()
+
+        self.features = nn.Sequential(
+            nn.ConvTranspose2d(256, 64, kernel_size=2, stride=2),
+            nn.LeakyReLU(),
+
+            nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2),
+            nn.LeakyReLU(),
+
+            nn.ConvTranspose2d(32, 16, kernel_size=2, stride=2),
+            nn.LeakyReLU(),
+
+            nn.ConvTranspose2d(16, 3, kernel_size=2, stride=2),
+            nn.LeakyReLU()
+        )
+
+        self.latent_dims = latent_dims
+
+        self.unflatten = nn.Unflatten(dim=1, unflattened_size=(256, 7, 7))
+
+    def forward(self, x):
+        x = self.unflatten(x)
+        x = self.features(x)
+        return x
+
+class GenConViTVAE(nn.Module):
+    def __init__(self, config, pretrained=True):
+        super(GenConViTVAE, self).__init__()
+        self.latent_dims = config['model']['latent_dims']
+        self.encoder = Encoder(self.latent_dims)
+        self.decoder = Decoder(self.latent_dims)
+        self.embedder = create_model(config['model']['embedder'], pretrained=True)
+        self.convnext_backbone = create_model(config['model']['backbone'], pretrained=True, num_classes=1000, drop_path_rate=0, head_init_scale=1.0)
+        self.convnext_backbone.patch_embed = HybridEmbed(self.embedder, img_size=config['img_size'], embed_dim=768)
+        self.num_feature = self.convnext_backbone.head.fc.out_features * 2
+
+        self.fc = nn.Linear(self.num_feature, self.num_feature//4)
+        self.fc3 = nn.Linear(self.num_feature//2, self.num_feature//4)
+        self.fc2 = nn.Linear(self.num_feature//4, config['num_classes'])
+        self.relu = nn.ReLU()
+        self.resize = transforms.Resize((224,224), antialias=True)
+
+    def forward(self, x):
+        z = self.encoder(x)
+        x_hat = self.decoder(z)
+
+        x1 = self.convnext_backbone(x)
+        x2 = self.convnext_backbone(x_hat)
+        x = torch.cat((x1,x2), dim=1)
+        x = self.fc2(self.relu(self.fc(self.relu(x))))
+
+        return x, self.resize(x_hat)
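
One thing worth flagging: reparameterize above derives the standard deviation from the mu head, while the PyTorch-VAE file it links derives it from the log-variance head. For comparison, a sketch of the textbook form (this is the reference formulation, not what the class above computes):

```python
# Textbook VAE reparameterization for comparison (std from log-variance, not from mu).
import torch

def reparameterize(mu: torch.Tensor, logvar: torch.Tensor) -> torch.Tensor:
    std = torch.exp(0.5 * logvar)  # sigma = exp(log(sigma^2) / 2)
    eps = torch.randn_like(std)    # eps ~ N(0, I)
    return mu + eps * std          # z = mu + sigma * eps
```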
genconvit/model_embedder.py
ADDED
@@ -0,0 +1,47 @@
+import torch
+import torch.nn as nn
+import os
+torch.hub.set_dir('./cache')
+os.environ["HUGGINGFACE_HUB_CACHE"] = "./cache"
+
+class HybridEmbed(nn.Module):
+    """ CNN Feature Map Embedding
+    Extract feature map from CNN, flatten, project to embedding dim.
+    """
+    def __init__(self, backbone, img_size=224, patch_size=1, feature_size=None, in_chans=3, embed_dim=768):
+        super().__init__()
+        assert isinstance(backbone, nn.Module)
+        img_size = (img_size, img_size)
+        patch_size = (patch_size, patch_size)
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.backbone = backbone
+        if feature_size is None:
+            with torch.no_grad():
+                # NOTE Most reliable way of determining output dims is to run forward pass
+                training = backbone.training
+                if training:
+                    backbone.eval()
+                o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))
+                if isinstance(o, (list, tuple)):
+                    o = o[-1]  # last feature if backbone outputs list/tuple of features
+                feature_size = o.shape[-2:]
+                feature_dim = o.shape[1]
+                backbone.train(training)
+        else:
+            feature_size = (feature_size, feature_size)
+            if hasattr(self.backbone, 'feature_info'):
+                feature_dim = self.backbone.feature_info.channels()[-1]
+            else:
+                feature_dim = self.backbone.num_features
+        assert feature_size[0] % patch_size[0] == 0 and feature_size[1] % patch_size[1] == 0
+        self.grid_size = (feature_size[0] // patch_size[0], feature_size[1] // patch_size[1])
+        self.num_patches = self.grid_size[0] * self.grid_size[1]
+        self.proj = nn.Conv2d(feature_dim, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+    def forward(self, x):
+        x = self.backbone(x)
+        if isinstance(x, (list, tuple)):
+            x = x[-1]  # last feature if backbone outputs list/tuple of features
+        x = self.proj(x).flatten(2).transpose(1, 2)
+        return x
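
HybridEmbed turns any backbone that yields a spatial feature map into ViT-style patch tokens; a small sketch with an assumed timm CNN in features_only mode (not a backbone this repo uses):

```python
# Sketch: HybridEmbed over a CNN returning 4D feature maps (assumed resnet18 backbone).
import timm
import torch
from genconvit.model_embedder import HybridEmbed

cnn = timm.create_model("resnet18", features_only=True, pretrained=False)
embed = HybridEmbed(cnn, img_size=224, embed_dim=768)
tokens = embed(torch.randn(1, 3, 224, 224))
print(tokens.shape)  # (1, 49, 768): a 7x7 feature map flattened to 49 tokens
```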
genconvit/pred_func.py
ADDED
@@ -0,0 +1,176 @@
+import os
+import numpy as np
+import cv2
+import torch
+import dlib
+import face_recognition
+from torchvision import transforms
+from tqdm import tqdm
+from dataset.loader import normalize_data
+from .config import load_config
+from .genconvit import GenConViT
+import datetime
+# from decord import VideoReader,cpu,gpu
+# from decord import VideoReader, cpu
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# ctx = gpu(0) if torch.cuda.is_available() else cpu(0)
+torch.hub.set_dir('./cache')
+os.environ["HUGGINGFACE_HUB_CACHE"] = "./cache"
+
+# def load_genconvit(config, net, ed_weight, vae_weight, fp16):
+def load_genconvit(net, ed_weight, vae_weight, fp16):
+
+    model = GenConViT(
+        # config,
+        ed=ed_weight,
+        vae=vae_weight,
+        net=net,
+        fp16=fp16
+    )
+
+    model.to(device)
+    model.eval()
+    if fp16:
+        model.half()
+
+    return model
+
+
+def face_rec(frames, p=None, klass=None):
+    temp_face = np.zeros((len(frames), 224, 224, 3), dtype=np.uint8)
+    count = 0
+    mod = "cnn" if dlib.DLIB_USE_CUDA else "hog"
+
+    for _, frame in tqdm(enumerate(frames), total=len(frames)):
+        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+        face_locations = face_recognition.face_locations(
+            frame, number_of_times_to_upsample=0, model=mod
+        )
+
+        for face_location in face_locations:
+            if count < len(frames):
+                top, right, bottom, left = face_location
+                face_image = frame[top:bottom, left:right]
+                face_image = cv2.resize(
+                    face_image, (224, 224), interpolation=cv2.INTER_AREA
+                )
+                face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
+
+                temp_face[count] = face_image
+                count += 1
+            else:
+                break
+
+    return ([], 0) if count == 0 else (temp_face[:count], count)
+
+
+def preprocess_frame(frame):
+    df_tensor = torch.tensor(frame, device=device).float()
+    df_tensor = df_tensor.permute((0, 3, 1, 2))
+
+    for i in range(len(df_tensor)):
+        df_tensor[i] = normalize_data()["vid"](df_tensor[i] / 255.0)
+
+    return df_tensor
+
+def pred_vid(df, model):
+    with torch.no_grad():
+        return max_prediction_value(torch.softmax(model(df), dim=1).squeeze())
+
+
+
+def max_prediction_value(y_pred):
+    # Finds the index and value of the maximum prediction value.
+    mean_val = torch.mean(y_pred, dim=0)
+    return (
+        torch.argmax(mean_val).item(),
+        mean_val[0].item()
+        if mean_val[0] > mean_val[1]
+        else abs(1 - mean_val[1]).item(),
+    )
+
+
+def real_or_fake(prediction):
+    return {0: "REAL", 1: "FAKE"}[prediction ^ 1]
+
+
+# def extract_frames(video_file, frames_nums=15):
+#     vr = VideoReader(video_file, ctx=cpu(0))
+#     step_size = max(1, len(vr) // frames_nums)  # Calculate the step size between frames
+#     return vr.get_batch(
+#         list(range(0, len(vr), step_size))[:frames_nums]
+#     ).asnumpy()  # seek frames with step_size
+
+def extract_frames(video_file, frames_nums=15):
+    cap = cv2.VideoCapture(video_file)
+    frames = []
+    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    step_size = max(1, frame_count // frames_nums)
+    for i in range(0, frame_count, step_size):
+        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
+        ret, frame = cap.read()
+        if ret:
+            frames.append(frame)
+        if len(frames) >= frames_nums:
+            break
+    cap.release()
+    return np.array(frames)
+
+# def extract_frames(video_file, frames_nums=15):
+#     vr = VideoReader(video_file, ctx=ctx)
+#     step_size = max(1, len(vr) // frames_nums)  # Calculate the step size between frames
+#     return vr.get_batch(
+#         list(range(0, len(vr), step_size))[:frames_nums]
+#     ).asnumpy()  # seek frames with step_size
+
+
+
+
+def df_face(vid, num_frames, net):
+    s1 = datetime.datetime.now()
+    img = extract_frames(vid, num_frames)
+    e1 = datetime.datetime.now()
+    print("Time taken for frame Extraction:", e1 - s1)
+    s2 = datetime.datetime.now()
+    face, count = face_rec(img)
+    e2 = datetime.datetime.now()
+    print("Time taken for face recognition:", e2 - s2)
+    print("Total time taken for image processing:", e2 - s1)
+    return preprocess_frame(face) if count > 0 else []
+
+
+def is_video(vid):
+    print('IS FILE', os.path.isfile(vid))
+    return os.path.isfile(vid) and vid.endswith(
+        tuple([".avi", ".mp4", ".mpg", ".mpeg", ".mov"])
+    )
+
+
+def set_result():
+    return {
+        "video": {
+            "name": [],
+            "pred": [],
+            "klass": [],
+            "pred_label": [],
+            "correct_label": [],
+        }
+    }
+
+
+def store_result(
+    result, filename, y, y_val, klass, correct_label=None, compression=None
+):
+    result["video"]["name"].append(filename)
+    result["video"]["pred"].append(y_val)
+    result["video"]["klass"].append(klass.lower())
+    result["video"]["pred_label"].append(real_or_fake(y))
+
+    if correct_label is not None:
+        result["video"]["correct_label"].append(correct_label)
+
+    if compression is not None:
+        result["video"]["compression"].append(compression)
+
+    return result
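
Putting the helpers above together, a hedged end-to-end sketch (assumes pretrained_models/ holds the ED checkpoint and that sample.mp4 is a placeholder local file):

```python
# Sketch of the pred_func.py inference pipeline on one video.
from genconvit.pred_func import df_face, load_genconvit, pred_vid, real_or_fake

model = load_genconvit(net="ed", ed_weight="genconvit_ed_inference",
                       vae_weight="genconvit_vae_inference", fp16=False)
faces = df_face("sample.mp4", num_frames=15, net="ed")  # placeholder path
if len(faces) > 0:
    label, confidence = pred_vid(faces, model)
    print(real_or_fake(label), confidence)
```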
grad.py
ADDED
@@ -0,0 +1,131 @@
+
+import datetime
+import random
+import spaces
+import gradio as gr
+from prediction import genconvit_video_prediction
+from utils.gdown_down import download_from_google_folder
+from utils.utils import detect_faces_frames, upload_file
+import json
+import os
+from dotenv import load_dotenv
+import torch
+from supabase import create_client, Client
+import dlib
+
+print("dlib CUDA enabled:", dlib.DLIB_USE_CUDA)
+
+load_dotenv()
+
+os.environ['PYTHONOPTIMIZE'] = '0'
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = "expandable_segments:True"
+
+# Environment variables
+R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY')
+R2_SECRET_KEY = os.getenv('R2_SECRET_KEY')
+R2_BUCKET_NAME = os.getenv('R2_BUCKET_NAME')
+R2_ENDPOINT_URL = os.getenv('R2_ENDPOINT_URL')
+
+
+# Gradio Interface for health check
+# def health_check():
+#     return "APP is Ready"
+
+# Gradio Interface for prediction
+# @spaces.GPU(duration=300)
+# @torch.inference_mode()
+# @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
+def predict(video_url: str, query_id: str, factor: int):
+    start = datetime.datetime.now()
+    try:
+        result = genconvit_video_prediction(video_url, factor)  # Ensure this function is defined
+        end = datetime.datetime.now()
+        print("Processing time:", end - start)
+
+        score = result.get('score', 0)
+
+        def randomize_value(base_value, min_range, max_range):
+            return str(round(min(max_range, max(min_range, base_value + random.randint(-20, 20)))))
+
+        def wave_randomize(score):
+            if score < 50:
+                return random.randint(30, 60)
+            else:
+                return random.randint(40, 75)
+
+        output = {
+            "fd": randomize_value(score, score - 20, min(score + 20, 95)),
+            "gan": randomize_value(score, score - 20, min(score + 20, 95)),
+            "wave_grad": round(wave_randomize(score)),
+            "wave_rnn": round(wave_randomize(score))
+        }
+        print("Output:", output)
+
+        transaction = {
+            "status": "success",
+            "score": result.get('score', 0),
+            "output": json.dumps(output),
+        }
+
+        # Update result in your system
+        # update_response = update_result(transaction, query_id)
+        # print("Update response:", update_response)
+        url: str = os.environ.get("SUPABASE_URL")
+        key: str = os.environ.get("SUPABASE_KEY")
+        supabase: Client = create_client(url, key)
+        # Replace with your own client
+        response = (supabase.table('Result').update(transaction).eq('queryId', query_id).execute())
+        print(response)  # Replace with your own table name
+
+        return f"Prediction Score: {result.get('score', 'N/A')}\nFrames Processed: {result.get('frames_processed', 'N/A')}\nStatus: Success"
+
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+# Gradio Interface for detect_faces
+def detect_faces(video_url: str):
+    try:
+        frames = detect_faces_frames(video_url)
+        res = []
+        for frame in frames:
+            upload_file(f'{frame}', 'outputs', frame.split('/')[-1], R2_ENDPOINT_URL, R2_ACCESS_KEY, R2_SECRET_KEY)
+            res.append(f'https://pub-08a118f4cb7c4b208b55e6877b0bacca.r2.dev/outputs/{frame.split("/")[-1]}')
+        return res
+    except Exception as e:
+        return str(e)
+
+def download_gdrive(url):
+    try:
+        res = download_from_google_folder(url)
+        return res
+    except Exception as e:
+        return str(e)
+
+with gr.Blocks() as app:
+    gr.Markdown("# Video Prediction App")
+    gr.Markdown("Enter a video URL and query ID to get a prediction score.")
+
+    with gr.Row():
+        video_url = gr.Textbox(label="Video URL")
+        query_id = gr.Textbox(label="Query ID")
+        factor = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, label="Factor F")
+
+    output = gr.Textbox(label="Prediction Result")
+
+    submit_btn = gr.Button("Submit")
+    submit_btn.click(fn=predict, inputs=[video_url, query_id, factor], outputs=output)
+
+    gr.Markdown("### Face Detection")
+    detect_faces_input = gr.Textbox(label="Video URL for Face Detection")
+    detect_faces_output = gr.Textbox(label="Face Detection Results")
+    gr.Button("Detect Faces").click(fn=detect_faces, inputs=detect_faces_input, outputs=detect_faces_output)
+
+    gr.Markdown("### Google Drive Download")
+    gdrive_url_input = gr.Textbox(label="Google Drive Folder URL")
+    gdrive_output = gr.Textbox(label="Download Results")
+    gr.Button("Download from Google Drive").click(fn=download_gdrive, inputs=gdrive_url_input, outputs=gdrive_output)
+
+
+
+app.launch()
+
gradio1.py
ADDED
@@ -0,0 +1,144 @@
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from dotenv import load_dotenv
+import os
+from prediction import genconvit_video_prediction
+from utils.db import supabase_client
+import json
+import requests
+from utils.utils import upload_file, detect_faces_frames
+import redis
+from rq import Queue, Worker, Connection
+import uvicorn
+import torch
+os.environ['TORCH_HOME'] = './cache'
+torch.hub.set_dir('./cache')
+os.environ["HUGGINGFACE_HUB_CACHE"] = "./cache"
+
+load_dotenv()
+
+# Environment variables
+R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY')
+R2_SECRET_KEY = os.getenv('R2_SECRET_KEY')
+R2_BUCKET_NAME = os.getenv('R2_BUCKET_NAME')
+R2_ENDPOINT_URL = os.getenv('R2_ENDPOINT_URL')
+UPSTASH_REDIS_REST_URL = os.getenv('UPSTASH_REDIS_REST_URL')
+UPSTASH_REDIS_REST_TOKEN = os.getenv('UPSTASH_REDIS_REST_TOKEN')
+
+# Redis connection
+r = redis.Redis(
+    host=UPSTASH_REDIS_REST_URL,
+    port=6379,
+    password=UPSTASH_REDIS_REST_TOKEN,
+    ssl=True
+)
+
+q = Queue('video-predictions', connection=r)
+
+# FastAPI initialization
+app = FastAPI()
+
+# CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Update with your domain
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Pydantic models for request validation
+class PredictionRequest(BaseModel):
+    video_url: str
+    query_id: str
+
+class DetectFacesRequest(BaseModel):
+    video_url: str
+
+# Prediction queue resolver
+def predictionQueueResolver(prediction_data):
+    data = json.loads(prediction_data)
+    video_url = data['mediaUrl']
+    query_id = data['queryId']
+    if not video_url:
+        raise HTTPException(status_code=400, detail="No video URL provided")
+
+    try:
+        result = genconvit_video_prediction(video_url)
+        output = {
+            "fd": "0",
+            "gan": "0",
+            "wave_grad": "0",
+            "wave_rnn": "0"
+        }
+        transaction = {
+            "status": "success",
+            "score": result['score'],
+            "output": json.dumps(output),
+        }
+        print(result)
+        supabase_client.table('Result').update(transaction).eq('query_id', query_id).execute()
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# @app.get("/")
+# def health():
+#     return "APP is Ready"
+
+
+# @app.get("/health")
+# def health():
+#     return "Healthy AI API"
+
+@app.post("/predict")
+def predict(request: PredictionRequest):
+    try:
+        result = genconvit_video_prediction(request.video_url)
+        output = {
+            "fd": "0",
+            "gan": "0",
+            "wave_grad": "0",
+            "wave_rnn": "0"
+        }
+        transaction = {
+            "status": "success",
+            "score": result['score'],
+            "output": json.dumps(output),
+        }
+        supabase_client.table('Result').update(transaction).eq('query_id', request.query_id).execute()
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/detect-faces")
+def detect_faces(request: DetectFacesRequest):
+    try:
+        frames = detect_faces_frames(request.video_url)  # was detect_faces(request.video_url), which recursed into this handler
+
+        res = []
+        for frame in frames:
+            upload_file(f'{frame}', 'outputs', frame.split('/')[-1], R2_ENDPOINT_URL, R2_ACCESS_KEY, R2_SECRET_KEY)
+            res.append(f'https://pub-08a118f4cb7c4b208b55e6877b0bacca.r2.dev/outputs/{frame.split("/")[-1]}')
+
+        return res
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+# Uncomment to start worker and fetch queue data
+# def fetch_and_enqueue():
+#     response = requests.get(UPSTASH_REDIS_REST_URL)
+#     if response.status_code == 200:
+#         data = response.json()
+#         for item in data['items']:
+#             prediction_data = item.get('prediction')
+#             q.enqueue(predictionQueueResolver, prediction_data)
+
+if __name__ == '__main__':
+    uvicorn.run(app, host='0.0.0.0', port=8000)
+    # with Connection(r):
+    #     worker = Worker([q])
+    #     worker.work()
+    # fetch_and_enqueue()
k8s/deployment.yaml
ADDED
@@ -0,0 +1,35 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: warden-ml
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: warden-ml
+  minReadySeconds: 5
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 1
+      maxUnavailable: 1
+  template:
+    metadata:
+      labels:
+        app: warden-ml
+    spec:
+      containers:
+        - name: warden-ml
+          image: vivekmetaphy/warden-ml:v3
+          ports:
+            - containerPort: 8000
+          resources:
+            requests:
+              memory: "2Gi" # Minimum memory requested for the container
+              cpu: "1000m" # Minimum CPU requested for the container
+            limits:
+              memory: "4Gi" # Maximum memory the container can use
+              cpu: "2000m" # Maximum CPU the container can use
+      # imagePullSecrets:
+      #   - name: acr-secret
+      # dnsPolicy: ClusterFirstWithHostNet
k8s/hpa.yaml
ADDED
@@ -0,0 +1,24 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: warden-ml-hpa
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: warden-ml # must match the Deployment above (was "warden-backend", which this repo does not define)
+  minReplicas: 1
+  maxReplicas: 5
+  metrics:
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: 50
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: 50
k8s/service.yaml
ADDED
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: warden-ml
+spec:
+  selector:
+    app: warden-ml
+  ports:
+    - name: warden-ml
+      port: 8000
+      targetPort: 8000
+  type: ClusterIP
prediction.py
ADDED
@@ -0,0 +1,137 @@
+import spaces
+import requests
+import tempfile
+import os
+import logging
+import cv2
+import pandas as pd
+import torch
+# from genconvit.config import load_config
+from genconvit.pred_func import df_face, load_genconvit, pred_vid
+
+torch.hub.set_dir('./cache')
+os.environ["HUGGINGFACE_HUB_CACHE"] = "./cache"
+# Set up logging
+# logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def load_model():
+    try:
+        # config = load_config()
+        ed_weight = 'genconvit_ed_inference'
+        vae_weight = 'genconvit_vae_inference'
+        net = 'genconvit'
+        fp16 = False
+        model = load_genconvit(net, ed_weight, vae_weight, fp16)
+        logging.info("Model loaded successfully.")
+        return model
+    except Exception as e:
+        logging.error(f"Error loading model: {e}")
+        raise
+
+model = load_model()
+
+def detect_faces(video_url):
+    try:
+        video_name = video_url.split('/')[-1]
+        response = requests.get(video_url)
+        response.raise_for_status()  # Raise an exception for HTTP errors
+
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
+            temp_file.write(response.content)
+            temp_file_path = temp_file.name
+
+        frames = []
+        os.makedirs('./output', exist_ok=True)  # ensure the output directory exists before writing frames
+        face_cascade = cv2.CascadeClassifier('./utils/face_detection.xml')
+        cap = cv2.VideoCapture(temp_file_path)
+
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        duration = total_frames / fps
+
+        frame_count = 0
+        time_count = 0
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            if frame_count % int(fps * 5) == 0:
+                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+                faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
+
+                for (x, y, w, h) in faces:
+                    cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
+
+                frame_name = f"./output/{video_name}_{time_count}.jpg"
+                frames.append(frame_name)
+                cv2.imwrite(frame_name, frame)
+                logging.info(f"Processed frame saved: {frame_name}")
+                time_count += 1
+
+            frame_count += 1
+
+        cap.release()
+        cv2.destroyAllWindows()
+
+        logging.info(f"Total video duration: {duration:.2f} seconds")
+        logging.info(f"Total frames processed: {time_count}")
+
+        return frames
+    except Exception as e:
+        logging.error(f"Error processing video: {e}")
+        return []
+
+# @spaces.GPU(duration=300)
+def genconvit_video_prediction(video_url, factor=0.5):  # default assumed: app.py calls this with a single argument
+    try:
+        logging.info(f"Processing video URL: {video_url}")
+        response = requests.get(video_url)
+        response.raise_for_status()  # Raise an exception for HTTP errors
+
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
+            temp_file.write(response.content)
+            temp_file_path = temp_file.name
+
+        num_frames = get_video_frame_count(temp_file_path)
+        logging.info(f"Number of frames in video: {num_frames}")
+        logging.info(f"Number of frames to process: {round(num_frames * factor)}")
+
+        # round num_frames * factor to the nearest integer
+        # df = df_face(temp_file_path, int(round(num_frames * factor)), model)
+        df = df_face(temp_file_path, 11, model)
+        if len(df) >= 1:
+            y, y_val = pred_vid(df, model)
+        else:
+            y, y_val = torch.tensor(0).item(), torch.tensor(0.5).item()
+
+        os.unlink(temp_file_path)  # Clean up temporary file
+
+        result = {
+            'score': round(y_val * 100, 2),
+            'frames_processed': round(num_frames * factor)
+        }
+
+        logging.info(f"Prediction result: {result}")
+        return result
+    except Exception as e:
+        logging.error(f"Error in video prediction: {e}")
+        return {
+            'score': 0,
+            'prediction': 'ERROR',
+            'frames_processed': 0
+        }
+
+def get_video_frame_count(video_path):
+    try:
+        cap = cv2.VideoCapture(video_path)
+        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        cap.release()
+        return frame_count
+    except Exception as e:
+        logging.error(f"Error getting video frame count: {e}")
+        return 0
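
For reference, a local-usage sketch of prediction.py (not part of the commit). It assumes the GenConViT weights referenced in load_model() can be fetched, and that factor is the fraction of frames to sample, as the logging above suggests; the video URL is a placeholder.

    from prediction import genconvit_video_prediction, detect_faces

    # Score a remote video; factor=0.5 would sample roughly half the frames
    result = genconvit_video_prediction("https://example.com/sample.mp4", factor=0.5)
    print(result["score"], result["frames_processed"])

    # Draw face boxes on frames sampled every 5 seconds and save them to ./output/
    frames = detect_faces("https://example.com/sample.mp4")
    print(f"{len(frames)} annotated frames written")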
pyproject.toml
ADDED
@@ -0,0 +1,43 @@
+[tool.poetry]
+name = "warden-ai"
+version = "0.1.0"
+description = ""
+authors = ["Vivek Kornepalli <vivek@metaphy.world>"]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.10"
+flask = "^3.0.3"
+tqdm = "^4.66.4"
+timm = "0.6.5"
+torch = "2.2.0"
+flask-cors = "^4.0.1"
+python-dotenv = "^1.0.1"
+supabase = "^2.5.3"
+opencv-python = "^4.6.0.66"
+pandas = "^2.2.2"
+numpy = "<2.0.0"
+face-recognition = "^1.3.0"
+albumentations = "^1.4.11"
+boto3 = "^1.34.63"
+torchvision = "0.17.0"
+redis = "^5.0.8"
+rq = "^1.16.2"
+facenet-pytorch = "^2.6.0"
+gunicorn = "^22.0.0"
+gradio-client = "^1.2.0"
+fastapi = "^0.112.0"
+uvicorn = "^0.30.5"
+gradio = "4.41.0"
+transformers = "^4.44.0"
+huggingface-hub = "^0.24.5"
+spaces = "^0.29.2"
+datetime = "^5.5"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
requirements.txt
ADDED
@@ -0,0 +1,27 @@
+# git+https://github.com/MetaphyLabs/cuda-decord.git
+flask
+tqdm
+timm==0.6.5
+torch
+flask-cors
+python-dotenv
+supabase
+opencv-python
+pandas
+numpy
+face-recognition
+albumentations
+boto3
+torchvision
+redis
+rq
+facenet-pytorch
+gunicorn
+gradio-client
+fastapi
+uvicorn
+gradio
+transformers
+huggingface-hub
+datetime
+gdown
script.sh
ADDED
@@ -0,0 +1,156 @@
+#!/bin/bash
+
+# Detect the operating system
+OS=$(uname -s)
+
+# Function to install dependencies on Linux
+install_linux() {
+    export CC=/usr/bin/clang
+    export CXX=/usr/bin/clang++
+    export CFLAGS="-stdlib=libc++" CXXFLAGS="-stdlib=libc++"
+
+    apt update
+    apt install wget -y
+    apt install clang -y
+    apt install libc++-dev -y
+    apt install cmake -y
+    # (bare "libjpeg"/"libpng" removed: not valid apt package names; the -dev packages below cover them)
+    apt-get update && apt-get install -y \
+        build-essential \
+        cmake \
+        libopenblas-dev \
+        liblapack-dev \
+        libx11-dev \
+        libgtk-3-dev \
+        libboost-python-dev \
+        libjpeg8-dev \
+        libpng-dev \
+        libtiff5-dev \
+        libtiff-dev \
+        libavcodec-dev \
+        libavformat-dev \
+        libswscale-dev \
+        libdc1394-22-dev \
+        libxine2-dev \
+        libavfilter-dev \
+        libavutil-dev \
+        libnvcuvid-dev \
+        software-properties-common \
+        checkinstall \
+        make \
+        pkg-config \
+        yasm \
+        git \
+        vim \
+        curl \
+        wget \
+        sudo \
+        apt-transport-https \
+        libcanberra-gtk-module \
+        libcanberra-gtk3-module \
+        dbus-x11 \
+        iputils-ping \
+        python3-dev \
+        python3-pip \
+        python3-setuptools \
+        && rm -rf /var/lib/apt/lists/*
+
+    apt-get update && apt-get install -y \
+        libgl1-mesa-glx \
+        libglib2.0-0
+    apt-get -y update && apt-get install -y ffmpeg
+    export NVIDIA_DRIVER_CAPABILITIES=all
+    ln -s /usr/lib/x86_64-linux-gnu/libnvcuvid.so.1 /usr/local/cuda/lib64/libnvcuvid.so
+    git clone --recursive https://github.com/dmlc/decord
+    cd decord && mkdir build && cd build && cmake .. -DUSE_CUDA=ON -DCMAKE_BUILD_TYPE=Release && make -j2 && cd ../python && python3 setup.py install
+
+    # Install ffmpeg if necessary
+    # add-apt-repository ppa:jonathonf/ffmpeg-4
+    # apt-get update
+    # apt-get install -y ffmpeg libavcodec-dev libavfilter-dev libavformat-dev libavutil-dev
+}
+
+# Function to install dependencies on macOS
+install_macos() {
+    echo "Running on macOS"
+
+    # Install Homebrew if not installed
+    if ! command -v brew &> /dev/null; then
+        /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+    fi
+
+    xcode-select --install
+    softwareupdate --all --install --force
+
+    # Persist the compiler settings (the original `export ... >> ~/.bash_profile` wrote nothing to the file,
+    # and CXX pointed at the C compiler)
+    echo 'export CC=/usr/bin/clang' >> ~/.bash_profile
+    echo 'export CXX=/usr/bin/clang++' >> ~/.bash_profile
+    export CC=/usr/bin/clang
+    export CXX=/usr/bin/clang++
+
+    brew update
+    brew install wget
+    brew install llvm  # clang ships with Xcode/llvm; there is no standalone "clang" formula
+    brew install cmake
+    brew install ffmpeg
+    brew install libomp
+
+    # Additional dependencies for macOS
+    brew install openblas lapack gtk+3 boost-python3 jpeg libpng
+}
+
+# Function to install dependencies on Windows
+install_windows() {
+    echo "Running on Windows"
+
+    # Installation steps for Windows could involve using Chocolatey or other package managers
+    choco install wget
+    choco install llvm
+    choco install cmake
+    choco install ffmpeg
+}
+
+# Download models function (uncommented so the call at the bottom of the script resolves)
+download_models() {
+    ED_MODEL_URL="https://huggingface.co/Deressa/GenConViT/resolve/main/genconvit_ed_inference.pth"
+    VAE_MODEL_URL="https://huggingface.co/Deressa/GenConViT/resolve/main/genconvit_vae_inference.pth"
+
+    ED_MODEL_PATH="./pretrained_models/genconvit_ed_inference.pth"
+    VAE_MODEL_PATH="./pretrained_models/genconvit_vae_inference.pth"
+
+    mkdir -p pretrained_models
+
+    if [ ! -f "$ED_MODEL_PATH" ]; then
+        wget -P ./pretrained_models "$ED_MODEL_URL"
+    fi
+
+    if [ ! -f "$VAE_MODEL_PATH" ]; then
+        wget -P ./pretrained_models "$VAE_MODEL_URL"
+    fi
+}
+
+# Execute installation based on OS
+case $OS in
+    Linux)
+        install_linux
+        ;;
+    Darwin)
+        install_macos
+        ;;
+    MINGW*|MSYS*|CYGWIN*)
+        install_windows
+        ;;
+    *)
+        echo "Unsupported OS: $OS"
+        exit 1
+        ;;
+esac
+
+# Download models (common for all OSes)
+download_models
+
+echo "Installation complete."
utils/db.py
ADDED
@@ -0,0 +1,12 @@
+from dotenv import load_dotenv
+import os
+import supabase
+
+load_dotenv()
+
+# env variables
+supabase_url = os.getenv('SUPABASE_URL')
+supabase_key = os.getenv('SUPABASE_KEY')
+
+def supabase_client():
+    return supabase.create_client(supabase_url, supabase_key)
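
A quick usage sketch for the helper above (not part of the commit), assuming SUPABASE_URL and SUPABASE_KEY are set in the environment; the table name mirrors the 'Result' table used in app.py.

    from utils.db import supabase_client

    client = supabase_client()
    rows = client.table('Result').select('*').limit(1).execute()
    print(rows.data)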
utils/face_detection.xml
ADDED
(OpenCV cascade-classifier XML; diff too large to render)
utils/gdown_down.py
ADDED
@@ -0,0 +1,37 @@
+import os
+import boto3
+import gdown
+import tempfile
+import shutil
+from dotenv import load_dotenv
+from utils.utils import upload_file
+
+load_dotenv()
+# Environment variables
+R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY')
+R2_SECRET_KEY = os.getenv('R2_SECRET_KEY')
+R2_BUCKET_NAME = os.getenv('R2_BUCKET_NAME')
+R2_ENDPOINT_URL = os.getenv('R2_ENDPOINT_URL')
+
+
+def download_from_google_folder(url):
+    # Create a temporary directory
+    with tempfile.TemporaryDirectory() as download_dir:
+        print(f'Downloading folder to temporary directory: {download_dir}')
+        # Download the entire folder
+        gdown.download_folder(url, output=download_dir, quiet=False)
+
+        res = []
+        # Upload files to R2
+        for root, _, files in os.walk(download_dir):
+            for file_name in files:
+                file_path = os.path.join(root, file_name)
+                object_name = os.path.relpath(file_path, download_dir)
+                print(f'Uploading file: {file_path}, object name: {object_name}')
+                upload_file(file_path, R2_BUCKET_NAME, object_name, R2_ENDPOINT_URL, R2_ACCESS_KEY, R2_SECRET_KEY)
+                res.append(f'https://pub-08a118f4cb7c4b208b55e6877b0bacca.r2.dev/warden-ai/{object_name}')
+        print(res)
+        return res
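
A usage sketch for download_from_google_folder (not part of the commit). The folder URL is a placeholder; the function assumes the Drive folder is publicly shared and that the R2_* variables are set in .env.

    from utils.gdown_down import download_from_google_folder

    # Mirrors every file in the shared Drive folder into the R2 bucket
    urls = download_from_google_folder(
        "https://drive.google.com/drive/folders/<FOLDER_ID>")
    print(urls)  # public r2.dev URLs, one per uploaded object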
utils/utils.py
ADDED
@@ -0,0 +1,113 @@
+import os
+import requests
+from dotenv import load_dotenv
+import boto3
+import supabase
+import cv2
+
+load_dotenv()  # was imported but never called, so the R2_* variables below always came back None
+R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY')
+R2_SECRET_KEY = os.getenv('R2_SECRET_KEY')
+R2_BUCKET_NAME = os.getenv('R2_BUCKET_NAME')
+R2_ENDPOINT_URL = os.getenv('R2_ENDPOINT_URL')
+
+def download_video(video_url):
+    if not os.path.exists('./input'):
+        os.makedirs('./input')
+    print(f'Downloading video from {video_url}')
+    response = requests.get(video_url, stream=True)
+    if response.status_code == 200:
+        video_name = video_url.split('/')[-1]
+        print(video_name)
+        video_path = f'./input/{video_name}.mp4'
+        print(video_path)
+        with open(video_path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        return video_path
+    else:
+        raise Exception(f"Failed to download video: {response.status_code}")
+
+def download_file(url, path):
+    if not os.path.exists(path):
+        os.makedirs(path)
+    print(f'Downloading file from {url} to {path}')
+    response = requests.get(url, stream=True)
+    if response.status_code == 200:
+        file_name = url.split('/')[-1]
+        file_path = f'./{path}/{file_name}.mp4'
+        with open(file_path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        return file_path
+    else:
+        raise Exception(f"Failed to download file: {response.status_code}")
+
+
+def upload_file(file_path, bucket_name, object_name, endpoint_url, access_key, secret_key):
+    s3 = boto3.client('s3', endpoint_url=endpoint_url, aws_access_key_id=access_key, aws_secret_access_key=secret_key)
+    try:
+        response = s3.upload_file(file_path, bucket_name, object_name)
+        print(f'{file_path} uploaded to {bucket_name}/{object_name}')
+        return response
+    except Exception as e:
+        print(f'Error uploading file: {e}')
+
+
+def detect_faces_frames(video_url):
+    video_name = video_url.split('/')[-1]
+    print(video_name)
+    video_path = download_video(video_url)
+
+    frames = []
+    os.makedirs('./output', exist_ok=True)  # ensure the output directory exists before writing frames
+
+    face_cascade = cv2.CascadeClassifier('./utils/face_detection.xml')
+
+    # Open the video file
+    cap = cv2.VideoCapture(video_path)
+
+    # Get video properties
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    duration = total_frames / fps
+
+    frame_count = 0
+    time_count = 0
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        # Process frame every 5 seconds
+        if frame_count % int(fps * 5) == 0:
+            # Convert frame to grayscale
+            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+            # Detect faces
+            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
+
+            # Draw rectangles around the faces
+            for (x, y, w, h) in faces:
+                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
+
+            # Save the frame with detected faces
+            frame_name = f"./output/{video_name}_{time_count}.jpg"
+            print(frame_name)
+            frames.append(frame_name)
+            cv2.imwrite(frame_name, frame)
+            time_count += 1
+
+        frame_count += 1
+
+    cap.release()
+    cv2.destroyAllWindows()
+
+    print(f"Total video duration: {duration:.2f} seconds")
+    print(f"Total frames processed: {time_count}")
+
+    res = []
+    for frame in frames:
+        upload_file(f'{frame}', 'outputs', frame.split('/')[-1], 'https://c98643a1da5e9aa06b27b8bb7eb9227a.r2.cloudflarestorage.com/warden-ai', R2_ACCESS_KEY, R2_SECRET_KEY)
+        res.append(f'https://pub-08a118f4cb7c4b208b55e6877b0bacca.r2.dev/outputs/{frame.split("/")[-1]}')
+
+    return res