"""🎬 Keras Video Classification CNN-RNN model

Hugging Face Space demonstrating how to use the model.

Author:
- Thomas Chaigneau @ChainYo
"""

import os

import cv2
import gradio as gr
import numpy as np
from tensorflow import keras
from huggingface_hub import from_pretrained_keras


IMG_SIZE = 224
NUM_FEATURES = 2048

# Load the pretrained CNN-RNN video classifier from the Hugging Face Hub.
model = from_pretrained_keras("keras-io/video-classification-cnn-rnn")


# Collect the example videos shipped with the Space for the Gradio demo.
samples = []
for file in os.listdir("samples"):
    samples.append([f"samples/{file}"])


def crop_center_square(frame):
    """Crop the largest centered square out of a frame."""
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]


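# For example, a 640x480 frame becomes its central 480x480 square, so the
# resize to (IMG_SIZE, IMG_SIZE) below does not distort the aspect ratio.

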
def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    """Read a video file into an array of square RGB frames.

    With the default max_frames=0, every frame is read until the stream ends.
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]  # BGR (OpenCV) -> RGB
            frames.append(frame)

            if len(frames) == max_frames:
                break
    finally:
        cap.release()
    return np.array(frames)


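# Usage sketch (the file name is only illustrative):
#   frames = load_video("samples/v_Punch_g01_c01.avi")
#   frames.shape  # -> (num_frames, IMG_SIZE, IMG_SIZE, 3)

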
def build_feature_extractor():
    """Build an InceptionV3 backbone that turns one frame into a feature vector."""
    feature_extractor = keras.applications.InceptionV3(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    preprocess_input = keras.applications.inception_v3.preprocess_input

    inputs = keras.Input((IMG_SIZE, IMG_SIZE, 3))
    preprocessed = preprocess_input(inputs)

    outputs = feature_extractor(preprocessed)
    return keras.Model(inputs, outputs, name="feature_extractor")


feature_extractor = build_feature_extractor()


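# Sanity-check sketch: with include_top=False and pooling="avg", InceptionV3
# maps one frame to a NUM_FEATURES-dimensional (2048) vector.
#   dummy = np.zeros((1, IMG_SIZE, IMG_SIZE, 3), dtype="float32")
#   feature_extractor.predict(dummy).shape  # -> (1, NUM_FEATURES)

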
def prepare_video(frames, max_seq_length: int = 20):
    """Extract per-frame features and a boolean mask for one video."""
    frames = frames[None, ...]
    frame_mask = np.zeros(shape=(1, max_seq_length), dtype="bool")
    frame_features = np.zeros(shape=(1, max_seq_length, NUM_FEATURES), dtype="float32")

    for i, batch in enumerate(frames):
        video_length = batch.shape[0]
        length = min(max_seq_length, video_length)
        for j in range(length):
            frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :])
        frame_mask[i, :length] = 1  # mark the real (non-padded) time steps

    return frame_features, frame_mask


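# The classifier consumes fixed-length sequences: frame_features has shape
# (1, 20, NUM_FEATURES) and frame_mask has shape (1, 20), with True where a
# time step holds a real frame and False where the sequence is zero-padded.

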
def sequence_prediction(path):
    """Run the classifier on a video file and return per-class probabilities."""
    class_vocab = ["CricketShot", "PlayingCello", "Punch", "ShavingBeard", "TennisSwing"]

    frames = load_video(path)
    frame_features, frame_mask = prepare_video(frames)
    probabilities = model.predict([frame_features, frame_mask])[0]

    preds = {}
    for i in np.argsort(probabilities)[::-1]:
        preds[class_vocab[i]] = float(probabilities[i])
    return preds


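# Example call (hypothetical file): sequence_prediction("samples/v_TennisSwing_g01_c01.avi")
# returns a dict mapping every class name to its probability, ordered from most
# to least likely, which gr.Label renders as a ranked list.

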
article = "<div style='text-align: center;'><a href='https://github.com/ChainYo' target='_blank'>Space by Thomas Chaigneau</a><br><a href='https://keras.io/examples/vision/video_classification/' target='_blank'>Keras example by Sayak Paul</a></div>"


app = gr.Interface(
    fn=sequence_prediction,
    inputs=[gr.Video(label="Video")],
    outputs=gr.Label(label="Prediction"),
    title="Keras Video Classification with CNN-RNN",
    description="Video classification demo using a CNN-RNN based model.",
    article=article,
    examples=samples,
)

app.launch()