diff --git a/.gitignore b/.gitignore
index 1171c56e..c6d36daa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,10 @@ tests/resources/*
 *.mkv
 *.m4v
 *.csv
+benchmarks/BBC/*.mp4
+benchmarks/BBC/*.txt
+benchmarks/RAI/*.mp4
+benchmarks/RAI/*.txt
 
 # From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
diff --git a/benchmarks/BBC/.gitkeep b/benchmarks/BBC/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 00000000..a6c5f24a
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,50 @@
+# Benchmarking PySceneDetect
+This directory benchmarks the performance of PySceneDetect in terms of both latency and accuracy.
+We evaluate it using the standard dataset for video shot detection: [BBC](https://zenodo.org/records/14865504).
+
+## Dataset Download
+### BBC
+```
+# annotation
+wget -O BBC/fixed.zip https://zenodo.org/records/14873790/files/fixed.zip
+unzip BBC/fixed.zip -d BBC
+rm -rf BBC/fixed.zip
+
+# videos
+wget -O BBC/videos.zip https://zenodo.org/records/14873790/files/videos.zip
+unzip BBC/videos.zip -d BBC
+rm -rf BBC/videos.zip
+```
+
+## Evaluation
+To evaluate PySceneDetect on the BBC dataset, run the following command:
+```
+python benchmark.py --detector <detector_name>
+```
+For example, to evaluate ContentDetector on the BBC dataset:
+```
+python benchmark.py --detector detect-content
+```
+
+## Result
+The performance is computed as recall, precision, f1, and elapsed time.
+The following results indicate that ContentDetector achieves the highest performance on the BBC dataset.
+
+| Detector          | Recall | Precision | F1    | Elapsed time (second) |
+|:-----------------:|:------:|:---------:|:-----:|:---------------------:|
+| AdaptiveDetector  | 7.80   | 96.18     | 14.44 | 25.75                 |
+| ContentDetector   | 84.52  | 88.77     | 86.59 | 25.50                 |
+| HashDetector      | 8.57   | 80.27     | 15.48 | 23.78                 |
+| HistogramDetector | 8.22   | 70.82     | 14.72 | 18.60                 |
+| ThresholdDetector | 0.00   | 0.00      | 0.00  | 18.95                 |
+
+## Citation
+### BBC
+```
+@InProceedings{bbc_dataset,
+  author = {Lorenzo Baraldi and Costantino Grana and Rita Cucchiara},
+  title = {A Deep Siamese Network for Scene Detection in Broadcast Videos},
+  booktitle = {Proceedings of the 23rd ACM International Conference on Multimedia},
+  year = {2015},
+}
+```
\ No newline at end of file
diff --git a/benchmarks/bbc_dataset.py b/benchmarks/bbc_dataset.py
new file mode 100644
index 00000000..d297a5a7
--- /dev/null
+++ b/benchmarks/bbc_dataset.py
@@ -0,0 +1,26 @@
+import os
+import glob
+
+class BBCDataset:
+    """
+    The BBC Dataset, proposed by Baraldi et al. in A deep siamese network for scene detection in broadcast videos
+    Link: https://arxiv.org/abs/1510.08893
+    The dataset consists of 11 videos (BBC/videos/bbc_01.mp4 to BBC/videos/bbc_11.mp4).
+    The annotated scenes are provided in corresponding files (BBC/fixed/[i]-scenes.txt).
+ """ + def __init__(self, dataset_dir: str): + self._video_files = [file for file in sorted(glob.glob(os.path.join(dataset_dir, 'videos', '*.mp4')))] + self._scene_files = [file for file in sorted(glob.glob(os.path.join(dataset_dir, 'fixed', '*-scenes.txt')))] + assert (len(self._video_files) == len(self._scene_files)) + for video_file, scene_file in zip(self._video_files, self._scene_files): + video_id = os.path.basename(video_file).replace('bbc_', '').split('.')[0] + scene_id = os.path.basename(scene_file).split('-')[0] + assert (video_id == scene_id) + + def __getitem__(self, index): + video_file = self._video_files[index] + scene_file = self._scene_files[index] + return video_file, scene_file + + def __len__(self): + return len(self._video_files) diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py new file mode 100644 index 00000000..5f658bbe --- /dev/null +++ b/benchmarks/benchmark.py @@ -0,0 +1,51 @@ +import time +import argparse +from bbc_dataset import BBCDataset +from evaluator import Evaluator + +from tqdm import tqdm +from scenedetect import detect +from scenedetect import AdaptiveDetector, ContentDetector, HashDetector, HistogramDetector, ThresholdDetector + +def _load_detector(detector_name: str): + detector_map = { + 'detect-adaptive': AdaptiveDetector(), + 'detect-content': ContentDetector(), + 'detect-hash': HashDetector(), + 'detect-hist': HistogramDetector(), + 'detect-threshold': ThresholdDetector(), + } + return detector_map[detector_name] + +def _detect_scenes(detector, dataset): + pred_scenes = {} + for video_file, scene_file in tqdm(dataset): + start = time.time() + pred_scene_list = detect(video_file, detector) + elapsed = time.time() - start + + pred_scenes[scene_file] = { + 'video_file': video_file, + 'elapsed': elapsed, + 'pred_scenes': [scene[1].frame_num for scene in pred_scene_list] + } + + return pred_scenes + +def main(args): + dataset = BBCDataset('BBC') + detector = _load_detector(args.detector) + pred_scenes = 
_detect_scenes(detector, dataset) + evaluator = Evaluator() + result = evaluator.evaluate_performance(pred_scenes) + + print('Detector: {} Recall: {:.2f}, Precision: {:.2f}, F1: {:.2f} Elapsed time: {:.2f}' + .format(args.detector, result['recall'], result['precision'], result['f1'], result['elapsed'])) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking PySceneDetect performance.') + parser.add_argument('--detector', type=str, choices=['detect-adaptive', 'detect-content', 'detect-hash', 'detect-hist', 'detect-threshold'], + default='detect-content', help='Detector name. Implemented detectors are listed: https://www.scenedetect.com/docs/latest/cli.html') + args = parser.parse_args() + main(args) \ No newline at end of file diff --git a/benchmarks/evaluator.py b/benchmarks/evaluator.py new file mode 100644 index 00000000..6a8190da --- /dev/null +++ b/benchmarks/evaluator.py @@ -0,0 +1,35 @@ +from statistics import mean + +class Evaluator: + def __init__(self): + pass + + def _load_scenes(self, scene_filename): + with open(scene_filename) as f: + gt_scene_list = [x.strip().split('\t')[1] for x in f.readlines()] + gt_scene_list = [int(x) + 1 for x in gt_scene_list] + return gt_scene_list + + def evaluate_performance(self, pred_scenes): + total_correct = 0 + total_pred = 0 + total_gt = 0 + + for scene_file, pred in pred_scenes.items(): + gt_scene_list = self._load_scenes(scene_file) + pred_list = pred['pred_scenes'] + total_correct += len(set(pred_list) & set(gt_scene_list)) + total_pred += len(pred_list) + total_gt += len(gt_scene_list) + + recall = total_correct / total_gt + precision = total_correct / total_pred + f1 = 2 * recall * precision / (recall + precision) if (recall + precision) != 0 else 0 + avg_elapsed = mean([x['elapsed'] for x in pred_scenes.values()]) + result = { + 'recall': recall * 100, + 'precision': precision * 100, + 'f1': f1 * 100, + 'elapsed': avg_elapsed + } + return result \ No newline at end of file