4 changes: 4 additions & 0 deletions .gitignore
@@ -9,6 +9,10 @@ tests/resources/*
*.mkv
*.m4v
*.csv
benchmarks/BBC/*.mp4
*.txt
benchmarks/RAI/*.mp4
*.txt


# From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
Empty file added benchmarks/BBC/.gitkeep
Empty file.
50 changes: 50 additions & 0 deletions benchmarks/README.md
@@ -0,0 +1,50 @@
# Benchmarking PySceneDetect
This directory contains benchmarks for PySceneDetect, measuring both detection accuracy and processing time.
We evaluate it on a standard dataset for video shot detection: [BBC](https://zenodo.org/records/14865504).

## Dataset Download
### BBC
```
# annotation
wget -O BBC/fixed.zip https://zenodo.org/records/14873790/files/fixed.zip
unzip BBC/fixed.zip -d BBC
rm -rf BBC/fixed.zip

# videos
wget -O BBC/videos.zip https://zenodo.org/records/14873790/files/videos.zip
unzip BBC/videos.zip -d BBC
rm -rf BBC/videos.zip
```
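After unpacking, the scripts in this directory expect the videos under `BBC/videos/` and the annotations under `BBC/fixed/` (this is the layout `bbc_dataset.py` globs for). An optional sanity check of the download, as a small sketch:
```
import glob

# Both counts should be 11 if the download and extraction succeeded.
print(len(glob.glob('BBC/videos/*.mp4')))
print(len(glob.glob('BBC/fixed/*-scenes.txt')))
```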

## Evaluation
To evaluate PySceneDetect on a dataset, run the following command:
```
python benchmark.py -d <dataset_name> --detector <detector_name>
```
For example, to evaluate ContentDetector on the BBC dataset:
```
python benchmark.py -d BBC --detector detect-content
```
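Under the hood, `benchmark.py` is a thin wrapper around PySceneDetect's Python API; a minimal sketch of what it does for a single video (mirroring the script further below) looks like:
```
from scenedetect import detect, ContentDetector

# detect() returns a list of (start, end) FrameTimecode pairs; the end frame
# numbers are what get compared against the ground-truth boundaries.
scene_list = detect('BBC/videos/bbc_01.mp4', ContentDetector())
cut_frames = [end.frame_num for _start, end in scene_list]
print(cut_frames[:5])
```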

## Results
Performance is reported as recall, precision, and F1 over scene boundaries, together with the average processing time per video; a predicted cut counts as correct only if its frame number exactly matches an annotated boundary (a small worked example of the computation follows the table).
The results below show that ContentDetector achieves the highest F1 score on the BBC dataset.

| Detector          | Recall (%) | Precision (%) | F1 (%) | Elapsed time (seconds) |
|:-----------------:|:----------:|:-------------:|:------:|:----------------------:|
| AdaptiveDetector | 7.80 | 96.18 | 14.44 | 25.75 |
| ContentDetector | 84.52 | 88.77 | 86.59 | 25.50 |
| HashDetector | 8.57 | 80.27 | 15.48 | 23.78 |
| HistogramDetector | 8.22 | 70.82 | 14.72 | 18.60 |
| ThresholdDetector | 0.00 | 0.00 | 0.00 | 18.95 |
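As a worked example of how these numbers are produced (this mirrors `evaluator.py`; the frame numbers are made up): a predicted cut counts only on an exact frame match, and recall, precision, and F1 follow directly from the counts:
```
# Hypothetical numbers, for illustration only.
pred = {100, 250, 400}
gt = {100, 250, 500, 700}

correct = len(pred & gt)                             # 2 exact matches
recall = correct / len(gt)                           # 2 / 4 = 0.50
precision = correct / len(pred)                      # 2 / 3 = 0.666...
f1 = 2 * recall * precision / (recall + precision)   # 0.571...
print(recall, precision, f1)
```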

## Citation
### BBC
```
@InProceedings{bbc_dataset,
author = {Lorenzo Baraldi and Costantino Grana and Rita Cucchiara},
title = {A Deep Siamese Network for Scene Detection in Broadcast Videos},
booktitle = {Proceedings of the 23rd ACM International Conference on Multimedia},
year = {2015},
}
```
26 changes: 26 additions & 0 deletions benchmarks/bbc_dataset.py
@@ -0,0 +1,26 @@
import glob
import os


class BBCDataset:
    """The BBC dataset, proposed by Baraldi et al. in "A Deep Siamese Network for
    Scene Detection in Broadcast Videos" (https://arxiv.org/abs/1510.08893).

    The dataset consists of 11 videos (BBC/videos/bbc_01.mp4 to BBC/videos/bbc_11.mp4).
    The annotated scenes are provided in corresponding files (BBC/fixed/[i]-scenes.txt).
    """

    def __init__(self, dataset_dir: str):
        self._video_files = sorted(glob.glob(os.path.join(dataset_dir, 'videos', '*.mp4')))
        self._scene_files = sorted(glob.glob(os.path.join(dataset_dir, 'fixed', '*-scenes.txt')))
        assert len(self._video_files) == len(self._scene_files)
        # Sanity check: video bbc_XX.mp4 must line up with annotation XX-scenes.txt.
        for video_file, scene_file in zip(self._video_files, self._scene_files):
            video_id = os.path.basename(video_file).replace('bbc_', '').split('.')[0]
            scene_id = os.path.basename(scene_file).split('-')[0]
            assert video_id == scene_id

    def __getitem__(self, index):
        video_file = self._video_files[index]
        scene_file = self._scene_files[index]
        return video_file, scene_file

    def __len__(self):
        return len(self._video_files)
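For illustration, the dataset class is consumed like this (a sketch mirroring the loop in `benchmark.py` below, assuming the BBC data has been downloaded as described in the README):
```
from bbc_dataset import BBCDataset

dataset = BBCDataset('BBC')
print(len(dataset))                      # 11 video/annotation pairs
video_file, scene_file = dataset[0]      # first pair
for video_file, scene_file in dataset:   # iterable via the sequence protocol
    print(video_file, scene_file)
```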
51 changes: 51 additions & 0 deletions benchmarks/benchmark.py
@@ -0,0 +1,51 @@
import argparse
import time

from tqdm import tqdm

from scenedetect import detect
from scenedetect import AdaptiveDetector, ContentDetector, HashDetector, HistogramDetector, ThresholdDetector

from bbc_dataset import BBCDataset
from evaluator import Evaluator

def _load_detector(detector_name: str):
    """Map a CLI detector name to a PySceneDetect detector instance."""
    detector_map = {
        'detect-adaptive': AdaptiveDetector(),
        'detect-content': ContentDetector(),
        'detect-hash': HashDetector(),
        'detect-hist': HistogramDetector(),
        'detect-threshold': ThresholdDetector(),
    }
    return detector_map[detector_name]

def _detect_scenes(detector, dataset):
    """Run the detector over every video, recording cut frames and timing."""
    pred_scenes = {}
    for video_file, scene_file in tqdm(dataset):
        start = time.time()
        pred_scene_list = detect(video_file, detector)
        elapsed = time.time() - start

        pred_scenes[scene_file] = {
            'video_file': video_file,
            'elapsed': elapsed,
            # Keep the end frame number of each detected scene; these are the
            # cut points compared against the ground-truth boundaries.
            'pred_scenes': [scene[1].frame_num for scene in pred_scene_list],
        }

    return pred_scenes

def main(args):
    dataset = BBCDataset(args.dataset)
    detector = _load_detector(args.detector)
    pred_scenes = _detect_scenes(detector, dataset)
    evaluator = Evaluator()
    result = evaluator.evaluate_performance(pred_scenes)

    print('Detector: {} Recall: {:.2f}, Precision: {:.2f}, F1: {:.2f} Elapsed time: {:.2f}'
          .format(args.detector, result['recall'], result['precision'], result['f1'], result['elapsed']))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Benchmarking PySceneDetect performance.')
    parser.add_argument('-d', '--dataset', type=str, choices=['BBC'], default='BBC',
                        help='Dataset name (also the directory the dataset was downloaded to).')
    parser.add_argument('--detector', type=str,
                        choices=['detect-adaptive', 'detect-content', 'detect-hash', 'detect-hist', 'detect-threshold'],
                        default='detect-content',
                        help='Detector name. Implemented detectors are listed: https://www.scenedetect.com/docs/latest/cli.html')
    args = parser.parse_args()
    main(args)
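Running the script prints a single summary line. Based on the format string in `main` and the ContentDetector row of the README's results table, the output looks roughly like this (numbers illustrative, tqdm progress output omitted):
```
$ python benchmark.py -d BBC --detector detect-content
Detector: detect-content Recall: 84.52, Precision: 88.77, F1: 86.59 Elapsed time: 25.50
```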
35 changes: 35 additions & 0 deletions benchmarks/evaluator.py
@@ -0,0 +1,35 @@
from statistics import mean


class Evaluator:
    """Computes boundary-level recall, precision, and F1 against the annotations."""

    def _load_scenes(self, scene_filename):
        # Take the second tab-separated field of each annotation line and shift it
        # by one frame so it lines up with the cut frames collected in benchmark.py.
        with open(scene_filename) as f:
            gt_scene_list = [int(x.strip().split('\t')[1]) + 1 for x in f.readlines()]
        return gt_scene_list

    def evaluate_performance(self, pred_scenes):
        total_correct = 0
        total_pred = 0
        total_gt = 0

        for scene_file, pred in pred_scenes.items():
            gt_scene_list = self._load_scenes(scene_file)
            pred_list = pred['pred_scenes']
            # A predicted cut is correct only if its frame number exactly
            # matches an annotated boundary.
            total_correct += len(set(pred_list) & set(gt_scene_list))
            total_pred += len(pred_list)
            total_gt += len(gt_scene_list)

        # Guard against empty prediction/annotation sets to avoid division by zero.
        recall = total_correct / total_gt if total_gt else 0
        precision = total_correct / total_pred if total_pred else 0
        f1 = 2 * recall * precision / (recall + precision) if (recall + precision) != 0 else 0
        avg_elapsed = mean([x['elapsed'] for x in pred_scenes.values()])
        result = {
            'recall': recall * 100,
            'precision': precision * 100,
            'f1': f1 * 100,
            'elapsed': avg_elapsed,
        }
        return result
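For reference, `evaluate_performance` expects the dictionary built by `_detect_scenes` in `benchmark.py`; a minimal hand-built sketch (assuming the BBC annotations are on disk, with made-up frame numbers and timing):
```
from evaluator import Evaluator

pred_scenes = {
    'BBC/fixed/01-scenes.txt': {              # key: ground-truth annotation file
        'video_file': 'BBC/videos/bbc_01.mp4',
        'elapsed': 25.5,                      # seconds spent on this video
        'pred_scenes': [120, 480, 951],       # predicted cut frame numbers
    },
}

result = Evaluator().evaluate_performance(pred_scenes)
print(result)  # {'recall': ..., 'precision': ..., 'f1': ..., 'elapsed': 25.5}
```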