Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Adds mediainfo sniffing and auto-thumbnailing with ffmpeg. #23

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,19 @@ class BlogPageWithMedia(Page):
]
```

## Sniffing media metadata and auto-thumbnails

If `ffprobe` (shipped with ffmpeg) is installed and its command path is set in
your settings, wagtailmedia can sniff an uploaded media file to auto-populate
its duration, height and width.

WAGTAILMEDIA_FFPROBE_CMD = '/usr/local/bin/ffprobe'

Additionally, `ffmpeg` can extract a frame from a video file to auto-generate a
thumbnail. Just set the path to `ffmpeg` in settings.

WAGTAILMEDIA_FFMPEG_CMD = '/usr/local/bin/ffmpeg'


## How to run tests

Expand Down
25 changes: 25 additions & 0 deletions wagtailmedia/migrations/0004_add_mediainfo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.8 on 2017-12-29 18:36
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    ''' Adds ``Media.mediainfo`` and alters ``Media.duration`` to allow blank/null. '''

    dependencies = [
        ('wagtailmedia', '0003_copy_media_permissions_to_collections'),
    ]

    operations = [
        # New optional text column on the media model.
        migrations.AddField(
            model_name='media',
            name='mediainfo',
            field=models.TextField(
                blank=True,
                null=True,
                verbose_name='mediainfo',
            ),
        ),
        # duration is redefined with blank=True / null=True.
        migrations.AlterField(
            model_name='media',
            name='duration',
            field=models.PositiveIntegerField(
                blank=True,
                help_text='Duration in seconds',
                null=True,
                verbose_name='duration',
            ),
        ),
    ]
49 changes: 47 additions & 2 deletions wagtailmedia/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.core.files import File
from django.core.urlresolvers import reverse
from django.db import models
from django.db.models.signals import pre_delete
from django.db.models.signals import post_save, pre_delete
from django.dispatch import Signal
from django.dispatch.dispatcher import receiver
from django.utils.encoding import python_2_unicode_compatible
Expand All @@ -18,6 +19,10 @@
from wagtail.wagtailsearch import index
from wagtail.wagtailsearch.queryset import SearchableQuerySetMixin

from .sniffers.ffmpeg import (
generate_media_thumb, get_video_stream_data, sniff_media_data
)


class MediaQuerySet(SearchableQuerySetMixin, models.QuerySet):
    ''' QuerySet combining SearchableQuerySetMixin with models.QuerySet; defines nothing of its own. '''
Expand All @@ -34,11 +39,14 @@ class AbstractMedia(CollectionMember, index.Indexed, models.Model):
file = models.FileField(upload_to='media', verbose_name=_('file'))

type = models.CharField(choices=MEDIA_TYPES, max_length=255, blank=False, null=False)
duration = models.PositiveIntegerField(verbose_name=_('duration'), help_text=_('Duration in seconds'))
duration = models.PositiveIntegerField(blank=True, null=True, verbose_name=_('duration'),
help_text=_('Duration in seconds'))
width = models.PositiveIntegerField(null=True, blank=True, verbose_name=_('width'))
height = models.PositiveIntegerField(null=True, blank=True, verbose_name=_('height'))
thumbnail = models.FileField(upload_to='media_thumbnails', blank=True, verbose_name=_('thumbnail'))

mediainfo = models.TextField(null=True, blank=True, verbose_name=_('mediainfo'))

created_at = models.DateTimeField(verbose_name=_('created at'), auto_now_add=True)
uploaded_by_user = models.ForeignKey(
settings.AUTH_USER_MODEL,
Expand Down Expand Up @@ -92,6 +100,18 @@ def is_editable_by_user(self, user):
from wagtailmedia.permissions import permission_policy
return permission_policy.user_has_permission_for_instance(user, 'change', self)

def save(self, *args, **kwargs):
    ''' Send changed field names through to signals.

    When an already-saved instance is saved again and the caller did not
    choose ``update_fields`` itself, the stored column values are compared
    with the in-memory ones and the names that differ are passed on as
    ``update_fields``, so ``post_save`` receivers can see what changed.

    A normal full save is performed when nothing differs, when no stored
    row exists for the primary key, or when ``force_insert`` is requested.
    '''
    # Model.save(force_insert, force_update, using, update_fields) may be
    # called positionally, so look at both args and kwargs.
    force_insert = kwargs.get('force_insert', args[0] if args else False)
    caller_chose_fields = len(args) > 3 or kwargs.get('update_fields') is not None

    # Django rejects update_fields together with force_insert, and an explicit
    # update_fields from the caller must not be overwritten.
    if self.pk is not None and not force_insert and not caller_chose_fields:
        # first() yields None instead of raising IndexError when the pk was
        # assigned by hand and the row has not been written yet.
        stored = self.__class__._default_manager.filter(pk=self.pk).values().first()
        if stored is not None:
            changed = [
                name for name, value in stored.items()
                if getattr(self, name) != value
            ]
            if changed:
                kwargs['update_fields'] = changed
    super(AbstractMedia, self).save(*args, **kwargs)

class Meta:
abstract = True
verbose_name = _('media')
Expand Down Expand Up @@ -130,6 +150,31 @@ def get_media_model():
return media_model


# Receive the post_save signal and sniff mediainfo data if possible.
@receiver(post_save, sender=Media)
def media_sniff(sender, instance, created, update_fields, **kwargs):
if hasattr(settings, 'WAGTAILMEDIA_FFPROBE_CMD'):
if created or (update_fields and 'file' in update_fields):
data = sniff_media_data(instance.file.path)
if data:
duration = int(float(data['format']['duration']))
Media.objects.filter(pk=instance.pk).update(duration=duration, mediainfo=data)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with your comment that just dumping this data dictionary into a text field feels wasteful.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I felt compelled to store the results of the media info extraction alongside the Media object. Maybe it should be called "raw_mediainfo" or something. I'm not sure how wasteful it is. Probably less wasteful than re-extracting via ffmpeg each time.

I don't love that it's a Textfield.... I'd prefer a JSONField but wanted to maximize compatibility.

if instance.type == 'video':
video_stream = get_video_stream_data(data)
if video_stream:
height = int(float(video_stream['height']))
width = int(float(video_stream['width']))
Media.objects.filter(pk=instance.pk).update(height=height, width=width)

# Try to scrape a thumbnail from video
if hasattr(settings, 'WAGTAILMEDIA_FFMPEG_CMD')\
and not instance.thumbnail:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NB this leaves no way to leave a thumbnail blank. I'm happy with this, but just pointing it out for discussion if anyone has an opinion.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with you here. I think I'll add a "No thumbnail" checkbox to the Media admin UI.

thumb_path = generate_media_thumb(instance.file.path, f'{instance.file.name}.jpg',
skip_seconds=int(duration*1.0/2))
instance.thumbnail.save(os.path.basename(thumb_path), File(open(thumb_path, 'rb')))
os.remove(thumb_path)


# Receive the pre_delete signal and delete the file associated with the model instance.
@receiver(pre_delete, sender=Media)
def media_delete(sender, instance, **kwargs):
Expand Down
Empty file.
41 changes: 41 additions & 0 deletions wagtailmedia/sniffers/ffmpeg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from __future__ import unicode_literals

import json
import subprocess
import sys

from django.conf import settings


def sniff_media_data(video_path):
    ''' Uses ffprobe to sniff mediainfo metadata.

    Runs the command named by ``settings.WAGTAILMEDIA_FFPROBE_CMD`` on
    ``video_path``, requesting format and stream information as JSON, and
    returns the parsed result.

    Raises ``subprocess.CalledProcessError`` if ffprobe exits with a non-zero
    status, and ``ValueError`` if its output cannot be parsed as JSON.
    '''
    ffprobe = settings.WAGTAILMEDIA_FFPROBE_CMD
    raw_output = subprocess.check_output([
        ffprobe, "-v", "quiet", "-print_format", "json",
        "-show_format", "-show_streams", video_path,
    ])
    # Decode as UTF-8 rather than sys.stdout.encoding: the latter describes
    # this process's own stdout (it can be None or a non-UTF-8 codec when
    # stdout is piped or under a WSGI server), not the bytes ffprobe wrote.
    return json.loads(raw_output.decode('utf-8'))


def generate_media_thumb(video_path, out_path, skip_seconds=0):
    ''' Uses ffmpeg to scrape out a thumbnail image from a video file.

    Invokes the command named by ``settings.WAGTAILMEDIA_FFMPEG_CMD`` with
    ``video_path`` as input, ``skip_seconds`` as the ``-ss`` offset and
    ``out_path`` as the output file, then returns ``out_path``.

    Raises ``subprocess.CalledProcessError`` if ffmpeg exits non-zero.
    '''
    command = [
        settings.WAGTAILMEDIA_FFMPEG_CMD,
        "-y", "-v", "quiet",
        "-accurate_seek", "-ss", str(skip_seconds),
        "-i", video_path,
        "-frames:v", "1",
        out_path,
    ]
    subprocess.check_output(command)
    return out_path


def get_stream_by_type(data, typestr):
    ''' Returns the appropriate mediainfo stream data.

    ``data`` is the parsed ffprobe output; the first entry of
    ``data['streams']`` whose ``codec_type`` equals ``typestr`` is returned.

    Returns ``None`` when there is no match, when ``data`` is empty or has no
    ``streams`` key, or when the entries carry no ``codec_type``.
    '''
    if not data:
        return None
    # .get() keeps stream entries without a codec_type (or a report without
    # a streams list) from raising KeyError; callers test for a falsy result.
    streams = data.get('streams') or ()
    return next(
        (stream for stream in streams if stream.get('codec_type') == typestr),
        None,
    )


def get_video_stream_data(data):
    ''' Looks up the 'video' stream in mediainfo data via get_stream_by_type. '''
    video_stream = get_stream_by_type(data, 'video')
    return video_stream


def get_audio_stream_data(data):
    ''' Looks up the 'audio' stream in mediainfo data via get_stream_by_type. '''
    audio_stream = get_stream_by_type(data, 'audio')
    return audio_stream