Added a storage which compresses only modified packages and invalidates caches automatically #657

Open · wants to merge 7 commits into base: master
17 changes: 17 additions & 0 deletions docs/storages.rst
@@ -26,6 +26,23 @@ Also available if you want versioning ::

STATICFILES_STORAGE = 'pipeline.storage.NonPackagingPipelineCachedStorage'

Optimized storage with cache invalidation
-----------------------------------------

There is also an optimized storage which compresses only the packages whose
source files have been modified. This substantially speeds up the
``collectstatic`` process when you have multiple packages in your pipeline
configuration.

In addition, this storage produces a hash from the compressed files and
appends it to each resource's URL. Thus, whenever you modify your files and
recompress them, any cached copies (whether in a cache server or a browser)
are automatically invalidated ::

STATICFILES_STORAGE = 'pipeline.storage.OptimizedPipelineStorage'
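
For example, assuming a package whose output file is ``css/stats.css`` (the
file name and hash below are illustrative), the storage would render a URL
such as ::

    /static/css/stats.css?0cc175b9c0f1b6a831c399e269772661

so each recompression yields a new query string, forcing caches to fetch the
fresh file.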

File finders (staticfiles with DEBUG = False)
=============================================

If you use staticfiles with ``DEBUG = False`` (i.e. for integration tests
with `Selenium <http://docs.seleniumhq.org/>`_) you should install the finder
that allows staticfiles to locate your outputted assets : ::
30 changes: 29 additions & 1 deletion docs/usage.rst
@@ -9,7 +9,7 @@ Describes how to use Pipeline when it is installed and configured.
Templatetags
============

Pipeline includes two template tags: ``stylesheet`` and ``javascript``,
Pipeline includes two main template tags: ``stylesheet`` and ``javascript``,
in a template library called ``pipeline``.

They are used to output the ``<link>`` and ``<script>``-tags for the
@@ -41,6 +41,34 @@ with the name “scripts”, you would use the following code to output them all
{% javascript 'scripts' %}


Templatetags for prefetching
============================

In addition to ``javascript`` and ``stylesheet``, django-pipeline provides
four templatetags for prefetching resources:
``javascript_prefetch``, ``stylesheet_prefetch``, ``javascript_prefetch_all``
and ``stylesheet_prefetch_all``.

These tags output ``<link rel="prefetch">`` HTML tags, which tell the browser
to prefetch resources you know will be needed on other pages. In Firefox, for
instance, these resources are loaded while the browser is idle.

Example
-------

Prefetching specific packages ::

{% load pipeline %}
{% javascript_prefetch 'colors' %}
{% stylesheet_prefetch 'stats' %}

Prefetching all packages ::

{% load pipeline %}
{% javascript_prefetch_all %}
{% stylesheet_prefetch_all %}
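
With ``PIPELINE_ENABLED = True`` the tags above render one ``<link>`` tag per
package output file, along these lines (paths are illustrative) ::

    <link rel="prefetch" href="/static/js/colors.min.js"/>
    <link rel="prefetch" href="/static/css/stats.min.css"/>

When the pipeline is disabled, one tag is rendered for each source file
instead.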


Form Media
==========

202 changes: 202 additions & 0 deletions pipeline/storage.py
@@ -1,6 +1,9 @@
from __future__ import unicode_literals

import gzip
import hashlib
import io
import os

from io import BytesIO

@@ -98,3 +101,202 @@ class PipelineCachedStorage(PipelineMixin, CachedStaticFilesStorage):

class NonPackagingPipelineCachedStorage(NonPackagingMixin, PipelineCachedStorage):
pass


class OptimizedPipelineStorage(PipelineMixin, StaticFilesStorage):
    """This storage compresses only the packages whose source files were
    modified, or that have not been compressed yet. This speeds up the
    collectstatic process, since most of the time we modify only a few
    JavaScript/CSS files at a time.

    It also appends an MD5 hash to the compressed files' URL so any existing
    cache mechanisms are naturally invalidated."""

    compressed_packages = []
    unchanged_packages = []
    packager = None
    HASH_CACHE_KEY = 'pipeline_compressed_hash_key'
    SOURCES_DUMP_KEY = 'pipeline_dumped_sources_key'

    def url(self, name):
        """Append the produced hash to the resource URL so existing cache
        mechanisms are naturally invalidated."""
        url = super(OptimizedPipelineStorage, self).url(name)
        _hash = self.get_compressed_files_hash()
        if _hash and name:
            return '{url}?{_hash}'.format(url=url, _hash=_hash)
        else:
            return url

    def post_process(self, paths, dry_run=False, **options):
        if dry_run:
            return

        from pipeline.packager import Packager
        self.packager = Packager(storage=self)

        # Compress each CSS package only if its sources changed since the
        # last run (or it was never compressed).
        for package_name in self.packager.packages['css']:
            package = self.packager.package_for('css', package_name)
            output_file = package.output_filename

            if self.packing and self._is_outdated(package_name, package):
                print('COMPRESSING {} package...'.format(package_name))
                self.packager.pack_stylesheets(package)
                self.compressed_packages.append(package_name)
            else:
                self.unchanged_packages.append(package_name)

            paths[output_file] = (self, output_file)
            yield output_file, output_file, True

        # Same logic for the JavaScript packages.
        for package_name in self.packager.packages['js']:
            package = self.packager.package_for('js', package_name)
            output_file = package.output_filename

            if self.packing and self._is_outdated(package_name, package):
                print('COMPRESSING {} package...'.format(package_name))
                self.packager.pack_javascripts(package)
                self.compressed_packages.append(package_name)
            else:
                self.unchanged_packages.append(package_name)

            paths[output_file] = (self, output_file)
            yield output_file, output_file, True

        # Delegate to the next post_process in the MRO after PipelineMixin,
        # if any (e.g. a hashing storage).
        super_class = super(PipelineMixin, self)
        if hasattr(super_class, 'post_process'):
            for name, hashed_name, processed in super_class.post_process(
                    paths.copy(), dry_run, **options):
                yield name, hashed_name, processed

        self._finalize()

    def _is_outdated(self, package_name, package):
        outdated = False

        for path in package.paths:
            # _is_content_changed must run for every path so that each
            # file's hash is refreshed in the cache.
            outdated = self._is_content_changed(path) or outdated

        if not outdated:
            previous_paths = self._get_previous_compressed_sources(package_name)
            if not previous_paths or set(previous_paths) != set(package.paths):
                outdated = True

        from django.conf import settings
        output_path = os.path.join(settings.STATIC_ROOT, package.output_filename)
        return outdated or not os.path.exists(output_path)

    def _is_content_changed(self, path):
        """Verify whether the content of ``path`` changed, based on the hash
        produced during the last collectstatic run."""
        from django.conf import settings
        infile_path = os.path.join(self.location, path)
        outfile_path = os.path.join(settings.STATIC_ROOT, path)
        # The ".hash" suffix only namespaces the cache key; no file with this
        # name is written to disk.
        infile_hash_path = outfile_path + '.hash'

        with open(infile_path, 'rb') as infile_file:
            current_hash = hashlib.md5(infile_file.read()).hexdigest()

        from django.core.cache import caches
        DEFAULT_CACHE = caches['default']
        old_hash = DEFAULT_CACHE.get(infile_hash_path)
        changed = current_hash != old_hash
        DEFAULT_CACHE.set(infile_hash_path, current_hash, None)
        return changed

    def _finalize(self):
        self._dump_sources()
        print('\n=== {} results ==='.format(self.__class__.__name__))
        total_removed = self._remove_sources()
        self._write_hash()
        print('{} source files used in the compression were removed'.format(
            total_removed))
        print('{} new compressed packages: {}'.format(
            len(self.compressed_packages), self.compressed_packages))
        print('{} unchanged packages: {}'.format(
            len(self.unchanged_packages), self.unchanged_packages))
        print('=== End {} results ==='.format(self.__class__.__name__))

    def _remove_sources(self):
        """We do not want to expose our source files, thus they are removed
        from the STATIC_ROOT directory, keeping only the compressed files."""
        from django.conf import settings
        sources = []

        for package_name in self.packager.packages['js']:
            package = self.packager.package_for('js', package_name)
            sources.extend(package.paths)

        for package_name in self.packager.packages['css']:
            package = self.packager.package_for('css', package_name)
            sources.extend(package.paths)

        removed = 0
        for source in sources:
            source_path = os.path.join(settings.STATIC_ROOT, source)
            if os.path.exists(source_path):
                os.remove(source_path)
                removed += 1

        return removed

    def _dump_sources(self):
        """We dump the list of compressed source files so we can detect any
        difference (new or removed files) in the next collectstatic run."""
        from django.core.cache import caches
        DEFAULT_CACHE = caches['default']

        packages = {}

        for package_name in self.packager.packages['js']:
            package = self.packager.package_for('js', package_name)
            packages[package_name] = package.paths

        for package_name in self.packager.packages['css']:
            package = self.packager.package_for('css', package_name)
            packages[package_name] = package.paths

        # cache forever
        DEFAULT_CACHE.set(self.SOURCES_DUMP_KEY, packages, None)

    def _get_previous_compressed_sources(self, package_name):
        from django.core.cache import caches
        DEFAULT_CACHE = caches['default']
        return DEFAULT_CACHE.get(self.SOURCES_DUMP_KEY, {}).get(package_name)

    def _write_hash(self):
        """Write a single MD5 hash of the content of all compressed output
        files. This is useful to force any cache mechanism to update its
        entries."""
        from django.conf import settings
        from django.core.cache import caches
        DEFAULT_CACHE = caches['default']
        output_filenames = []

        for package_name in self.packager.packages['js']:
            package = self.packager.package_for('js', package_name)
            output_filenames.append(package.output_filename)

        for package_name in self.packager.packages['css']:
            package = self.packager.package_for('css', package_name)
            output_filenames.append(package.output_filename)

        contents = []
        for output_filename in output_filenames:
            abs_path = os.path.join(settings.STATIC_ROOT, output_filename)
            with io.open(abs_path, 'rb') as output_file:
                contents.append(output_file.read())

        digest = hashlib.md5(b''.join(contents)).hexdigest()
        print('New hash: {}'.format(digest))
        DEFAULT_CACHE.set(self.HASH_CACHE_KEY, digest, None)  # cache forever

    @staticmethod
    def get_compressed_files_hash():
        from django.core.cache import caches
        DEFAULT_CACHE = caches['default']
        return DEFAULT_CACHE.get(OptimizedPipelineStorage.HASH_CACHE_KEY)
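

# NOTE: the bookkeeping above (per-file hashes, the compressed-sources dump
# and the global hash) lives in Django's 'default' cache with timeout=None,
# so a cache backend that persists between collectstatic runs (e.g. database,
# filesystem or Redis) is assumed; a per-process backend such as LocMemCache
# would defeat the optimization.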
3 changes: 3 additions & 0 deletions pipeline/templates/pipeline/prefetch.html
@@ -0,0 +1,3 @@
{% for url in urls %}
<link rel="prefetch" href="{{ url }}"/>
{% endfor %}
50 changes: 50 additions & 0 deletions pipeline/templatetags/pipeline.py
@@ -215,3 +215,53 @@ def javascript(parser, token):
    except ValueError:
        raise template.TemplateSyntaxError('%r requires exactly one argument: the name of a group in the PIPELINE.JAVASCRIPT setting' % token.split_contents()[0])
    return JavascriptNode(name)


@register.inclusion_tag('pipeline/prefetch.html')
def javascript_prefetch(package_name):
    urls = []
    if settings.PIPELINE_ENABLED:
        path = settings.JAVASCRIPT[package_name]['output_filename']
        urls.append(staticfiles_storage.url(path))
    else:
        for path in settings.JAVASCRIPT[package_name]['source_filenames']:
            urls.append(staticfiles_storage.url(path))
    return {'urls': urls}


@register.inclusion_tag('pipeline/prefetch.html')
def stylesheet_prefetch(package_name):
    urls = []
    if settings.PIPELINE_ENABLED:
        path = settings.STYLESHEETS[package_name]['output_filename']
        urls.append(staticfiles_storage.url(path))
    else:
        for path in settings.STYLESHEETS[package_name]['source_filenames']:
            urls.append(staticfiles_storage.url(path))
    return {'urls': urls}


@register.inclusion_tag('pipeline/prefetch.html')
def javascript_prefetch_all():
    urls = []
    for package in getattr(settings, 'JAVASCRIPT', {}).values():
        if settings.PIPELINE_ENABLED:
            path = package['output_filename']
            urls.append(staticfiles_storage.url(path))
        else:
            for path in package['source_filenames']:
                urls.append(staticfiles_storage.url(path))
    return {'urls': urls}


@register.inclusion_tag('pipeline/prefetch.html')
def stylesheet_prefetch_all():
    urls = []
    for package in getattr(settings, 'STYLESHEETS', {}).values():
        if settings.PIPELINE_ENABLED:
            path = package['output_filename']
            urls.append(staticfiles_storage.url(path))
        else:
            for path in package['source_filenames']:
                urls.append(staticfiles_storage.url(path))
    return {'urls': urls}
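

# NOTE: the single-package tags index the package settings directly, so
# requesting an unknown package name raises a KeyError at render time.
# A minimal usage sketch (the package name 'stats' is illustrative):
#
#   {% load pipeline %}
#   {% javascript_prefetch 'stats' %}
#   {% stylesheet_prefetch_all %}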
2 changes: 1 addition & 1 deletion tests/assets/compressors/yuglify.js
@@ -1 +1 @@
(function(){(function(){window.concat=function(){console.log(arguments)}})(),function(){window.cat=function(){console.log("hello world")}}()}).call(this);
(function(){window.concat=function(){console.log(arguments)},window.cat=function(){console.log("hello world")}}).call(this)