Skip to content

Commit

Permalink
#14 #15 Serialize EDTFField values into DB
Browse files Browse the repository at this point in the history
Improve performance of EDTFField when populating model
instances from DB values by storing EDTF data in pickled
format, not as string values that need to be re-parsed
every time they are loaded.

This implementation is naive and could be improved with
a more sophisticated serialization approach for EDTF
fields, though it works for now.

This change also fixes the issue in #15, where already-parsed
EDTF field values were re-parsed whenever an instance was
saved. They are now left as-is unless a `natural_text_field`
value is present to override and reset the EDTF field value.
  • Loading branch information
jmurty committed May 29, 2017
1 parent dc51bc7 commit a293a7a
Showing 1 changed file with 35 additions and 9 deletions.
44 changes: 35 additions & 9 deletions edtf/fields.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
try:
import cPickle as pickle
except:
import pickle

from django.db import models

from edtf import parse_edtf, EDTFObject
Expand All @@ -22,7 +27,7 @@ def __init__(
upper_fuzzy_field=None,
**kwargs
):
kwargs['max_length'] = 255
kwargs['max_length'] = 1000
self.natural_text_field, self.lower_strict_field, \
self.upper_strict_field, self.lower_fuzzy_field, \
self.upper_fuzzy_field = natural_text_field, lower_strict_field, \
Expand All @@ -48,6 +53,12 @@ def deconstruct(self):

def from_db_value(self, value, expression, connection, context):
    """Convert a raw database value into a Python object.

    Falsy values (e.g. ``None`` or an empty string) are returned as
    ``None``. Any other value is first treated as pickled data; if it
    cannot be unpickled it is passed to ``parse_edtf`` with
    ``fail_silently=True`` instead (presumably to cope with rows that
    still hold a plain EDTF string -- confirm against existing data).
    """
    if not value:
        return None
    # NOTE(security): unpickling can execute arbitrary code. This is only
    # safe while the column is written exclusively by this field; review
    # before exposing the column to any externally supplied data.
    try:
        return pickle.loads(str(value))
    except Exception:
        # Unpickling non-pickle data can raise many different exception
        # types, so catch broadly -- but not with a bare ``except:``, which
        # would also swallow KeyboardInterrupt and SystemExit.
        pass
    return parse_edtf(value, fail_silently=True)

def to_python(self, value):
Expand All @@ -59,11 +70,16 @@ def to_python(self, value):

return parse_edtf(value, fail_silently=True)

def get_db_prep_save(self, value, connection):
    """Return the value to store in the database when saving.

    Truthy values are serialized with ``pickle.dumps``; falsy values are
    delegated to the parent class's ``get_db_prep_save``.
    """
    if not value:
        return super(EDTFField, self).get_db_prep_save(value, connection)
    return pickle.dumps(value)

def get_prep_value(self, value):
    """Convert a Python value into a query value.

    The value is first run through the parent class's ``get_prep_value``.
    If the result is an ``EDTFObject`` it is serialized with
    ``pickle.dumps`` so that it matches the pickled form written by
    ``get_db_prep_save``; any other value is returned unchanged.
    """
    value = super(EDTFField, self).get_prep_value(value)
    if isinstance(value, EDTFObject):
        # Only the pickled form is returned here; a preceding
        # ``unicode(value)`` return would make this line unreachable.
        return pickle.dumps(value)
    return value

def pre_save(self, instance, add):
Expand All @@ -74,19 +90,29 @@ def pre_save(self, instance, add):
if not self.natural_text_field or self.attname not in instance.__dict__:
return

edtf = getattr(instance, self.attname)

# Update EDTF field based on latest natural text value, if any
natural_text = getattr(instance, self.natural_text_field)
if natural_text:
n = text_to_edtf(natural_text)
setattr(instance, self.attname, n)
edtf = text_to_edtf(natural_text)

e = parse_edtf(getattr(instance, self.attname), fail_silently=True)
if e:
# TODO: If `natural_text_field` is cleared, should the derived EDTF field
# value also be cleared rather than left at its original value?

# TODO Handle case where EDTF field is set to a string directly, not
# via `natural_text_field` (this is a slightly unexpected use-case, but
# is a very efficient way to set EDTF values in situations like for API
# imports so we probably want to continue to support it?)
if edtf and not isinstance(edtf, EDTFObject):
edtf = parse_edtf(edtf, fail_silently=True)

setattr(instance, self.attname, edtf)
if edtf:
# set related date fields on the instance
for attr in DATE_ATTRS:
field_attr = "%s_field" % attr
g = getattr(self, field_attr, None)
if g:
setattr(instance, g, getattr(e, attr)())

return unicode(e)
setattr(instance, g, getattr(edtf, attr)())
return edtf

0 comments on commit a293a7a

Please sign in to comment.