From 335292f5c5155c0c95913f34020fd42d747f7963 Mon Sep 17 00:00:00 2001 From: Alberto Pettarin Date: Sun, 29 Nov 2015 13:11:04 +0100 Subject: [PATCH] Fixed bug outputting to Kobo. New options. --- README.md | 52 +-- README.rst | 53 +-- VERSION | 2 +- bin/penelope | 2 +- penelope/__init__.py | 2 +- penelope/__main__.py | 2 +- penelope/collation_default.py | 8 +- penelope/collation_german.py | 19 +- penelope/command_line.py | 83 +++-- penelope/dictionary.py | 94 +++++- penelope/dictionary_ebook.py | 523 ++++++++++++++++++++++++++++++ penelope/format_bookeen.py | 45 +-- penelope/format_csv.py | 2 +- penelope/format_epub.py | 398 +++-------------------- penelope/format_kobo.py | 83 ++--- penelope/format_mobi.py | 163 +++------- penelope/format_stardict.py | 47 +-- penelope/format_xml.py | 2 +- penelope/input_parser_identity.py | 2 +- penelope/input_parser_webster.py | 2 +- penelope/prefix_default.py | 35 ++ penelope/prefix_kobo.py | 60 ++++ penelope/utilities.py | 6 +- setup.py | 4 +- 24 files changed, 1037 insertions(+), 652 deletions(-) create mode 100644 penelope/dictionary_ebook.py create mode 100644 penelope/prefix_default.py create mode 100644 penelope/prefix_kobo.py diff --git a/README.md b/README.md index ee1b0ae..e70b13a 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ **Penelope** is a multi-tool for creating, editing and converting dictionaries, especially for eReader devices. -* Version: 3.0.1 -* Date: 2015-11-22 +* Version: 3.1.0 +* Date: 2015-11-29 * Developer: [Alberto Pettarin](http://www.albertopettarin.it/) * License: the MIT License (MIT) * Contact: [click here](http://www.albertopettarin.it/contact.html) @@ -78,7 +78,7 @@ You might need to install `dictzip` (StarDict output) and `kindlegen` (MOBI out $ python -m penelope ``` -This procedure will not any dependencies, see below. +This procedure will not install any dependencies: you will need to do that manually, see below. 
### Dependencies @@ -97,9 +97,9 @@ This procedure will not any dependencies, see below. $ [sudo] pip install marisa-trie ``` - or [`MARISA`](https://code.google.com/p/marisa-trie/) executables available in your `$PATH` or specified with `--marisa-bin-path` + or [MARISA](https://code.google.com/p/marisa-trie/) executables available in your `$PATH` or specified with `--marisa-bin-path` -* to write MOBI Kindle dictionaries: the [`kindlegen`](https://www.amazon.com/gp/feature.html?docId=1000765211) executable, available in your `$PATH` or specified with `--kindlegen-path` +* to write MOBI Kindle dictionaries: the [kindlegen](https://www.amazon.com/gp/feature.html?docId=1000765211) executable, available in your `$PATH` or specified with `--kindlegen-path` * to read/write XML dictionaries: the Python module `lxml`: @@ -154,6 +154,8 @@ optional arguments: --title TITLE title string --website WEBSITE website string --year YEAR year string + --apply-css APPLY_CSS + apply the given CSS file (epub and mobi output only) --bookeen-collation-function BOOKEEN_COLLATION_FUNCTION use the specified collation function --bookeen-install-file @@ -165,29 +167,36 @@ optional arguments: --csv-ls CSV_LS CSV line separator (default: '\n') --dictzip-path DICTZIP_PATH path to dictzip executable - --epub-escape-strings - escape HTML strings (default: False) - --epub-group-prefix-length EPUB_GROUP_PREFIX_LENGTH - group headwords by prefix of given length (default: 3) - --epub-merge-group-size EPUB_MERGE_GROUP_SIZE - merge headword groups with less than this number of - headwords (default: 128) - --epub-output-definitions - output definitions in addition to the headwords - (default: False) + --epub-no-compress do not create the compressed container (epub output + only, default: False) + --escape-strings escape HTML strings (default: False) --flatten-synonyms flatten synonyms, creating a new entry with headword=synonym and using the definition of the original headword (default: False) + 
--group-by-prefix-function GROUP_BY_PREFIX_FUNCTION + compute the prefix of headwords using the given prefix + function file + --group-by-prefix-length GROUP_BY_PREFIX_LENGTH + group headwords by prefix of given length (default: 2) + --group-by-prefix-merge-across-first + merge headword groups even when the first character + changes (default: False) + --group-by-prefix-merge-min-size GROUP_BY_PREFIX_MERGE_MIN_SIZE + merge headword groups until the given minimum number + of headwords is reached (default: 0, meaning no merge + will take place) + --ignore-case ignore headword case, all headwords will be lowercased + (default: False) + --ignore-synonyms ignore synonyms, not reading/writing them if present + (default: False) + --include-index-page include an index page (epub and mobi output only, + default: False) --input-file-encoding INPUT_FILE_ENCODING use the specified encoding for reading the raw contents of input file(s) (default: 'utf-8') --input-parser INPUT_PARSER use the specified parser function after reading the raw contents of input file(s) - --ignore-case ignore headword case, all headwords will be lowercased - (default: False) - --ignore-synonyms ignore synonyms, not reading/writing them if present - (default: False) --kindlegen-path KINDLEGEN_PATH path to kindlegen executable --marisa-bin-path MARISA_BIN_PATH @@ -201,6 +210,8 @@ optional arguments: | ') --mobi-no-kindlegen do not run kindlegen, keep .opf and .html files (default: False) + --no-definitions do not output definitions for EPUB and MOBI formats + (default: False) --sd-ignore-sametypesequence ignore the value of sametypesequence in StarDict .ifo files (default: False) @@ -253,7 +264,6 @@ examples: $ penelope -i dict.xml -j xml -f en -t it -p mobi -o output.epub --epub-output-definitions As above, but also output definitions - ``` You can find ISO 639-1 language codes [here](http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). @@ -338,6 +348,8 @@ were released under the GNU GPL 3 License. 
* Reading EPUB (3) dictionaries is not supported; the writing part needs polishing/refactoring * Reading PRC/MOBI (Kindle) dictionaries is not supported * There are some limitations on StarDict files that can be read (see comments in `format_stardict.py`) +* Documentation is not complete +* Unit tests are missing ## Acknowledgments diff --git a/README.rst b/README.rst index 1217bb5..0e733d7 100644 --- a/README.rst +++ b/README.rst @@ -4,8 +4,8 @@ Penelope **Penelope** is a multi-tool for creating, editing and converting dictionaries, especially for eReader devices. -- Version: 3.0.1 -- Date: 2015-11-22 +- Version: 3.1.0 +- Date: 2015-11-29 - Developer: `Alberto Pettarin `__ - License: the MIT License (MIT) - Contact: `click here `__ @@ -96,7 +96,8 @@ From source code $ python -m penelope -This procedure will not any dependencies, see below. +This procedure will not install any dependencies: you will need to do +that manually, see below. Dependencies ~~~~~~~~~~~~ @@ -116,11 +117,11 @@ Dependencies $ [sudo] pip install marisa-trie -or ```MARISA`` `__ executables +or `MARISA `__ executables available in your ``$PATH`` or specified with ``--marisa-bin-path`` - to write MOBI Kindle dictionaries: the - ```kindlegen`` `__ + `kindlegen `__ executable, available in your ``$PATH`` or specified with ``--kindlegen-path`` @@ -178,6 +179,8 @@ Usage --title TITLE title string --website WEBSITE website string --year YEAR year string + --apply-css APPLY_CSS + apply the given CSS file (epub and mobi output only) --bookeen-collation-function BOOKEEN_COLLATION_FUNCTION use the specified collation function --bookeen-install-file @@ -189,29 +192,36 @@ Usage --csv-ls CSV_LS CSV line separator (default: '\n') --dictzip-path DICTZIP_PATH path to dictzip executable - --epub-escape-strings - escape HTML strings (default: False) - --epub-group-prefix-length EPUB_GROUP_PREFIX_LENGTH - group headwords by prefix of given length (default: 3) - --epub-merge-group-size EPUB_MERGE_GROUP_SIZE - merge 
headword groups with less than this number of - headwords (default: 128) - --epub-output-definitions - output definitions in addition to the headwords - (default: False) + --epub-no-compress do not create the compressed container (epub output + only, default: False) + --escape-strings escape HTML strings (default: False) --flatten-synonyms flatten synonyms, creating a new entry with headword=synonym and using the definition of the original headword (default: False) + --group-by-prefix-function GROUP_BY_PREFIX_FUNCTION + compute the prefix of headwords using the given prefix + function file + --group-by-prefix-length GROUP_BY_PREFIX_LENGTH + group headwords by prefix of given length (default: 2) + --group-by-prefix-merge-across-first + merge headword groups even when the first character + changes (default: False) + --group-by-prefix-merge-min-size GROUP_BY_PREFIX_MERGE_MIN_SIZE + merge headword groups until the given minimum number + of headwords is reached (default: 0, meaning no merge + will take place) + --ignore-case ignore headword case, all headwords will be lowercased + (default: False) + --ignore-synonyms ignore synonyms, not reading/writing them if present + (default: False) + --include-index-page include an index page (epub and mobi output only, + default: False) --input-file-encoding INPUT_FILE_ENCODING use the specified encoding for reading the raw contents of input file(s) (default: 'utf-8') --input-parser INPUT_PARSER use the specified parser function after reading the raw contents of input file(s) - --ignore-case ignore headword case, all headwords will be lowercased - (default: False) - --ignore-synonyms ignore synonyms, not reading/writing them if present - (default: False) --kindlegen-path KINDLEGEN_PATH path to kindlegen executable --marisa-bin-path MARISA_BIN_PATH @@ -225,6 +235,8 @@ Usage | ') --mobi-no-kindlegen do not run kindlegen, keep .opf and .html files (default: False) + --no-definitions do not output definitions for EPUB and MOBI 
formats + (default: False) --sd-ignore-sametypesequence ignore the value of sametypesequence in StarDict .ifo files (default: False) @@ -277,7 +289,6 @@ Usage $ penelope -i dict.xml -j xml -f en -t it -p mobi -o output.epub --epub-output-definitions As above, but also output definitions - You can find ISO 639-1 language codes `here `__. @@ -384,6 +395,8 @@ Limitations and Missing Features - Reading PRC/MOBI (Kindle) dictionaries is not supported - There are some limitations on StarDict files that can be read (see comments in ``format_stardict.py``) +- Documentation is not complete +- Unit tests are missing Acknowledgments --------------- diff --git a/VERSION b/VERSION index cb2b00e..fd2a018 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.1 +3.1.0 diff --git a/bin/penelope b/bin/penelope index b810d23..7421518 100755 --- a/bin/penelope +++ b/bin/penelope @@ -14,7 +14,7 @@ from penelope import main as package_main __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" diff --git a/penelope/__init__.py b/penelope/__init__.py index e54bbe4..d4a9326 100644 --- a/penelope/__init__.py +++ b/penelope/__init__.py @@ -32,7 +32,7 @@ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" diff --git a/penelope/__main__.py b/penelope/__main__.py index bbda8b9..596e443 100644 --- a/penelope/__main__.py +++ b/penelope/__main__.py @@ -31,7 +31,7 @@ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" diff --git 
a/penelope/collation_default.py b/penelope/collation_default.py index a5d7059..ae4b587 100644 --- a/penelope/collation_default.py +++ b/penelope/collation_default.py @@ -2,19 +2,21 @@ # -*- coding: utf-8 -*- """ -This is the default collation function (IcuNoCase) for bookeen output format. +This is the default collation function (IcuNoCase). """ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" def collate_function(string1, string2): """ - Implement IcuNoCase collation. + Implement default IcuNoCase collation, + by simply lowercasing the UTF-8 encoded versions + of the two strings. :param string1: first string :type string1: unicode diff --git a/penelope/collation_german.py b/penelope/collation_german.py index 3c5e2c5..d647f7b 100644 --- a/penelope/collation_german.py +++ b/penelope/collation_german.py @@ -2,19 +2,27 @@ # -*- coding: utf-8 -*- """ -This is a sample collation function (IcuNoCase) for German. +This is a collation function (IcuNoCase) for German. """ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" +REPLACEMENTS = [ + [u"ä", u"a"], + [u"ö", u"o"], + [u"ü", u"u"], + [u"ß", u"ss"] +] + def collate_function(string1, string2): """ Implement IcuNoCase collation for German. + (I do not remember where the procedure comes from.) 
:param string1: first string :type string1: unicode @@ -26,10 +34,9 @@ def collate_function(string1, string2): b2 = string2.lower() c1 = b1 c2 = b2 - for f in [[u"ä", u"a"], [u"ö", u"o"], [u"ü", u"u"], [u"ß", u"ss"]]: - b1 = b1.replace(f[0], f[1]) - b2 = b2.replace(f[0], f[1]) - + for repl in REPLACEMENTS: + b1 = b1.replace(repl[0], repl[1]) + b2 = b2.replace(repl[0], repl[1]) if b1.encode("utf-16") == b2.encode("utf-16"): if c1.encode("utf-16") == c2.encode("utf-16"): return 0 diff --git a/penelope/command_line.py b/penelope/command_line.py index 6e46d47..f2fa6f6 100644 --- a/penelope/command_line.py +++ b/penelope/command_line.py @@ -15,7 +15,7 @@ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" @@ -154,6 +154,12 @@ "action": "store" }, + { + "short": None, + "long": "--apply-css", + "help": "apply the given CSS file (epub and mobi output only)", + "action": "store" + }, { "short": None, "long": "--bookeen-collation-function", @@ -192,44 +198,44 @@ }, { "short": None, - "long": "--epub-escape-strings", - "help": "escape HTML strings (default: False)", + "long": "--epub-no-compress", + "help": "do not create the compressed container (epub output only, default: False)", "action": "store_true" }, { "short": None, - "long": "--epub-group-prefix-length", - "help": "group headwords by prefix of given length (default: 3)", - "action": "store" + "long": "--escape-strings", + "help": "escape HTML strings (default: False)", + "action": "store_true" }, { "short": None, - "long": "--epub-merge-group-size", - "help": "merge headword groups with less than this number of headwords (default: 128)", - "action": "store" + "long": "--flatten-synonyms", + "help": "flatten synonyms, creating a new entry with headword=synonym and using the definition of the original headword (default: 
False)", + "action": "store_true" }, { "short": None, - "long": "--epub-output-definitions", - "help": "output definitions in addition to the headwords (default: False)", - "action": "store_true" + "long": "--group-by-prefix-function", + "help": "compute the prefix of headwords using the given prefix function file", + "action": "store" }, { "short": None, - "long": "--flatten-synonyms", - "help": "flatten synonyms, creating a new entry with headword=synonym and using the definition of the original headword (default: False)", - "action": "store_true" + "long": "--group-by-prefix-length", + "help": "group headwords by prefix of given length (default: 2)", + "action": "store" }, { "short": None, - "long": "--input-file-encoding", - "help": "use the specified encoding for reading the raw contents of input file(s) (default: 'utf-8')", - "action": "store" + "long": "--group-by-prefix-merge-across-first", + "help": "merge headword groups even when the first character changes (default: False)", + "action": "store_true" }, { "short": None, - "long": "--input-parser", - "help": "use the specified parser function after reading the raw contents of input file(s)", + "long": "--group-by-prefix-merge-min-size", + "help": "merge headword groups until the given minimum number of headwords is reached (default: 0, meaning no merge will take place)", "action": "store" }, { @@ -244,6 +250,24 @@ "help": "ignore synonyms, not reading/writing them if present (default: False)", "action": "store_true" }, + { + "short": None, + "long": "--include-index-page", + "help": "include an index page (epub and mobi output only, default: False)", + "action": "store_true" + }, + { + "short": None, + "long": "--input-file-encoding", + "help": "use the specified encoding for reading the raw contents of input file(s) (default: 'utf-8')", + "action": "store" + }, + { + "short": None, + "long": "--input-parser", + "help": "use the specified parser function after reading the raw contents of input file(s)", + 
"action": "store" + }, { "short": None, "long": "--kindlegen-path", @@ -280,6 +304,12 @@ "help": "do not run kindlegen, keep .opf and .html files (default: False)", "action": "store_true" }, + { + "short": None, + "long": "--no-definitions", + "help": "do not output definitions for EPUB and MOBI formats (default: False)", + "action": "store_true" + }, { "short": None, "long": "--sd-ignore-sametypesequence", @@ -442,6 +472,7 @@ def set_default_values(args): def set_default_value(key, value): if not args.__contains__(key): args.__dict__[key] = value + set_default_value("apply_css", None) set_default_value("bookeen_collation_function", None) set_default_value("bookeen_install_file", False) set_default_value("csv_fs", ",") @@ -449,13 +480,16 @@ def set_default_value(key, value): set_default_value("csv_ls", "\n") set_default_value("debug", False) set_default_value("dictzip_path", None) - set_default_value("epub_escape_strings", False) - set_default_value("epub_group_prefix_length", 3) - set_default_value("epub_merge_group_size", 100) - set_default_value("epub_output_definitions", False) + set_default_value("epub_no_compress", False) + set_default_value("escape_strings", False) set_default_value("flatten_synonyms", False) + set_default_value("group_by_prefix_length", 2) + set_default_value("group_by_prefix_function", None) + set_default_value("group_by_prefix_merge_across_first", False) + set_default_value("group_by_prefix_merge_min_size", 0) set_default_value("ignore_case", False) set_default_value("ignore_synonyms", False) + set_default_value("include_index_page", False) set_default_value("input_file_encoding", "utf-8") set_default_value("input_parser", None) set_default_value("keep", False) @@ -465,6 +499,7 @@ def set_default_value(key, value): set_default_value("merge_definitions", False) set_default_value("merge_separator", " | ") set_default_value("mobi_no_kindlegen", False) + set_default_value("no_definitions", False) 
set_default_value("sd_ignore_sametypesequence", False) set_default_value("sd_no_dictzip", False) set_default_value("sort_after", False) diff --git a/penelope/dictionary.py b/penelope/dictionary.py index 81c9320..386e9da 100644 --- a/penelope/dictionary.py +++ b/penelope/dictionary.py @@ -16,15 +16,17 @@ from __future__ import absolute_import from io import open +import imp import os +from penelope.prefix_default import get_prefix as get_prefix_default from penelope.utilities import get_uuid from penelope.utilities import print_error __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" @@ -425,5 +427,95 @@ def default_merge_function(headword, definitions): # not needed, since we called self.clear() #self.sort(False, False, False, False) + def group( + self, + prefix_function=None, + prefix_function_path=None, + prefix_length=2, + merge_min_size=0, + merge_across_first=False + ): + """ + Group headwords by prefix, returning a dictionary containing + the prefixes as keys (possibly, with a "SPECIAL" key) and + the dictionary entries as elements of the list associated with a key. 
+ + :param prefix_function_path: the path to a source file containing + a get_prefix function, mapping a headword + and the prefix_length to the prefix; + if None, a default function will be used + :type prefix_function_path: path + :param prefix_function: the function, mapping a headword + and the prefix_length to the prefix; + if None, a default function will be used + :type prefix_function: function + :param prefix_length: the length of the prefixes + :type prefix_length: int + :param merge_min_size: merge headword groups until the given minimum + number of headwords is reached; if 0, does not merge + :type merge_min_size: int + :param merge_across_first: if True, merge groups even when + the first character changes + :type merge_across_first: bool + :rtype: (list, list, dict) + """ + def return_triple(groups): + """ + Return a (list_special, list, dict), + where the list contains the sorted keys of dict, + and list_special contains the list of SPECIAL entries. + """ + spec = None + if u"SPECIAL" in groups: + spec = groups[u"SPECIAL"] + del groups[u"SPECIAL"] + keys = sorted(groups.keys()) + return (spec, keys, groups) + + # load the prefix function + get_prefix = get_prefix_default + if prefix_function is not None: + get_prefix = prefix_function + elif prefix_function_path is not None: + try: + get_prefix = imp.load_source("", prefix_function_path).get_prefix + except: + pass + + # create groups + raw_groups = {} + for index in self.entries_index_sorted: + entry = self.entries[index] + prefix = get_prefix(entry.headword, prefix_length) + if not prefix in raw_groups: + raw_groups[prefix] = [] + raw_groups[prefix].append(self.entries[index]) + + # if no merge is requested, return + if merge_min_size == 0: + return return_triple(raw_groups) + + # merge small groups + merged_groups = {} + if u"SPECIAL" in raw_groups: + # special is never merged + merged_groups[u"SPECIAL"] = raw_groups[u"SPECIAL"] + del raw_groups[u"SPECIAL"] + keys = sorted(raw_groups.keys()) + 
accumulator_key = keys[0] + accumulator = raw_groups[accumulator_key] + for key in keys[1:]: + if ( + (len(accumulator) >= merge_min_size) or + ((not merge_across_first) and (key[0] != accumulator_key[0])) + ): + merged_groups[accumulator_key] = accumulator + accumulator_key = key + accumulator = raw_groups[accumulator_key] + else: + accumulator += raw_groups[key] + merged_groups[accumulator_key] = accumulator + return return_triple(merged_groups) + diff --git a/penelope/dictionary_ebook.py b/penelope/dictionary_ebook.py new file mode 100644 index 0000000..8c9ca3a --- /dev/null +++ b/penelope/dictionary_ebook.py @@ -0,0 +1,523 @@ +#!/usr/bin/env python +# coding=utf-8 + +""" +DictionaryEbook represents a dictionary ebook +in EPUB 2 and MOBI format. +""" + +from __future__ import absolute_import +from __future__ import print_function +from io import open +import os +import zipfile + +from penelope.utilities import create_temp_directory +from penelope.utilities import delete_directory + +__author__ = "Alberto Pettarin" +__copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" +__license__ = "MIT" +__version__ = "3.1.0" +__email__ = "alberto@albertopettarin.it" +__status__ = "Production" + +class DictionaryEbook(): + """ + A class representing a generic ebook containing a dictionary. + + It can be used to output a MOBI or an EPUB 2 container. + + The ebook must have an OPF, and one or more group XHTML files. + + Optionally, it can have a cover image, an NCX TOC, an index XHTML file. + + The actual file templates are provided by the caller. 
+ """ + + EPUB2 = u"epub2" + + #EPUB3 = u"epub3" + + MOBI = u"mobi" + + GROUP_START_INDEX = 2 + + MIMETYPE_CONTENTS = u"application/epub+zip" + + CONTAINER_XML_CONTENTS = u""" + + + + +""" + + EPUB_CSS_CONTENTS = u"""@charset "UTF-8"; +body { + margin: 10px 25px 10px 25px; +} +h1 { + font-size: 200%; +} +h2 { + font-size: 150%; +} +p { + margin-left: 0em; + margin-right: 0em; + margin-top: 0em; + margin-bottom: 0em; + line-height: 2em; + text-align: justify; +} +a, a:focus, a:active, a:visited { + color: black; + text-decoration: none; +} +body.indexPage {} +h1.indexTitle {} +p.indexGroups { + font-size: 150%; +} +span.indexGroup {} +body.groupPage {} +h1.groupTitle {} +div.groupNavigation {} +span.groupHeadword {} +div.groupEntry { + margin-top: 0; + margin-bottom: 1em; +} +h2.groupHeadword { + margin-left: 5%; +} +p.groupDefinition { + margin-left: 10%; + margin-right: 10%; +} +""" + + MOBI_CSS_CONTENTS = u""""@charset "UTF-8";""" + + INDEX_XHTML_TEMPLATE = u""" + + + + %s + + + +

%s

+

+%s +

+ +""" + INDEX_XHTML_LINK_TEMPLATE = u""" %s""" + + INDEX_XHTML_LINK_JOINER = u" •\n" + + EPUB_GROUP_XHTML_TEMPLATE = u""" + + + + %s + + + +

%s

+ +%s + +""" + EPUB_GROUP_XHTML_INDEX_LINK = u""" [ Index ]""" + + EPUB_GROUP_XHTML_WORD_TEMPLATE = u""" %s""" + + EPUB_GROUP_XHTML_WORD_JOINER = u" •\n" + + EPUB_GROUP_XHTML_WORD_DEFINITION_TEMPLATE = u"""
+

%s

+

%s

+
""" + + EPUB_GROUP_XHTML_WORD_DEFINITION_JOINER = u"\n" + + MOBI_GROUP_XHTML_TEMPLATE = u""" + + + + %s + + + +

%s

+ +%s + +""" + + MOBI_GROUP_XHTML_INDEX_LINK = u""" [ Index ]""" + + MOBI_GROUP_XHTML_WORD_TEMPLATE = u""" %s""" + + MOBI_GROUP_XHTML_WORD_JOINER = u" •\n" + + MOBI_GROUP_XHTML_WORD_DEFINITION_TEMPLATE = u"""
+ +

%s

+

%s

+
+
""" + + MOBI_GROUP_XHTML_WORD_DEFINITION_JOINER = u"\n" + + EPUB2_OPF_TEMPLATE = u""" + + + %s + %s + %s + %s + %s + %s-01-01 +%s + + +%s + + +%s + +""" + + MOBI_OPF_TEMPLATE = u""" + + + + %s + %s + %s + %s + %s + Dictionaries + + + + %s + %s + %s + + + +%s + + +%s + + + +""" + + OPF_MANIFEST_ITEM_TEMPLATE = u""" """ + + OPF_SPINE_ITEMREF_TEMPLATE = u""" """ + + NCX_TEMPLATE = u""" + + + + + + + + + + %s + + +%s + +""" + + NCX_NAVPOINT_TEMPLATE = u""" + + %s + + + """ + + def __init__(self, ebook_format, args): + self.ebook_format = ebook_format + self.args = args + self.root_directory_path = None + self.cover = None + self.files = [] + self.manifest_files = [] + self.groups = [] + + def get_tmp_path(self): + if self.root_directory_path is not None: + return self.root_directory_path + return u"" + + def delete(self): + if self.root_directory_path is not None: + delete_directory(self.root_directory_path) + + def add_file(self, relative_path, contents, mode=zipfile.ZIP_DEFLATED): + file_path = os.path.join(self.root_directory_path, relative_path) + file_obj = open(file_path, "wb") + try: + # Python 2 + if isinstance(contents, unicode): + contents = contents.encode("utf-8") + except NameError: + # Python 3 + if isinstance(contents, str): + contents = contents.encode("utf-8") + except: + # should not occur + pass + file_obj.write(contents) + file_obj.close() + self.files.append({"path": relative_path, "mode": mode}) + + def write_cover(self, cover_path_absolute): + if cover_path_absolute is not None: + try: + basename = os.path.basename(cover_path_absolute) + cover_obj = open(cover_path_absolute, "rb") + cover = cover_obj.read() + cover_obj.close() + b = basename.lower() + mimetype = "image/jpeg" + if b.endswith(".png"): + mimetype = "image/png" + elif b.endswith(".gif"): + mimetype = "image/gif" + self.add_file_manifest(u"OEBPS/%s" % basename, basename, cover, mimetype) + self.cover = basename + except: + pass + + def write_css(self, custom_css_path_absolute): + if 
self.ebook_format == self.MOBI: + css = self.MOBI_CSS_CONTENTS + else: + css = self.EPUB_CSS_CONTENTS + if custom_css_path_absolute is not None: + try: + css_obj = open(custom_css_path_absolute, "rb") + css = css_obj.read() + css_obj.close() + except: + pass + self.add_file_manifest(u"OEBPS/style.css", u"style.css", css, "text/css") + + def add_file_manifest(self, relative_path, id, contents, mimetype): + self.add_file(relative_path, contents) + self.manifest_files.append({"path": relative_path, "id": id, "mimetype": mimetype}) + + def get_group_xhtml_file_name_from_index(self, index): + if (index < self.GROUP_START_INDEX) or (index >= len(self.groups) + self.GROUP_START_INDEX): + return u"#groupPage" + return u"g%06d.xhtml" % index + + def add_group(self, key, entries): + self.groups.append({"key": key, "entries": entries}) + + def write_groups(self): + if self.ebook_format == self.MOBI: + group_template = self.MOBI_GROUP_XHTML_TEMPLATE + if self.args.include_index_page: + index_link = self.MOBI_GROUP_XHTML_INDEX_LINK + else: + index_link = u"" + word_template = self.MOBI_GROUP_XHTML_WORD_TEMPLATE + word_joiner = self.MOBI_GROUP_XHTML_WORD_JOINER + word_definition_template = self.MOBI_GROUP_XHTML_WORD_DEFINITION_TEMPLATE + word_definition_joiner = self.MOBI_GROUP_XHTML_WORD_DEFINITION_JOINER + else: + group_template = self.EPUB_GROUP_XHTML_TEMPLATE + if self.args.include_index_page: + index_link = self.EPUB_GROUP_XHTML_INDEX_LINK + else: + index_link = u"" + word_template = self.EPUB_GROUP_XHTML_WORD_TEMPLATE + word_joiner = self.EPUB_GROUP_XHTML_WORD_JOINER + word_definition_template = self.EPUB_GROUP_XHTML_WORD_DEFINITION_TEMPLATE + word_definition_joiner = self.EPUB_GROUP_XHTML_WORD_DEFINITION_JOINER + + index = self.GROUP_START_INDEX + for group in self.groups: + group_label = self.get_group_label(group) + group_xhtml_path = self.get_group_xhtml_file_name_from_index(index) + previous_link = self.get_group_xhtml_file_name_from_index(index - 1) + next_link = 
self.get_group_xhtml_file_name_from_index(index + 1) + group_contents = [] + if self.args.no_definitions: + for entry in group["entries"]: + headword = self.escape_if_needed(entry.headword) + group_contents.append(word_template % (headword)) + group_contents = word_joiner.join(group_contents) + else: + for entry in group["entries"]: + headword = self.escape_if_needed(entry.headword) + definition = self.escape_if_needed(entry.definition) + group_contents.append(word_definition_template % (headword, definition)) + group_contents = word_definition_joiner.join(group_contents) + group_contents = group_template % (group_label, group_label, previous_link, index_link, next_link, group_contents) + self.add_file_manifest(u"OEBPS/%s" % group_xhtml_path, group_xhtml_path, group_contents, u"application/xhtml+xml") + index += 1 + + def escape_if_needed(self, string): + def html_escape(s): + x = s + x = x.replace("&", "&") + x = x.replace('"', """) + x = x.replace("'", "'") + x = x.replace(">", ">") + x = x.replace("<", "<") + return x + if self.args.escape_strings: + return html_escape(string) + return string + + def get_group_label(self, group): + group_label = group["key"] + if group_label != u"SPECIAL": + group_label = "%s–%s" % (group["entries"][0].headword, group["entries"][-1].headword) + return group_label + + def write_index(self): + links = [] + index = self.GROUP_START_INDEX + for group in self.groups: + group_label = self.get_group_label(group) + group_xhtml_path = self.get_group_xhtml_file_name_from_index(index) + group_link = self.INDEX_XHTML_LINK_TEMPLATE % (group_xhtml_path, group_label) + links.append(group_link) + index += 1 + links = self.INDEX_XHTML_LINK_JOINER.join(links) + contents = self.INDEX_XHTML_TEMPLATE % (self.args.title, self.args.title, links) + self.add_file_manifest(u"OEBPS/index.xhtml", u"index.xhtml", contents, u"application/xhtml+xml") + + def write_opf(self): + manifest_contents = [] + spine_contents = [] + for mi in self.manifest_files: + 
manifest_contents.append(self.OPF_MANIFEST_ITEM_TEMPLATE % (mi["id"], mi["id"], mi["mimetype"])) + if mi["mimetype"] == u"application/xhtml+xml": + spine_contents.append(self.OPF_SPINE_ITEMREF_TEMPLATE % (mi["id"])) + manifest_contents = u"\n".join(manifest_contents) + spine_contents = u"\n".join(spine_contents) + cover = u"" + if self.ebook_format == self.MOBI: + if self.cover is not None: + cover = self.cover + opf_contents = self.MOBI_OPF_TEMPLATE % ( + self.args.title, + self.args.language_from, + self.args.identifier, + self.args.author, + self.args.copyright, + self.args.language_from, + self.args.language_to, + cover, + manifest_contents, + spine_contents + ) + else: + if self.cover is not None: + cover = u""" """ % self.cover + opf_contents = self.EPUB2_OPF_TEMPLATE % ( + self.args.identifier, + self.args.language_from, + self.args.title, + self.args.author, + self.args.copyright, + self.args.year, + cover, + manifest_contents, + spine_contents + ) + self.add_file("OEBPS/content.opf", opf_contents) + + def write_ncx(self): + ncx_items = [] + index = 1 + if self.args.include_index_page: + ncx_items.append(self.NCX_NAVPOINT_TEMPLATE % (index, index, "Index", "index.xhtml")) + index += 1 + for group in self.groups: + group_label = self.get_group_label(group) + group_xhtml_path = self.get_group_xhtml_file_name_from_index(index) + ncx_items.append(self.NCX_NAVPOINT_TEMPLATE % (index, index, group_label, group_xhtml_path)) + index += 1 + ncx_items = u"\n".join(ncx_items) + ncx_contents = self.NCX_TEMPLATE % (self.args.identifier, self.args.title, ncx_items) + self.add_file_manifest(u"OEBPS/toc.ncx", u"toc.ncx", ncx_contents, u"application/x-dtbncx+xml") + + def write(self, file_path_absolute, compress=True): + # get cover path + cover_path_absolute = self.args.cover_path + if cover_path_absolute is not None: + cover_path_absolute = os.path.abspath(cover_path_absolute) + + # get custom css path + custom_css_path_absolute = self.args.apply_css + if 
custom_css_path_absolute is not None: + custom_css_path_absolute = os.path.abspath(custom_css_path_absolute) + + # create new tmp directory and cd there + self.root_directory_path = create_temp_directory() + cwd = os.getcwd() + os.chdir(self.root_directory_path) + os.makedirs(u"META-INF") + os.makedirs(u"OEBPS") + + # add mimetype and container.xml + if self.ebook_format in [self.EPUB2]: # add EPUB3 here + self.add_file(u"mimetype", self.MIMETYPE_CONTENTS, mode=zipfile.ZIP_STORED) + self.add_file(u"META-INF/container.xml", self.CONTAINER_XML_CONTENTS) + + # add cover + self.write_cover(cover_path_absolute) + + # write CSS + self.write_css(custom_css_path_absolute) + + # write index + if self.args.include_index_page: + self.write_index() + + # write groups + self.write_groups() + + # write ncx + if self.ebook_format in [self.EPUB2]: # add EPUB3 here + self.write_ncx() + + # write opf + self.write_opf() + + # compress + if compress: + output_file_obj = zipfile.ZipFile(file_path_absolute, "w", compression=zipfile.ZIP_DEFLATED) + for file_to_compress in self.files: + output_file_obj.write(file_to_compress["path"], compress_type=file_to_compress["mode"]) + output_file_obj.close() + + # return to previous cwd + os.chdir(cwd) + + diff --git a/penelope/format_bookeen.py b/penelope/format_bookeen.py index 536534a..8db3eac 100644 --- a/penelope/format_bookeen.py +++ b/penelope/format_bookeen.py @@ -12,6 +12,7 @@ import sqlite3 import zipfile +from penelope.collation_default import collate_function as collate_function_default from penelope.utilities import print_debug from penelope.utilities import print_error from penelope.utilities import print_info @@ -22,14 +23,13 @@ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" CHUNK_FILE_PREFIX = "c_" CHUNK_SIZE = 262144 # 2^18 
-COLLATION_DEFAULT = os.path.join(os.path.split(__file__)[0], "collation_default.py") -EMPTY_FILE_PATH = os.path.join(os.path.split(__file__)[0], "res/empty.idx") +EMPTY_FILE_PATH = os.path.join(os.path.split(os.path.abspath(__file__))[0], "res/empty.idx") HEADER = "]>" def read(dictionary, args, input_file_string): @@ -166,9 +166,19 @@ def write(dictionary, args, output_file_path): # result to be returned result = None + # get absolute path + output_file_path_absolute = os.path.abspath(output_file_path) + + # get absolute path for collation function file + bookeen_collation_function_path = None + if args.bookeen_collation_function is not None: + bookeen_collation_function_path = os.path.abspath(args.bookeen_collation_function) + # create tmp directory + cwd = os.getcwd() tmp_path = create_temp_directory() print_debug("Working in temp dir '%s'" % (tmp_path), args.debug) + os.chdir(tmp_path) # get the basename base = os.path.basename(output_file_path) @@ -176,22 +186,22 @@ def write(dictionary, args, output_file_path): base = base[:-4] # copy empty.idx into tmp_path - idx_file_path = os.path.join(tmp_path, base + u".dict.idx") - dict_file_path = os.path.join(tmp_path, base + u".dict") + idx_file_path = base + u".dict.idx" + dict_file_path = base + u".dict" copy_file(EMPTY_FILE_PATH, idx_file_path) # open index sql_connection = sqlite3.connect(idx_file_path) # install collation in the index - collation = imp.load_source("", COLLATION_DEFAULT) - if args.bookeen_collation_function is not None: + collation_function = collate_function_default + if bookeen_collation_function_path is not None: try: - collation = imp.load_source("", args.bookeen_collation_function) - print_debug("Using collation function from '%s'" % (args.bookeen_collation_function), args.debug) + collation_function = imp.load_source("", bookeen_collation_function_path).collate_function + print_debug("Using collation function from '%s'" % (bookeen_collation_function_path), args.debug) except: - 
print_error("Unable to load collation function from '%s'. Using the default collation function instead." % (args.bookeen_collation_function)) - sql_connection.create_collation("IcuNoCase", collation.collate_function) + print_error("Unable to load collation function from '%s'. Using the default collation function instead." % (bookeen_collation_function_path)) + sql_connection.create_collation("IcuNoCase", collation_function) sql_connection.text_factory = str # get a cursor and delete any data from the index file @@ -204,7 +214,7 @@ def write(dictionary, args, output_file_path): files_to_compress = [] current_offset = 0 chunk_index = 1 - chunk_file_path = os.path.join(tmp_path, "%s%d" % (CHUNK_FILE_PREFIX, chunk_index)) + chunk_file_path = "%s%d" % (CHUNK_FILE_PREFIX, chunk_index) files_to_compress.append(chunk_file_path) chunk_file_obj = open(chunk_file_path, "wb") for entry_index in dictionary.entries_index_sorted: @@ -226,7 +236,7 @@ def write(dictionary, args, output_file_path): if current_offset > CHUNK_SIZE: chunk_file_obj.close() chunk_index += 1 - chunk_file_path = os.path.join(tmp_path, "%s%d" % (CHUNK_FILE_PREFIX, chunk_index)) + chunk_file_path = "%s%d" % (CHUNK_FILE_PREFIX, chunk_index) files_to_compress.append(chunk_file_path) chunk_file_obj = open(chunk_file_path, "wb") current_offset = 0 @@ -235,14 +245,11 @@ def write(dictionary, args, output_file_path): # compress print_debug("Compressing c_* files...", args.debug) - cwd = os.getcwd() - os.chdir(tmp_path) file_zip_obj = zipfile.ZipFile(dict_file_path, "w", zipfile.ZIP_DEFLATED) for file_to_compress in files_to_compress: file_to_compress = os.path.basename(file_to_compress) file_zip_obj.write(file_to_compress) file_zip_obj.close() - os.chdir(cwd) print_debug("Compressing c_* files... 
done", args.debug) # update index metadata @@ -269,18 +276,15 @@ def write(dictionary, args, output_file_path): sql_connection.close() # create .install file or copy .dict.idx and .dict into requested output directory - parent_output_directory = os.path.split(output_file_path)[0] + parent_output_directory = os.path.split(output_file_path_absolute)[0] if args.bookeen_install_file: print_debug("Creating .install file...", args.debug) - cwd = os.getcwd() - os.chdir(tmp_path) file_zip_path = os.path.join(parent_output_directory, base + u".install") file_zip_obj = zipfile.ZipFile(file_zip_path, "w", zipfile.ZIP_DEFLATED) for file_to_compress in [dict_file_path, idx_file_path]: file_to_compress = os.path.basename(file_to_compress) file_zip_obj.write(file_to_compress) file_zip_obj.close() - os.chdir(cwd) result = [file_zip_path] print_debug("Creating .install file... done", args.debug) else: @@ -293,6 +297,7 @@ def write(dictionary, args, output_file_path): print_debug("Copying .dict.idx and .dict files... 
done", args.debug) # delete tmp directory + os.chdir(cwd) if args.keep: print_info("Not deleting temp dir '%s'" % (tmp_path)) else: diff --git a/penelope/format_csv.py b/penelope/format_csv.py index 630aeb9..04dff88 100644 --- a/penelope/format_csv.py +++ b/penelope/format_csv.py @@ -14,7 +14,7 @@ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" diff --git a/penelope/format_epub.py b/penelope/format_epub.py index ac94a8f..40fdfce 100644 --- a/penelope/format_epub.py +++ b/penelope/format_epub.py @@ -12,395 +12,77 @@ import os import zipfile +from penelope.dictionary_ebook import DictionaryEbook +from penelope.utilities import create_temp_directory +from penelope.utilities import delete_directory from penelope.utilities import print_debug from penelope.utilities import print_error from penelope.utilities import print_info -from penelope.utilities import create_temp_directory -from penelope.utilities import copy_file -from penelope.utilities import delete_directory -from penelope.utilities import rename_file __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" -CONTAINER_TEMPLATE = u""" - - - - -""" - -OPF_TEMPLATE = u""" - - - %s - %s - %s - %s - %s - %s-01-01 - - - - -%s - - -%s - -""" - -MANIFEST_ITEM_TEMPLATE = u""" """ - -SPINE_ITEM_TEMPLATE = u""" """ - -NCX_TEMPLATE = u""" - - - - - - - - - - %s - - -%s - -""" - -NCX_NAVPOINT_TEMPLATE = u""" - - %s - - - """ - -CSS_TEMPLATE = u"""@charset "UTF-8"; -body { - margin: 10px 25px 10px 25px; -} -h1 { - font-size: 200%; -} -p { - margin-left: 0em; - margin-right: 0em; - margin-top: 0em; - margin-bottom: 0em; - line-height: 2em; - 
text-align: justify; -} -a, a:focus, a:active, a:visited { - color: black; - text-decoration: none; -} -/* -span { - margin: 0px 10px 0px 10px; - padding: 2px 2px 2px 2px; - border: solid 1px black; -} -body.index { - margin: 10px 50px 10px 50px; -} -body.letter { - margin: 10px 50px 10px 50px; -} -*/ -p.index { - font-size: 150%; -} -p.letter { - font-size: 150%; -} - -div p { - margin-left: 25px; - margin-rigth: 25px; -} - -div { - margin-top: 10px; - margin-bottom: 10px; -}""" - -INDEX_XHTML_TEMPLATE = u""" - - - - %s - - - -

%s

-

-%s -

- -""" - -INDEX_XHTML_LINK_TEMPLATE = u""" %s""" - -GROUP_XHTML_TEMPLATE = u""" - - - - %s - - - -

%s

-

- [ Previous ] - [ Index ] - [ Next ] -

-%s - -""" - -GROUP_XHTML_WORD_TEMPLATE = u""" %s""" - -GROUP_XHTML_WORD_DEFINITION_TEMPLATE = u"""
-

%s

-

%s

-
""" - def read(dictionary, args, input_file_paths): print_error("Read function not implemented for EPUB dictionaries") return None def write(dictionary, args, output_file_path): - def get_prefix(headword, length): - lowercased = headword.lower() - if ord(lowercased[0]) < 97: - return u"SPECIAL" - if len(lowercased) < length: - return lowercased - return lowercased[0:length] - - def html_escape(s): - x = s - x = x.replace("&", "&") - x = x.replace('"', """) - x = x.replace("'", "'") - x = x.replace(">", ">") - x = x.replace("<", "<") - return x - # result to be returned result = None - # create tmp directory - cwd = os.getcwd() - tmp_path = create_temp_directory() - os.chdir(tmp_path) - - # get the basename - files_to_compress = [] - base = os.path.basename(output_file_path) - if base.endswith(".epub"): - base = base[:-5] - - # create directories - os.makedirs(u"META-INF") - os.makedirs(u"OEBPS") - - # create mimetype - file_mimetype_rel_path = u"mimetype" - file_mimetype_obj = open(file_mimetype_rel_path, "wb") - file_mimetype_obj.write(u"application/epub+zip") - file_mimetype_obj.close() - - # create container.xml - file_container_rel_path = u"META-INF/container.xml" - file_container_obj = open(file_container_rel_path, "wb") - file_container_obj.write(CONTAINER_TEMPLATE.encode("utf-8")) - file_container_obj.close() - files_to_compress.append(file_container_rel_path) + # get absolute path + output_file_path_absolute = os.path.abspath(output_file_path) # sort by headword, optionally ignoring case dictionary.sort(by_headword=True, ignore_case=args.sort_ignore_case) # create groups - all_entries = [] - groups = {} - i = 0 - for index in dictionary.entries_index_sorted: - entry = dictionary.entries[index] - all_entries.append(entry) - prefix = get_prefix(entry.headword, int(args.epub_group_prefix_length)) - if not prefix in groups: - groups[prefix] = [] - groups[prefix].append(i) - i += 1 - - # merge small groups - merged_groups = [] - keys = sorted(groups.keys()) - 
accumulator_key = keys[0] - accumulator = groups[accumulator_key] - for key in keys[1:]: - if (len(accumulator) >= int(args.epub_merge_group_size)) or (key[0] != accumulator_key[0]): - merged_groups.append([accumulator_key, accumulator]) - accumulator_key = key - accumulator = groups[accumulator_key] - else: - accumulator += groups[key] - merged_groups.append([accumulator_key, accumulator]) - - # create xhtml files - manifest_items = [] - spine_items = [] - ncx_items = [] - - i = 1 - file_xhtml_rel_path_base = u"index.xhtml" - file_xhtml_rel_path = u"OEBPS/%s" % file_xhtml_rel_path_base - file_xhtml_obj = open(file_xhtml_rel_path, "wb") - j = 2 - group_links = [] - for group in merged_groups: - key = group[0] - group_links.append(INDEX_XHTML_LINK_TEMPLATE % (u"g%06d.xhtml" % (j), key)) - j += 1 - xhtml_content = INDEX_XHTML_TEMPLATE % ( - args.title, - args.title, - " •\n".join(group_links) + special_group, group_keys, group_dict = dictionary.group( + prefix_function_path=args.group_by_prefix_function, + prefix_length=int(args.group_by_prefix_length), + merge_min_size=int(args.group_by_prefix_merge_min_size), + merge_across_first=args.group_by_prefix_merge_across_first ) - file_xhtml_obj.write(xhtml_content.encode("utf-8")) - file_xhtml_obj.close() - files_to_compress.append(file_xhtml_rel_path) - manifest_items.append(MANIFEST_ITEM_TEMPLATE % (file_xhtml_rel_path_base, file_xhtml_rel_path_base)) - spine_items.append(SPINE_ITEM_TEMPLATE % (file_xhtml_rel_path_base)) - ncx_items.append(NCX_NAVPOINT_TEMPLATE % (i, i, "Table of Contents", file_xhtml_rel_path_base)) - - i = 2 - for group in merged_groups: - key = group[0] - entry_indices = group[1] - file_xhtml_rel_path_base = u"g%06d.xhtml" % i - file_xhtml_rel_path = u"OEBPS/%s" % file_xhtml_rel_path_base - file_xhtml_obj = open(file_xhtml_rel_path, "wb") - page_title = u"%s" % (key) - if i == 2: - prev_path = u"#" - else: - prev_path = u"g%06d.xhtml" % (i - 1) - if i + 1 < len(merged_groups) + 2: - next_path = 
u"g%06d.xhtml" % (i + 1) + all_group_keys = group_keys + if special_group is not None: + all_group_keys += [u"SPECIAL"] + + # create epub object + epub = DictionaryEbook(ebook_format=DictionaryEbook.EPUB2, args=args) + + # add groups + for key in all_group_keys: + if key == u"SPECIAL": + group_entries = special_group else: - next_path = u"#" - words = [] - for entry_index in entry_indices: - if args.epub_output_definitions: - headword = all_entries[entry_index].headword - if args.epub_escape_strings: - headword = html_escape(headword) - definition = all_entries[entry_index].definition - if args.epub_escape_strings: - definition = html_escape(definition) - words.append(GROUP_XHTML_WORD_DEFINITION_TEMPLATE % (headword, definition)) - else: - headword = all_entries[entry_index].headword - if args.epub_escape_strings: - headword = html_escape(headword) - words.append(GROUP_XHTML_WORD_TEMPLATE % (headword)) - if args.epub_output_definitions: - words = u"\n".join(words) - else: - words = u"

%s

" % (u" •\n".join(words)) - xhtml_content = GROUP_XHTML_TEMPLATE % ( - page_title, - page_title, - prev_path, - next_path, - words - ) - file_xhtml_obj.write(xhtml_content.encode("utf-8")) - file_xhtml_obj.close() - files_to_compress.append(file_xhtml_rel_path) - manifest_items.append(MANIFEST_ITEM_TEMPLATE % (file_xhtml_rel_path_base, file_xhtml_rel_path_base)) - spine_items.append(SPINE_ITEM_TEMPLATE % (file_xhtml_rel_path_base)) - ncx_items.append(NCX_NAVPOINT_TEMPLATE % (i, i, key, file_xhtml_rel_path_base)) - i += 1 - - manifest_items = "\n".join(manifest_items) - spine_items = "\n".join(spine_items) - ncx_items = "\n".join(ncx_items) - - # create content.opf - file_opf_rel_path = u"OEBPS/content.opf" - file_opf_obj = open(file_opf_rel_path, "wb") - opf_content = OPF_TEMPLATE % ( - args.identifier, - args.language_from, - args.title, - args.author, - args.copyright, - args.year, - manifest_items, - spine_items - ) - file_opf_obj.write((opf_content).encode("utf-8")) - file_opf_obj.close() - files_to_compress.append(file_opf_rel_path) - - # create toc.ncx - file_ncx_rel_path = u"OEBPS/toc.ncx" - file_ncx_obj = open(file_ncx_rel_path, "wb") - ncx_content = NCX_TEMPLATE % ( - args.identifier, - args.title, - ncx_items - ) - file_ncx_obj.write((ncx_content).encode("utf-8")) - file_ncx_obj.close() - files_to_compress.append(file_ncx_rel_path) - - # create style.css - file_css_rel_path = u"OEBPS/style.css" - file_css_obj = open(file_css_rel_path, "wb") - file_css_obj.write((CSS_TEMPLATE).encode("utf-8")) - file_css_obj.close() - files_to_compress.append(file_css_rel_path) - - # TODO copy cover - #file_cover_rel_path = u"cover" - #file_cover_path = os.path.join(tmp_path, file_cover_rel_path) - #if args.cover_path is not None: - # if os.path.exists(args.cover_path): - # file_cover_rel_path = os.path.basename(args.cover_path) - # file_cover_path = os.path.join(tmp_path, file_cover_rel_path) - # copy_file(args.cover_path, file_cover_path) - # else: - # 
print_error("Unable to read cover file '%s'" % (args.cover_path)) - #else: - # print_error("No cover image file specified: generating EPUB without cover") - # print_error("Use --cover-path to specify a cover image file") + group_entries = group_dict[key] + epub.add_group(key, group_entries) # create output file - output_file_obj = zipfile.ZipFile(output_file_path, "w", compression=zipfile.ZIP_DEFLATED) - output_file_obj.write(file_mimetype_rel_path, compress_type=zipfile.ZIP_STORED) - for file_to_compress in files_to_compress: - output_file_obj.write(file_to_compress) - output_file_obj.close() - os.chdir(cwd) - result = [output_file_path] + if args.epub_no_compress: + print_debug("Not compressing the EPUB container") + epub.write(output_file_path_absolute, compress=False) + else: + print_debug("Writing to file '%s'..." % (output_file_path_absolute), args.debug) + epub.write(output_file_path_absolute, compress=True) + result = [output_file_path] + print_debug("Writing to file '%s'... done" % (output_file_path_absolute), args.debug) # delete tmp directory - if args.keep: + tmp_path = epub.get_tmp_path() + if args.epub_no_compress: + print_info("The uncompressed EPUB is inside dir '%s'" % (tmp_path)) + result = [tmp_path] + elif args.keep: print_info("Not deleting temp dir '%s'" % (tmp_path)) + if result is None: + result = [tmp_path] else: - delete_directory(tmp_path) + epub.delete() print_debug("Deleted temp dir '%s'" % (tmp_path), args.debug) return result diff --git a/penelope/format_kobo.py b/penelope/format_kobo.py index 9022256..a15fea6 100644 --- a/penelope/format_kobo.py +++ b/penelope/format_kobo.py @@ -20,11 +20,13 @@ from __future__ import absolute_import from io import open +import imp import gzip import os import subprocess import zipfile +from penelope.prefix_kobo import get_prefix as get_prefix_kobo from penelope.utilities import create_temp_directory from penelope.utilities import create_temp_file from penelope.utilities import delete_directory @@ 
-37,7 +39,7 @@ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" @@ -122,56 +124,45 @@ def read_single_file(dictionary, args, input_file_path): return dictionary def write(dictionary, args, output_file_path): - def is_allowed(ch): - # all non-ascii (x > 127) are ok - # all ASCII lowercase letters (97 <= x <= 122) are ok - # everything else is not ok - code = ord(ch) - return (code > 127) or ((code >= 97) and (code <= 122)) - - def compute_prefix(headword): - # defaults to u"11" if the first two letters of headword are not valid - prefix = u"11" - headword = headword.lower() - if len(headword) > 0: - if len(headword) == 1: - # for single-letter headwords, append an 'a' at the end - # e.g. "9" => "9a" - headword += u"a" - if is_allowed(headword[0]) and is_allowed(headword[1]): - prefix = headword[0:2] - return prefix - # result to be returned result = None + # get absolute path + output_file_path_absolute = os.path.abspath(output_file_path) + + # create tmp directory + cwd = os.getcwd() + tmp_path = create_temp_directory() + print_debug("Working in temp dir '%s'" % (tmp_path), args.debug) + os.chdir(tmp_path) + # sort by headword dictionary.sort(by_headword=True) # group by prefix files_to_compress = [] - prefix_to_file = {} - for headword in dictionary.entries_index: - prefix = compute_prefix(headword) - if not prefix in prefix_to_file: - prefix_to_file[prefix] = [] - prefix_to_file[prefix] += [headword] - - # create tmp directory - tmp_path = create_temp_directory() - print_debug("Working in temp dir '%s'" % (tmp_path), args.debug) + prefix_length = int(args.group_by_prefix_length) + special_group, group_keys, group_dict = dictionary.group( + prefix_function=get_prefix_kobo, + prefix_length=prefix_length, + merge_min_size=int(args.group_by_prefix_merge_min_size), + 
merge_across_first=args.group_by_prefix_merge_across_first + ) + if special_group is not None: + special_group_key = u"1" * prefix_length + group_dict[special_group_key] = special_group + group_keys = [special_group_key] + group_keys # write files - for prefix in sorted(prefix_to_file): + for key in group_keys: # write html file - file_html_path = os.path.join(tmp_path, prefix + u".html") + file_html_path = key + u".html" file_html_obj = open(file_html_path, "wb") file_html_obj.write(u"".encode("utf-8")) - for headword in prefix_to_file[prefix]: - entries = dictionary.entries_index[headword] - for entry_index in entries: - definition = dictionary.entries[entry_index].definition - file_html_obj.write((u"
%s
%s
" % (headword, headword, definition)).encode("utf-8")) + for entry in group_dict[key]: + headword = entry.headword + definition = entry.definition + file_html_obj.write((u"
%s
%s
" % (headword, headword, definition)).encode("utf-8")) file_html_obj.write((u"").encode("utf-8")) file_html_obj.close() @@ -189,8 +180,8 @@ def compute_prefix(headword): rename_file(file_gz_path, file_html_path) files_to_compress.append(file_html_path) - # TODO write words - file_words_path = os.path.join(tmp_path, WORDS_FILE_NAME) + # write words + file_words_path = WORDS_FILE_NAME keys = sorted(dictionary.entries_index.keys()) try: import marisa_trie @@ -231,22 +222,20 @@ def compute_prefix(headword): # add file_words_path to files to compress files_to_compress.append(file_words_path) # create output zip file - cwd = os.getcwd() try: - os.chdir(tmp_path) - print_debug("Writing to file '%s'..." % (output_file_path), args.debug) - file_zip_obj = zipfile.ZipFile(output_file_path, "w", zipfile.ZIP_DEFLATED) + print_debug("Writing to file '%s'..." % (output_file_path_absolute), args.debug) + file_zip_obj = zipfile.ZipFile(output_file_path_absolute, "w", zipfile.ZIP_DEFLATED) for file_to_compress in files_to_compress: file_to_compress = os.path.basename(file_to_compress) file_zip_obj.write(file_to_compress) file_zip_obj.close() result = [output_file_path] - print_debug("Writing to file '%s'... success" % (output_file_path), args.debug) + print_debug("Writing to file '%s'... success" % (output_file_path_absolute), args.debug) except: - print_error("Writing to file '%s'... failure" % (output_file_path)) - os.chdir(cwd) + print_error("Writing to file '%s'... 
failure" % (output_file_path_absolute)) # delete tmp directory + os.chdir(cwd) if args.keep: print_info("Not deleting temp dir '%s'" % (tmp_path)) else: diff --git a/penelope/format_mobi.py b/penelope/format_mobi.py index 76e8848..6e012e3 100644 --- a/penelope/format_mobi.py +++ b/penelope/format_mobi.py @@ -12,76 +12,23 @@ import os import subprocess +from penelope.dictionary_ebook import DictionaryEbook from penelope.utilities import print_debug from penelope.utilities import print_error from penelope.utilities import print_info from penelope.utilities import create_temp_directory from penelope.utilities import copy_file from penelope.utilities import delete_directory -from penelope.utilities import rename_file __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" KINDLEGEN = u"kindlegen" -HTML_HEADER = u""" - - - %s - - -
-
- %s -
-
- -""" - -HTML_FOOTER = u""" -""" - -HTML_WORD = u""" - -

%s

-

%s

-
- -""" - -OPF_TEMPLATE = u""" - - - - %s - %s - %s - %s - %s - Dictionaries - - - - %s - %s - %s - - - - - - - - - - -""" - def read(dictionary, args, input_file_paths): print_error("Read function not implemented for MOBI dictionaries") return None @@ -90,67 +37,42 @@ def write(dictionary, args, output_file_path): # result to be returned result = None + # get absolute path + output_file_path_absolute = os.path.abspath(output_file_path) + # sort by headword, optionally ignoring case dictionary.sort(by_headword=True, ignore_case=args.sort_ignore_case) - # create tmp directory - tmp_path = create_temp_directory() - - # get the basename - base = os.path.basename(output_file_path) - if base.endswith(".mobi"): - base = base[:-5] - file_mobi_rel_path = base + u".mobi" - file_html_path = os.path.join(tmp_path, file_mobi_rel_path) - - # copy cover - file_cover_rel_path = u"cover" - file_cover_path = os.path.join(tmp_path, file_cover_rel_path) - if args.cover_path is not None: - if os.path.exists(args.cover_path): - file_cover_rel_path = os.path.basename(args.cover_path) - file_cover_path = os.path.join(tmp_path, file_cover_rel_path) - copy_file(args.cover_path, file_cover_path) - else: - print_error("Unable to read cover file '%s'" % (args.cover_path)) - else: - print_error("No cover image file specified: generating MOBI without cover") - print_error("Use --cover-path to specify a cover image file") - - # TODO split over multiple files? 
- # write .html file - print_debug("Writing .html file...", args.debug) - file_html_rel_path = u"words.html" - file_html_path = os.path.join(tmp_path, file_html_rel_path) - file_html_obj = open(file_html_path, "wb") - file_html_obj.write((HTML_HEADER % (args.title, args.title)).encode("utf-8")) - for index in dictionary.entries_index_sorted: - entry = dictionary.entries[index] - file_html_obj.write((HTML_WORD % (entry.headword, entry.definition)).encode("utf-8")) - file_html_obj.write((HTML_FOOTER).encode("utf-8")) - file_html_obj.close() - print_debug("Writing .html file... done", args.debug) - - # write .opf file - print_debug("Writing .opf file...", args.debug) - file_opf_rel_path = base + u".opf" - file_opf_path = os.path.join(tmp_path, file_opf_rel_path) - file_opf_obj = open(file_opf_path, "wb") - opf_content = OPF_TEMPLATE % ( - args.title, - args.language_from, - args.identifier, - args.author, - args.copyright, - args.language_from, - args.language_to, - file_cover_rel_path + # create groups + special_group, group_keys, group_dict = dictionary.group( + prefix_function_path=args.group_by_prefix_function, + prefix_length=int(args.group_by_prefix_length), + merge_min_size=int(args.group_by_prefix_merge_min_size), + merge_across_first=args.group_by_prefix_merge_across_first ) - file_opf_obj.write((opf_content).encode("utf-8")) - file_opf_obj.close() - print_debug("Writing .opf file... done", args.debug) + all_group_keys = group_keys + if special_group is not None: + all_group_keys += [u"SPECIAL"] + + # create mobi object + mobi = DictionaryEbook(ebook_format=DictionaryEbook.MOBI, args=args) + + # add groups + for key in all_group_keys: + if key == u"SPECIAL": + group_entries = special_group + else: + group_entries = group_dict[key] + mobi.add_group(key, group_entries) + + # create output file + print_debug("Writing to file '%s'..." 
% (output_file_path_absolute), args.debug) + mobi.write(output_file_path_absolute, compress=False) + result = [output_file_path] + print_debug("Writing to file '%s'... done" % (output_file_path_absolute), args.debug) # run kindlegen + tmp_path = mobi.get_tmp_path() if args.mobi_no_kindlegen: print_info("Not running kindlegen, the raw files are located in '%s'" % tmp_path) result = [tmp_path] @@ -158,13 +80,16 @@ def write(dictionary, args, output_file_path): try: print_debug("Creating .mobi file with kindlegen...", args.debug) kindlegen_path = KINDLEGEN + opf_file_path_absolute = os.path.join(tmp_path, "OEBPS", "content.opf") + mobi_file_path_relative = u"content.mobi" + mobi_file_path_absolute = os.path.join(tmp_path, "OEBPS", mobi_file_path_relative) if args.kindlegen_path is None: print_info(" Running '%s' from $PATH" % KINDLEGEN) else: kindlegen_path = args.kindlegen_path print_info(" Running '%s' from '%s'" % (KINDLEGEN, kindlegen_path)) proc = subprocess.Popen( - [kindlegen_path, file_opf_path, "-o", file_mobi_rel_path], + [kindlegen_path, opf_file_path_absolute, "-o", mobi_file_path_relative], stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE @@ -173,7 +98,7 @@ def write(dictionary, args, output_file_path): if args.debug: output_unicode = (output[0]).decode("utf-8") print_debug(output_unicode, args.debug) - rename_file(file_html_path, output_file_path) + copy_file(mobi_file_path_absolute, output_file_path_absolute) result = [output_file_path] print_debug("Creating .mobi file with kindlegen... done", args.debug) except OSError as exc: @@ -181,14 +106,14 @@ def write(dictionary, args, output_file_path): print_error(" Please make sure '%s':" % KINDLEGEN) print_error(" 1. is available on your $PATH or") print_error(" 2. 
specify its path with --kindlegen-path") - result = None - # delete tmp directory - if args.keep: - print_info("Not deleting temp dir '%s'" % (tmp_path)) - else: - delete_directory(tmp_path) - print_debug("Deleted temp dir '%s'" % (tmp_path), args.debug) + # delete tmp directory + tmp_path = mobi.get_tmp_path() + if args.keep: + print_info("Not deleting temp dir '%s'" % (tmp_path)) + else: + mobi.delete() + print_debug("Deleted temp dir '%s'" % (tmp_path), args.debug) return result diff --git a/penelope/format_stardict.py b/penelope/format_stardict.py index 4d38adc..1f14ad0 100644 --- a/penelope/format_stardict.py +++ b/penelope/format_stardict.py @@ -24,7 +24,7 @@ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" @@ -267,6 +267,25 @@ def write(dictionary, args, output_file_path): # result to be returned result = None + # get absolute path + output_file_path_absolute = os.path.abspath(output_file_path) + + # create tmp directory + cwd = os.getcwd() + tmp_path = create_temp_directory() + print_debug("Working in temp dir '%s'" % (tmp_path), args.debug) + os.chdir(tmp_path) + + # get the basename and compute output file paths + base = os.path.basename(output_file_path) + if base.endswith(".zip"): + base = base[:-4] + ifo_file_path = base + ".ifo" + idx_file_path = base + ".idx" + dict_file_path = base + ".dict" + dict_dz_file_path = base + ".dict.dz" + syn_file_path = base + ".syn" + # TODO by spec, the index should be sorted # TODO using the comparator stardict_strcmp() defined in the spec # TODO (it calls g_ascii_strcasecmp() and/or strcmp() ), @@ -283,20 +302,6 @@ def write(dictionary, args, output_file_path): # dictionary.sort(by_headword=True, ignore_case=True) - # create tmp directory - tmp_path = create_temp_directory() - print_debug("Working in temp dir '%s'" % 
(tmp_path), args.debug) - - # get the basename and compute output file paths - base = os.path.basename(output_file_path) - if base.endswith(".zip"): - base = base[:-4] - ifo_file_path = os.path.join(tmp_path, base + ".ifo") - idx_file_path = os.path.join(tmp_path, base + ".idx") - dict_file_path = os.path.join(tmp_path, base + ".dict") - dict_dz_file_path = os.path.join(tmp_path, base + ".dict.dz") - syn_file_path = os.path.join(tmp_path, base + ".syn") - # write .idx and .dict files print_debug("Writing .idx and .dict files...", args.debug) idx_file_obj = open(idx_file_path, "wb") @@ -397,23 +402,21 @@ def write(dictionary, args, output_file_path): ifo_file_obj.close() # create output zip file - cwd = os.getcwd() try: - os.chdir(tmp_path) - print_debug("Writing to file '%s'..." % (output_file_path), args.debug) - file_zip_obj = zipfile.ZipFile(output_file_path, "w", zipfile.ZIP_DEFLATED) + print_debug("Writing to file '%s'..." % (output_file_path_absolute), args.debug) + file_zip_obj = zipfile.ZipFile(output_file_path_absolute, "w", zipfile.ZIP_DEFLATED) for file_to_compress in files_to_compress: file_to_compress = os.path.basename(file_to_compress) file_zip_obj.write(file_to_compress) print_debug("Written %s" % (file_to_compress), args.debug) file_zip_obj.close() result = [output_file_path] - print_debug("Writing to file '%s'... success" % (output_file_path), args.debug) + print_debug("Writing to file '%s'... success" % (output_file_path_absolute), args.debug) except: - print_error("Writing to file '%s'... failure" % (output_file_path)) - os.chdir(cwd) + print_error("Writing to file '%s'... 
failure" % (output_file_path_absolute)) # delete tmp directory + os.chdir(cwd) if args.keep: print_info("Not deleting temp dir '%s'" % (tmp_path)) else: diff --git a/penelope/format_xml.py b/penelope/format_xml.py index 4c06d43..df5fa78 100644 --- a/penelope/format_xml.py +++ b/penelope/format_xml.py @@ -15,7 +15,7 @@ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" diff --git a/penelope/input_parser_identity.py b/penelope/input_parser_identity.py index 1850eb8..e9aed40 100644 --- a/penelope/input_parser_identity.py +++ b/penelope/input_parser_identity.py @@ -11,7 +11,7 @@ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" diff --git a/penelope/input_parser_webster.py b/penelope/input_parser_webster.py index 0efc527..9e52b45 100644 --- a/penelope/input_parser_webster.py +++ b/penelope/input_parser_webster.py @@ -9,7 +9,7 @@ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" diff --git a/penelope/prefix_default.py b/penelope/prefix_default.py new file mode 100644 index 0000000..5c60f8f --- /dev/null +++ b/penelope/prefix_default.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +This is the default prefix function for grouping headwords. 
+""" + +__author__ = "Alberto Pettarin" +__copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" +__license__ = "MIT" +__version__ = "3.1.0" +__email__ = "alberto@albertopettarin.it" +__status__ = "Production" + +def get_prefix(headword, length): + """ + Return the prefix for the given headword, + of length length. + + :param headword: the headword string + :type headword: unicode + :param length: prefix length + :type length: int + :rtype: unicode + """ + if headword is None: + return None + lowercased = headword.lower() + if ord(lowercased[0]) < 97: + return u"SPECIAL" + if len(lowercased) < length: + return lowercased + return lowercased[0:length] + + diff --git a/penelope/prefix_kobo.py b/penelope/prefix_kobo.py new file mode 100644 index 0000000..44930ba --- /dev/null +++ b/penelope/prefix_kobo.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +This is the prefix function for grouping headwords for Kobo format. +""" + +__author__ = "Alberto Pettarin" +__copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" +__license__ = "MIT" +__version__ = "3.1.0" +__email__ = "alberto@albertopettarin.it" +__status__ = "Production" + +def get_prefix(headword, length): + """ + Return the prefix for the given headword, + of length length. + + Note that the procedure implemented here is the result + of reverse engineering, since no official specification + has been published by Kobo so far. YMMV. 
+ + :param headword: the headword string + :type headword: unicode + :param length: prefix length + :type length: int + :rtype: unicode + """ + def is_allowed(character): + # all non-ascii (x > 127) are ok + # all ASCII lowercase letters (97 <= x <= 122) are ok + # everything else is not ok + try: + code = ord(character) + return (code > 127) or ((code >= 97) and (code <= 122)) + except: + pass + return True + + # defaults to u"SPECIAL", it will be mapped to u"11...1" later + prefix = u"SPECIAL" + headword = headword.lower() + if len(headword) > 0: + while len(headword) < length: + # for headwords shorter than length, append an 'a' at the end + # e.g. length=3, "xy" => "xya" + headword += u"a" + # TODO maybe the check should be done only for the first character + is_ok = True + for character in headword: + if not is_allowed(character): + is_ok = False + break + if is_ok: + prefix = headword[0:length] + return prefix + + + diff --git a/penelope/utilities.py b/penelope/utilities.py index fcee638..e5d0f57 100644 --- a/penelope/utilities.py +++ b/penelope/utilities.py @@ -7,16 +7,18 @@ from __future__ import absolute_import from __future__ import print_function +from io import open import imp import os import shutil import tempfile import uuid +import zipfile __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = "alberto@albertopettarin.it" __status__ = "Production" @@ -59,7 +61,7 @@ def create_temp_directory(): def copy_file(origin, destination): try: - shutil.copyfile(origin, destination) + shutil.copy(origin, destination) except: pass diff --git a/setup.py b/setup.py index 59f0bee..ff49dca 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ __author__ = "Alberto Pettarin" __copyright__ = "Copyright 2012-2015, Alberto Pettarin (www.albertopettarin.it)" __license__ = "MIT" -__version__ = "3.0.1" +__version__ = "3.1.0" __email__ = 
"alberto@albertopettarin.it" __status__ = "Production" @@ -18,7 +18,7 @@ name="penelope", packages=["penelope"], package_data={"penelope": ["res/*"]}, - version="3.0.1.11", + version="3.1.0.1", description="Penelope is a multi-tool for creating, editing and converting dictionaries, especially for eReader devices", author="Alberto Pettarin", author_email="alberto@albertopettarin.it",