Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes up formatting around anchor tags #21

Merged
merged 3 commits
on Jul 3, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum
dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor
invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos
et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea
takimata sanctus est Lorem ipsum dolor sit amet.www.github.com
takimata sanctus est Lorem ipsum dolor sit amet. Github [www.github.com]

At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd
gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum
Expand Down Expand Up @@ -256,7 +256,7 @@ MAILTO FORMATTING
Some Company
Some Street 42
Somewhere
E-Mail:test@example.com
E-Mail: Click here [test@example.com]
```

## License
Expand Down
22 changes: 16 additions & 6 deletions lib/formatter.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ var helper = require('./helper');
function formatText(elem, options) {
var text = _s.strip(elem.raw);
text = helper.decodeHTMLEntities(text);
return helper.wordwrap(text, options.wordwrap);
return helper.wordwrap(elem.needsSpace ? ' ' + text : text, options.wordwrap);
};

function formatLineBreak(elem, fn, options) {
Expand All @@ -21,13 +21,23 @@ function formatHeading(elem, fn, options) {
return fn(elem.children, options).toUpperCase() + '\n';
}

// If we have both href and anchor text, format it in a useful manner:
// - "anchor text [href]"
// Otherwise if we have only anchor text or an href, we return the part we have:
// - "anchor text" or
// - "href"
function formatAnchor(elem, fn, options) {
var href = '';
// Always get the anchor text
var result = _s.strip(fn(elem.children || [], options));
// Get the href, if present
if (elem.attribs && elem.attribs.href) {
return elem.attribs.href.replace(/^mailto\:/, '');
}
else {
return helper.wordwrap(helper.decodeHTMLEntities(_s.strip(elem.raw)), options.wordwrap);
}
href = elem.attribs.href.replace(/^mailto\:/, '');
}
if (result && href) {
result += ' [' + href + ']';
}
return formatText({ raw: result || href, needsSpace: elem.needsSpace }, options);
};

function formatHorizontalLine(elem, fn, options) {
Expand Down
5 changes: 3 additions & 2 deletions lib/helper.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ exports.decodeHTMLEntities = function decodeHTMLEntities(text) {
};

exports.wordwrap = function wordwrap(text, max) {
var result = '';
// Preserve leading space
var result = _s.startsWith(text, ' ') ? ' ' : '';
var words = _s.words(text);
var length = 0;
var length = result.length;
var buffer = [];
_.each(words, function(word) {
if (length + word.length > max) {
Expand Down
11 changes: 10 additions & 1 deletion lib/html-to-text.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,15 @@ function containsTable(attr, tables) {

function walk(dom, options) {
var result = '';
var whiteSpaceRegex = /\S$/;
_.each(dom, function(elem) {
switch(elem.type) {
case 'tag':
switch(elem.name.toLowerCase()) {
case 'a':
// Inline element needs a leading space if `result` is non-empty and
// doesn't currently end with whitespace (i.e. its last character matches \S)
elem.needsSpace = whiteSpaceRegex.test(result);
result += format.anchor(elem, walk, options);
break;
case 'p':
Expand Down Expand Up @@ -111,7 +115,12 @@ function walk(dom, options) {
}
break;
case 'text':
if (elem.raw !== '\r\n') result += format.text(elem, options);
if (elem.raw !== '\r\n') {
// Text needs a leading space if `result` is non-empty and
// doesn't currently end with whitespace (i.e. its last character matches \S)
elem.needsSpace = whiteSpaceRegex.test(result);
result += format.text(elem, options);
}
break;
default:
if (!_.include(SKIP_TYPES, elem.type)) {
Expand Down