Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for base directions #484

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions src/IRIs.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ export default {
string: `${XSD}string`,
},
rdf: {
type: `${RDF}type`,
nil: `${RDF}nil`,
first: `${RDF}first`,
rest: `${RDF}rest`,
langString: `${RDF}langString`,
type: `${RDF}type`,
nil: `${RDF}nil`,
first: `${RDF}first`,
rest: `${RDF}rest`,
langString: `${RDF}langString`,
dirLangString: `${RDF}dirLangString`,
},
owl: {
sameAs: 'http://www.w3.org/2002/07/owl#sameAs',
Expand Down
47 changes: 38 additions & 9 deletions src/N3DataFactory.js
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,17 @@ export class Literal extends Term {
// Find the last quotation mark (e.g., '"abc"@en-us')
const id = this.id;
let atPos = id.lastIndexOf('"') + 1;
const dirPos = id.lastIndexOf('--');
// If "@" follows, return the remaining substring; empty otherwise
return atPos < id.length && id[atPos++] === '@' ? id.substr(atPos).toLowerCase() : '';
return atPos < id.length && id[atPos++] === '@' ? (dirPos > atPos ? id.substr(0, dirPos) : id).substr(atPos).toLowerCase() : '';
}

// ### The direction of this literal
get direction() {
// Find the last double dash (e.g., '"abc"@en-us--ltr')
const id = this.id;
const atPos = id.lastIndexOf('--') + 2;
return atPos > 1 && atPos < id.length ? id.substr(atPos).toLowerCase() : '';
}

// ### The datatype IRI of this literal
Expand All @@ -104,8 +113,8 @@ export class Literal extends Term {
const char = dtPos < id.length ? id[dtPos] : '';
// If "^" it follows, return the remaining substring
return char === '^' ? id.substr(dtPos + 2) :
// If "@" follows, return rdf:langString; xsd:string otherwise
(char !== '@' ? xsd.string : rdf.langString);
// If "@" follows, return rdf:langString or rdf:dirLangString; xsd:string otherwise
(char !== '@' ? xsd.string : (id.indexOf('--', dtPos) > 0 ? rdf.dirLangString : rdf.langString));
}

// ### Returns whether this object represents the same term as the other
Expand All @@ -119,14 +128,16 @@ export class Literal extends Term {
this.termType === other.termType &&
this.value === other.value &&
this.language === other.language &&
((this.direction === other.direction) || (this.direction === '' && !other.direction)) &&
this.datatype.value === other.datatype.value;
}

toJSON() {
return {
termType: this.termType,
value: this.value,
language: this.language,
termType: this.termType,
value: this.value,
language: this.language,
direction: this.direction,
datatype: { termType: 'NamedNode', value: this.datatypeString },
};
}
Expand Down Expand Up @@ -216,9 +227,22 @@ export function termFromId(id, factory, nested) {
return factory.literal(id.substr(1, id.length - 2));
// Literal with datatype or language
const endPos = id.lastIndexOf('"', id.length - 1);
let languageOrDatatype;
if (id[endPos + 1] === '@') {
languageOrDatatype = id.substr(endPos + 2);
const dashDashIndex = languageOrDatatype.lastIndexOf('--');
if (dashDashIndex > 0 && dashDashIndex < languageOrDatatype.length) {
languageOrDatatype = {
language: languageOrDatatype.substr(0, dashDashIndex),
direction: languageOrDatatype.substr(dashDashIndex + 2),
};
}
}
else {
languageOrDatatype = factory.namedNode(id.substr(endPos + 3));
}
return factory.literal(id.substr(1, endPos - 1),
id[endPos + 1] === '@' ? id.substr(endPos + 2)
: factory.namedNode(id.substr(endPos + 3)));
languageOrDatatype);
Comment on lines +230 to +245
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Fix variable declaration in switch case.

The variable declaration in the switch case could lead to scoping issues. Wrap the declaration in a block to restrict its access.

Apply this diff to fix the scoping issue:

  case '"':
+   {
    // Shortcut for internal literals
    if (factory === DataFactory)
      return new Literal(id);
    // Literal without datatype or language
    if (id[id.length - 1] === '"')
      return factory.literal(id.substr(1, id.length - 2));
    // Literal with datatype or language
    const endPos = id.lastIndexOf('"', id.length - 1);
    let languageOrDatatype;
    if (id[endPos + 1] === '@') {
      languageOrDatatype = id.substr(endPos + 2);
      const dashDashIndex = languageOrDatatype.lastIndexOf('--');
      if (dashDashIndex > 0 && dashDashIndex < languageOrDatatype.length) {
        languageOrDatatype = {
          language: languageOrDatatype.substr(0, dashDashIndex),
          direction: languageOrDatatype.substr(dashDashIndex + 2),
        };
      }
    }
    else {
      languageOrDatatype = factory.namedNode(id.substr(endPos + 3));
    }
    return factory.literal(id.substr(1, endPos - 1),
            languageOrDatatype);
+   }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
let languageOrDatatype;
if (id[endPos + 1] === '@') {
languageOrDatatype = id.substr(endPos + 2);
const dashDashIndex = languageOrDatatype.lastIndexOf('--');
if (dashDashIndex > 0 && dashDashIndex < languageOrDatatype.length) {
languageOrDatatype = {
language: languageOrDatatype.substr(0, dashDashIndex),
direction: languageOrDatatype.substr(dashDashIndex + 2),
};
}
}
else {
languageOrDatatype = factory.namedNode(id.substr(endPos + 3));
}
return factory.literal(id.substr(1, endPos - 1),
id[endPos + 1] === '@' ? id.substr(endPos + 2)
: factory.namedNode(id.substr(endPos + 3)));
languageOrDatatype);
{
// Shortcut for internal literals
if (factory === DataFactory)
return new Literal(id);
// Literal without datatype or language
if (id[id.length - 1] === '"')
return factory.literal(id.substr(1, id.length - 2));
// Literal with datatype or language
const endPos = id.lastIndexOf('"', id.length - 1);
let languageOrDatatype;
if (id[endPos + 1] === '@') {
languageOrDatatype = id.substr(endPos + 2);
const dashDashIndex = languageOrDatatype.lastIndexOf('--');
if (dashDashIndex > 0 && dashDashIndex < languageOrDatatype.length) {
languageOrDatatype = {
language: languageOrDatatype.substr(0, dashDashIndex),
direction: languageOrDatatype.substr(dashDashIndex + 2),
};
}
}
else {
languageOrDatatype = factory.namedNode(id.substr(endPos + 3));
}
return factory.literal(id.substr(1, endPos - 1),
languageOrDatatype);
}
🧰 Tools
🪛 Biome (1.9.4)

[error] 230-230: Other switch clauses can erroneously access this declaration.
Wrap the declaration in a block to restrict its access to the switch clause.

The declaration is defined in this switch clause:

Unsafe fix: Wrap the declaration in a block.

(lint/correctness/noSwitchDeclarations)

case '[':
id = JSON.parse(id);
break;
Expand Down Expand Up @@ -255,7 +279,7 @@ export function termToId(term, nested) {
case 'Variable': return `?${term.value}`;
case 'DefaultGraph': return '';
case 'Literal': return `"${term.value}"${
term.language ? `@${term.language}` :
term.language ? `@${term.language}${term.direction ? `--${term.direction}` : ''}` :
(term.datatype && term.datatype.value !== xsd.string ? `^^${term.datatype.value}` : '')}`;
case 'Quad':
const res = [
Expand Down Expand Up @@ -350,6 +374,11 @@ function literal(value, languageOrDataType) {
if (typeof languageOrDataType === 'string')
return new Literal(`"${value}"@${languageOrDataType.toLowerCase()}`);

// Create a language-tagged string with base direction
if (languageOrDataType !== undefined && !('termType' in languageOrDataType)) {
return new Literal(`"${value}"@${languageOrDataType.language.toLowerCase()}--${languageOrDataType.direction.toLowerCase()}`);
}

// Automatically determine datatype for booleans and numbers
let datatype = languageOrDataType ? languageOrDataType.value : '';
if (datatype === '') {
Expand Down
11 changes: 10 additions & 1 deletion src/N3Lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const lineModeRegExps = {
_unescapedIri: true,
_simpleQuotedString: true,
_langcode: true,
_dircode: true,
_blank: true,
_newline: true,
_comment: true,
Expand All @@ -38,7 +39,8 @@ export default class N3Lexer {
this._unescapedIri = /^<([^\x00-\x20<>\\"\{\}\|\^\`]*)>[ \t]*/; // IRI without escape sequences; no unescaping
this._simpleQuotedString = /^"([^"\\\r\n]*)"(?=[^"])/; // string without escape sequences
this._simpleApostropheString = /^'([^'\\\r\n]*)'(?=[^'])/;
this._langcode = /^@([a-z]+(?:-[a-z0-9]+)*)(?=[^a-z0-9\-])/i;
this._langcode = /^@([a-z]+(?:-[a-z0-9]+)*)(?=[^a-z0-9])/i;
this._dircode = /^--(ltr|rtl)/; // base direction suffix; the alternation is grouped so "^--" anchors both "ltr" and "rtl"
this._prefix = /^((?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:(?=[#\s<])/;
this._prefixed = /^((?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:((?:(?:[0-:A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])(?:(?:[\.\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])*(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~]))?)?)(?:[ \t]+|(?=\.?[,;!\^\s#()\[\]\{\}"'<>]))/;
this._variable = /^\?(?:(?:[A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)(?=[.,;!\^\s#()\[\]\{\}"'<>])/;
Expand Down Expand Up @@ -240,6 +242,13 @@ export default class N3Lexer {
case '9':
case '+':
case '-':
if (input[1] === '-') {
// Try to find a direction code
if (this._previousMarker === 'langcode' && (match = this._dircode.exec(input)))
type = 'dircode', matchLength = 2, value = (match[1] || match[2]), matchLength = value.length + 2;
break;
}

// Try to find a number. Since it can contain (but not end with) a dot,
// we always need a non-dot character before deciding it is a number.
// Therefore, try inserting a space if we're at the end of the input.
Expand Down
47 changes: 41 additions & 6 deletions src/N3Parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -522,9 +522,10 @@ export default class N3Parser {
}

// ### `_completeLiteral` completes a literal with an optional datatype or language
_completeLiteral(token) {
_completeLiteral(token, component) {
// Create a simple string literal by default
let literal = this._factory.literal(this._literalValue);
let readCb;

switch (token.type) {
// Create a datatyped literal
Expand All @@ -538,37 +539,71 @@ export default class N3Parser {
// Create a language-tagged string
case 'langcode':
literal = this._factory.literal(this._literalValue, token.value);
this._literalLanguage = token.value;
token = null;
readCb = this._readDirCode.bind(this, component);
break;
}

return { token, literal };
return { token, literal, readCb };
}

_readDirCode(component, listItem, token) {
// Attempt to read a dircode
if (token.type === 'dircode') {
const term = this._factory.literal(this._literalValue, { language: this._literalLanguage, direction: token.value });
if (component === 'subject')
this._subject = term;
else
this._object = term;
this._literalLanguage = undefined;
token = null;
}

if (component === 'subject')
return token === null ? this._readPredicateOrNamedGraph : this._readPredicateOrNamedGraph(token);
return this._completeObjectLiteralPost(token, listItem);
Comment on lines +551 to +565
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add validation for direction codes.

The _readDirCode method should validate the direction code values.

 _readDirCode(component, listItem, token) {
   if (token.type === 'dircode') {
+    const validDirections = ['ltr', 'rtl'];
+    if (!validDirections.includes(token.value)) {
+      return this._error(`Invalid direction code "${token.value}"`, token);
+    }
     const term = this._factory.literal(this._literalValue, { language: this._literalLanguage, direction: token.value });
     if (component === 'subject')
       this._subject = term;
     else
       this._object = term;
     this._literalLanguage = undefined;
     token = null;
   }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
_readDirCode(component, listItem, token) {
// Attempt to read a dircode
if (token.type === 'dircode') {
const term = this._factory.literal(this._literalValue, { language: this._literalLanguage, direction: token.value });
if (component === 'subject')
this._subject = term;
else
this._object = term;
this._literalLanguage = undefined;
token = null;
}
if (component === 'subject')
return token === null ? this._readPredicateOrNamedGraph : this._readPredicateOrNamedGraph(token);
return this._completeObjectLiteralPost(token, listItem);
_readDirCode(component, listItem, token) {
// Attempt to read a dircode
if (token.type === 'dircode') {
const validDirections = ['ltr', 'rtl'];
if (!validDirections.includes(token.value)) {
return this._error(`Invalid direction code "${token.value}"`, token);
}
const term = this._factory.literal(this._literalValue, { language: this._literalLanguage, direction: token.value });
if (component === 'subject')
this._subject = term;
else
this._object = term;
this._literalLanguage = undefined;
token = null;
}
if (component === 'subject')
return token === null ? this._readPredicateOrNamedGraph : this._readPredicateOrNamedGraph(token);
return this._completeObjectLiteralPost(token, listItem);

}

// Completes a literal in subject position
_completeSubjectLiteral(token) {
this._subject = this._completeLiteral(token).literal;
const completed = this._completeLiteral(token, 'subject');
this._subject = completed.literal;

// Postpone completion if the literal is only partially completed (such as lang+dir).
if (completed.readCb)
return completed.readCb.bind(this, false);

return this._readPredicateOrNamedGraph;
}

// Completes a literal in object position
_completeObjectLiteral(token, listItem) {
const completed = this._completeLiteral(token);
const completed = this._completeLiteral(token, 'object');
if (!completed)
return;

this._object = completed.literal;

// Postpone completion if the literal is only partially completed (such as lang+dir).
if (completed.readCb)
return completed.readCb.bind(this, listItem);

return this._completeObjectLiteralPost(completed.token, listItem);
}

_completeObjectLiteralPost(token, listItem) {
// If this literal was part of a list, write the item
// (we could also check the context stack, but passing in a flag is faster)
if (listItem)
this._emit(this._subject, this.RDF_FIRST, this._object, this._graph);
// If the token was consumed, continue with the rest of the input
if (completed.token === null)
if (token === null)
return this._getContextEndReader();
// Otherwise, consume the token now
else {
this._readCallback = this._getContextEndReader();
return this._readCallback(completed.token);
return this._readCallback(token);
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/N3Writer.js
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,9 @@ export default class N3Writer {
value = value.replace(escapeAll, characterReplacer);

// Write a language-tagged literal
const direction = literal.direction ? `--${literal.direction}` : '';
if (literal.language)
return `"${value}"@${literal.language}`;
return `"${value}"@${literal.language}${direction}`;

// Write dedicated literals per data type
if (this._lineMode) {
Expand Down
Loading