Merge pull request #14 from vict0rsch/biorxiv

vict0rsch · Nov 15, 2021 · 044ebb9 · 044ebb9
2 parents 82749a9 + a4ad465
commit 044ebb9
Show file tree

Hide file tree

Showing 11 changed files with 111 additions and 24 deletions.
diff --git a/Readme.md b/Readme.md
@@ -1,16 +1,20 @@
 # My Paper Memory ⚡
 
-<br/><br/>
+<br/>
 
 <p align="center">
-    <a href="https://chrome.google.com/webstore/detail/arxivtools/hmebhknlgddhfbbdhgplnillngljgmdi?authuser=1&hl=fr">
-        <strong>
-            🏪 Install from the Chrome (& Brave) web-store
-        </strong>
+    🏪 Official stores
+    <br/>
+    <a href="https://chrome.google.com/webstore/detail/arxivtools/hmebhknlgddhfbbdhgplnillngljgmdi">
+        <strong>Chrome & Brave</strong>
+    </a>
+    &nbsp;•&nbsp;
+    <a href="https://addons.mozilla.org/en-US/firefox/addon/paper-memory/">
+        <strong>Firefox</strong>
     </a>
 </p>
 
-<br/><br/>
+<br/>
 
 An **automated**, web-based and minimalist reference manager.
 
@@ -28,6 +32,7 @@ This browser extension allows you to do automatically store research papers you
 Supported venues:
 
 * arxiv.org
+* biorxiv.org
 * neurips.cc (NeurIPS)
 * openreview.net (ICLR etc.)
 * openaccess.thecvf.com (I/ECCV, CVPR etc.)

diff --git a/contributing.md b/contributing.md
@@ -66,7 +66,7 @@ TODO
 * update `content_script.js:makePaper()` to create a new entry
   * Typically, add a parser function in `parsers.js`  
 * `memory.js:focusExistingOrCreateNewPaperTab()` -> update the `match` creation process to define the piece of a pdf's URL which should be matched to existing tabs in order to focus it.
-* Update `functions.js:paperToAbs()` to enable to pdf->webpage button
+* Update `functions.js:paperToAbs()` and `functions.js:paperToPDF()` to enable the pdf<->webpage button
 * Update `manifest.json` to
   * trigger `content_script.js` in the correct domains
   * enable your parsing function to fetch/query the data you need

diff --git a/manifest.json b/manifest.json
@@ -1,6 +1,6 @@
 {
   "name": "Paper Memory",
-  "version": "0.3.0",
+  "version": "0.3.1",
   "manifest_version": 2,
   "description": "Automatically record papers you read from Arxiv, OpenReview & more! Organize your library with tags, links to code and quick notes.",
   "homepage_url": "https://github.com/vict0rsch/PaperMemory",
@@ -29,6 +29,7 @@
   },
   "permissions": [
     "https://arxiv.org/*",
+    "*://api.biorxiv.org/*",
     "https://export.arxiv.org/*",
     "*://*.neurips.cc/*",
     "*://*.nips.cc/*",
@@ -45,6 +46,7 @@
     {
       "matches": [
         "*://arxiv.org/*",
+        "*://*.biorxiv.org/*",
         "*://*.arxiv-vanity.com/*",
         "*://*.neurips.cc/*",
         "*://*.nips.cc/*",

diff --git a/src/content_scripts/content_script.js b/src/content_scripts/content_script.js
@@ -239,6 +239,9 @@ const makePaper = async (is, url, id) => {
     } else if (is.openreview) {
         paper = await parseOpenReviewJSON(url);
         paper.source = "openreview";
+    } else if (is.biorxiv) {
+        paper = await parseBiorxivJSON(url);
+        paper.source = "biorxiv";
     } else {
         throw Error("Unknown paper source: " + JSON.stringify({ is, url, id }));
     }
@@ -529,14 +532,13 @@ const vanity = () => {
 
 $(() => {
     const url = window.location.href;
-
+    info("Executing Paper Memory content script");
     if (
         Object.values(global.knownPaperPages)
             .reduce((a, b) => a.concat(b), [])
             .some((d) => url.includes(d))
     ) {
-        // not on a paper page
-        info("Executing Paper Memory content script");
+        info("Running contentScriptMain for", url);
         contentScriptMain(url);
     }
 

diff --git a/src/popup/popup-js.min.js b/src/popup/popup-js.min.js
diff --git a/src/popup/popup-js/memory.js b/src/popup/popup-js/memory.js
@@ -137,6 +137,9 @@ const focusExistingOrCreateNewPaperTab = (paper) => {
         .replace(".pdf", "") // remove .pdf (cvf)
         .split("?") // remove get args if any
         .reverse()[0]; // find id (openreview)
+    if (paper.source === "biorxiv") {
+        match = cleanBiorxivURL(paper.pdfLink);
+    }
     if (match.match(/\d{5}v\d+$/) && paper.source === "arxiv") {
         // remove potential pdf version on arxiv
         match = match.split("v")[0];

diff --git a/src/popup/popup-js/popup.js b/src/popup/popup-js/popup.js
@@ -142,7 +142,7 @@ const popupMain = async (url, isKnownPage) => {
         }
 
         const paper = global.state.papers[id];
-        const eid = paper.id.replace(".", "\\.");
+        const eid = paper.id.replaceAll(".", "\\.");
 
         // -----------------------------
         // -----  Fill Paper Data  -----
@@ -157,6 +157,7 @@ const popupMain = async (url, isKnownPage) => {
         // ----------------------------------
         // -----  Customize Popup html  -----
         // ----------------------------------
+        console.log(paper);
         setHTML("popup-memory-edit", getPopupEditFormHTML(paper));
         setHTML("popup-copy-icons", getPopupPaperIconsHTML(paper, url));
         findEl(`checkFavorite--${id}`).checked = paper.favorite;

diff --git a/src/shared/utils.min.js b/src/shared/utils.min.js
diff --git a/src/shared/utils/config.js b/src/shared/utils/config.js
@@ -68,6 +68,7 @@ global.knownPaperPages = {
     neurips: ["neurips.cc/paper/", "nips.cc/paper/"],
     cvf: ["openaccess.thecvf.com/content"],
     openreview: ["openreview.net/forum", "openreview.net/pdf"],
+    biorxiv: ["biorxiv.org/content"],
 };
 
 global.overrideORConfs = {

diff --git a/src/shared/utils/functions.js b/src/shared/utils/functions.js
@@ -711,6 +711,14 @@ const isPaper = (url) => {
     return is;
 };
 
+/**
+ * Reduces a bioRxiv URL to the bare address of the paper, e.g.
+ * `https://www.biorxiv.org/content/10.1101/2021.11.12.468414v1`.
+ *
+ * The query string and fragment are dropped first, then the `.full.pdf`
+ * suffix; if the result still does not end with a digit, everything after
+ * the last `.` is removed as well.
+ *
+ * @param {string} url - bioRxiv web page or pdf URL
+ * @returns {string} the URL without query, fragment or trailing suffix
+ */
+const cleanBiorxivURL = (url) => {
+    // "?versioned=true" or "#comments" would otherwise be treated as a dotted
+    // suffix (taking the end of the DOI with it) or survive untouched when
+    // it happens to end with a digit.
+    let cleaned = url.split(/[?#]/)[0].replace(".full.pdf", "");
+    if (!cleaned.match(/\d$/)) {
+        cleaned = cleaned.split(".").slice(0, -1).join(".");
+    }
+    return cleaned;
+};
+
 const parseIdFromUrl = (url) => {
     const is = isPaper(url);
     if (is.arxiv) {
@@ -728,6 +736,13 @@ const parseIdFromUrl = (url) => {
             return p.id.includes(OR_id);
         })[0];
         return paper && paper.id;
+    } else if (is.biorxiv) {
+        url = cleanBiorxivURL(url);
+        let id = url.split("/").reverse()[0];
+        if (id.match(/v\d+$/)) {
+            id = id.split("v")[0];
+        }
+        return `Biorxiv-${id}`;
     } else {
         throw Error("unknown paper url");
     }
@@ -738,7 +753,7 @@ const paperToAbs = (paper) => {
     let abs = "";
     switch (paper.source) {
         case "arxiv":
-            abs = `https://arxiv.org/pdf/${paper.id.split("-")[1]}`;
+            abs = `https://arxiv.org/abs/${paper.id.split("-")[1]}`;
             break;
 
         case "neurips":
@@ -755,6 +770,10 @@ const paperToAbs = (paper) => {
             abs = pdf.replace("/pdf?", "/forum?");
             break;
 
+        case "biorxiv":
+            abs = pdf.replace(".full.pdf", "");
+            break;
+
         default:
             abs = "https://xkcd.com/1969/";
             break;
@@ -763,33 +782,36 @@ const paperToAbs = (paper) => {
     return abs.replace("http://", "https://");
 };
 const paperToPDF = (paper) => {
-    const pdf = paper.pdfLink;
-    let abs = "";
+    let pdf = paper.pdfLink;
     switch (paper.source) {
         case "arxiv":
-            abs = `https://arxiv.org/abs/${paper.id.split("-")[1]}`;
+            pdf = `https://arxiv.org/pdf/${paper.id.split("-")[1]}.pdf`;
             break;
 
         case "neurips":
-            abs = pdf
+            pdf = pdf
                 .replace("/hash/", "/file/")
                 .replace("-Abstract.html", "-Paper.pdf");
             break;
 
         case "cvf":
-            abs = pdf.replace("/html/", "/papers/").replace(".html", ".pdf");
+            pdf = pdf.replace("/html/", "/papers/").replace(".html", ".pdf");
             break;
 
         case "openreview":
-            abs = pdf.replace("/forum?", "/pdf?");
+            pdf = pdf.replace("/forum?", "/pdf?");
+            break;
+
+        case "biorxiv":
+            pdf = cleanBiorxivURL(pdf) + ".full.pdf";
             break;
 
         default:
-            abs = "https://xkcd.com/1969/";
+            pdf = "https://xkcd.com/1969/";
             break;
     }
 
-    return abs.replace("http://", "https://");
+    return pdf.replace("http://", "https://");
 };
 
 const textareaFocusEnd = (element) => {

diff --git a/src/shared/utils/parsers.js b/src/shared/utils/parsers.js
@@ -307,6 +307,57 @@ const parseOpenReviewJSON = async (url) => {
     return { author, bibtex, conf, id, key, note, pdfLink, title, year };
 };
 
+/**
+ * Builds a paper entry from a bioRxiv URL.
+ *
+ * Three requests are made: the bioRxiv details API (title and date), the
+ * paper's web page (to locate its BibTeX link) and the BibTeX file itself
+ * (authors and citation key).
+ *
+ * @param {string} url - bioRxiv URL of the paper (web page or `.full.pdf`)
+ * @returns {Promise<object>} `{ author, bibtex, conf, id, key, note, pdfLink, title, year }`
+ * @throws {Error} when the API status is not "ok", the API returns no record,
+ *     the page has no BibTeX link, or the BibTeX has no author field
+ */
+const parseBiorxivJSON = async (url) => {
+    // No trailing slash: the path appended below already starts with one.
+    const biorxivAPI = "https://api.biorxiv.org";
+    const pageURL = url.replace(".full.pdf", "");
+    // Last two path segments, without the pdf suffix and the "v<n>" version.
+    const biorxivID = url
+        .split("/")
+        .slice(-2)
+        .join("/")
+        .replace(".full.pdf", "")
+        .split("v")[0];
+    const api = `${biorxivAPI}/details/biorxiv/${biorxivID}`;
+    const data = await (await fetch(api)).json();
+
+    if (data.messages[0].status !== "ok")
+        throw new Error(`${api} returned ${data.messages[0].status}`);
+
+    // Last entry of the collection, read without reversing the response in
+    // place. NOTE(review): presumably the most recent version — confirm
+    // against the API documentation.
+    const paper = data.collection[data.collection.length - 1];
+    if (!paper) throw new Error(`${api} returned no record`);
+
+    const pageText = await (await fetch(pageURL)).text();
+    const dom = new DOMParser().parseFromString(
+        pageText.replaceAll("\n", ""),
+        "text/html"
+    );
+    const bibtexHref = dom.querySelector(".bibtext a")?.getAttribute("href");
+    if (!bibtexHref) throw new Error(`No BibTeX link found on ${pageURL}`);
+    // Resolve against the paper's page rather than relying on the parsed
+    // document's base URL, which is inherited from the calling document.
+    const bibtextLink = new URL(bibtexHref, pageURL).href;
+    const bibtex = await (await fetch(bibtextLink)).text();
+
+    const authorField = bibtex.match(/author\ ?=\ ?{.+}/);
+    if (!authorField)
+        throw new Error(`No author field in BibTeX from ${bibtextLink}`);
+    // "Last, First and Last, First" -> "First Last and First Last"
+    const author = authorField[0]
+        .replace(/author\ ?=\ ?{/, "")
+        .trim()
+        .slice(0, -1)
+        .replaceAll("{", "")
+        .replaceAll("}", "")
+        .replaceAll("\\", "")
+        .split(" and ")
+        .map((a) => a.split(", ").reverse().join(" "))
+        .join(" and ");
+
+    const conf = "BioRxiv";
+    const id = parseIdFromUrl(url);
+    // Citation key: what sits between "{" and "," on the first BibTeX line.
+    const key = bibtex.split("\n")[0].split("{")[1].replace(",", "").trim();
+    const note = "";
+    const pdfLink = cleanBiorxivURL(url) + ".full.pdf";
+    const title = paper.title;
+    const year = paper.date.split("-")[0];
+
+    return { author, bibtex, conf, id, key, note, pdfLink, title, year };
+};
+
 // ----------------------------------------------
 // -----  Papers With Code: non functional  -----
 // ----------------------------------------------