Merge pull request #16 from vict0rsch/pmlr

vict0rsch · Nov 20, 2021 · d25a0e9 · d25a0e9
2 parents 044ebb9 + 60f24ea
commit d25a0e9
Show file tree

Hide file tree

Showing 10 changed files with 127 additions and 24 deletions.
diff --git a/Readme.md b/Readme.md
@@ -31,11 +31,12 @@ This browser extension allows you to do automatically store research papers you
 
 Supported venues:
 
-* arxiv.org
-* biorxiv.org
-* neurips.cc (NeurIPS)
-* openreview.net (ICLR etc.)
-* openaccess.cvf.com (I/ECCV, CVPR etc.)
+* Arxiv
+* BioRxiv
+* NeurIPS
+* Open Review (ICLR etc.)
+* Computer Vision Foundation (I/ECCV, CVPR etc.)
+* Proceedings of Machine Learning Research (PMLR) (AISTATS, ICML, CoRL, CoLT, ALT, UAI etc.)
 * [Add more](https://github.com/vict0rsch/PaperMemory/issues/13)
 
 ## Demo

diff --git a/manifest.json b/manifest.json
@@ -1,6 +1,6 @@
 {
   "name": "Paper Memory",
-  "version": "0.3.1",
+  "version": "0.3.2",
   "manifest_version": 2,
   "description": "Automatically record papers you read from Arxiv, OpenReview & more! Organize your library with tags, links to code and quick notes.",
   "homepage_url": "https://github.com/vict0rsch/PaperMemory",
@@ -33,6 +33,7 @@
     "https://export.arxiv.org/*",
     "*://*.neurips.cc/*",
     "*://*.nips.cc/*",
+    "*://proceedings.mlr.press/*",
     "https://openaccess.thecvf.com/*",
     "https://openreview.net/*",
     "https://api.openreview.net/*",
@@ -47,6 +48,7 @@
       "matches": [
         "*://arxiv.org/*",
         "*://*.biorxiv.org/*",
+        "*://proceedings.mlr.press/*",
         "*://*.arxiv-vanity.com/*",
         "*://*.neurips.cc/*",
         "*://*.nips.cc/*",

diff --git a/src/content_scripts/content_script.js b/src/content_scripts/content_script.js
@@ -242,6 +242,9 @@ const makePaper = async (is, url, id) => {
     } else if (is.biorxiv) {
         paper = await parseBiorxivJSON(url);
         paper.source = "biorxiv";
+    } else if (is.pmlr) {
+        paper = await parsePMLRHTML(url);
+        paper.source = "pmlr";
     } else {
         throw Error("Unknown paper source: " + JSON.stringify({ is, url, id }));
     }

diff --git a/src/popup/popup-js.min.js b/src/popup/popup-js.min.js
diff --git a/src/popup/popup-js/memory.js b/src/popup/popup-js/memory.js
@@ -357,7 +357,7 @@ const filterMemoryByString = (letters) => {
         const title = paper.title.toLowerCase();
         const author = paper.author.toLowerCase();
         const note = paper.note.toLowerCase();
-        const displayId = paper.id.split("-")[0].toLowerCase();
+        const displayId = getDisplayId(paper.id).toLowerCase();
         if (
             words.every(
                 (w) =>

diff --git a/src/popup/popup-js/templates.js b/src/popup/popup-js/templates.js
@@ -6,7 +6,7 @@
 const getMemoryItemHTML = (paper) => {
     const addDate = new Date(paper.addDate).toLocaleString().replace(",", "");
     const lastOpenDate = new Date(paper.lastOpenDate).toLocaleString().replace(",", "");
-    const displayId = paper.id.split("_")[0].split(".")[0];
+    const displayId = getDisplayId(paper.id);
     const note = paper.note || "";
     const id = paper.id;
     const tags = new Set(paper.tags);
@@ -179,7 +179,7 @@ const getPopupEditFormHTML = (paper) => {
     const tagOptions = getTagsOptions(paper);
     const note = paper.note || "";
     const checked = "";
-    const displayId = paper.id.split("_")[0].split(".")[0];
+    const displayId = getDisplayId(paper.id);
 
     return /*html*/ `
     <div style="max-width: 100%; display: flex; justify-content: space-between; align-items: center; padding: 4px 16px;">

diff --git a/src/shared/utils.min.js b/src/shared/utils.min.js
diff --git a/src/shared/utils/config.js b/src/shared/utils/config.js
@@ -69,12 +69,21 @@ global.knownPaperPages = {
     cvf: ["openaccess.thecvf.com/content"],
     openreview: ["openreview.net/forum", "openreview.net/pdf"],
     biorxiv: ["biorxiv.org/content"],
+    pmlr: ["proceedings.mlr.press/"],
 };
 
 global.overrideORConfs = {
     "robot-learning": "CoRL",
     ijcai: "IJCAI",
 };
+// Short venue names for PMLR proceedings. parsePMLRHTML checks whether the
+// BibTeX `booktitle` contains one of these keys and, on the first hit, uses
+// the mapped acronym (followed by the year) as the paper's conference.
+global.overridePMLRConfs = {
+    "Conference on Learning Theory": "CoLT",
+    "International Conference on Machine Learning": "ICML",
+    "Conference on Uncertainty in Artificial Intelligence": "UAI",
+    "Conference on Robot Learning": "CoRL",
+    "International Conference on Artificial Intelligence and Statistics": "AISTATS",
+    "International Conference on Algorithmic Learning Theory": "ALT",
+};
 
 /**
  * English words to ignore when creating an arxiv paper's BibTex key.

diff --git a/src/shared/utils/functions.js b/src/shared/utils/functions.js
@@ -250,6 +250,14 @@ const info = (...args) => {
     console.log("%c" + args.join(" "), "color: #328DD2");
 };
 
+/**
+ * Shortens a stored paper id to the form displayed in the UI.
+ *
+ * Everything from the first "_" on, then everything from the first "." on,
+ * is dropped. Ids starting with "OR-" are returned as is after that; any
+ * other id is further cut down to its first two "-"-separated parts.
+ *
+ * @param {string} id - Full paper id.
+ * @returns {string} The shortened id.
+ */
+const getDisplayId = (id) => {
+    const [beforeUnderscore] = id.split("_");
+    const [base] = beforeUnderscore.split(".");
+    if (base.startsWith("OR-")) {
+        return base;
+    }
+    const [first, second] = base.split("-");
+    return second === undefined ? first : `${first}-${second}`;
+};
+
 const defaultPDFTitleFn = (title, id) => {
     title = title.replaceAll("\n", " ").replace(/\s\s+/g, " ");
     id = id.split("_")[0].split(".")[0];
@@ -743,6 +751,10 @@ const parseIdFromUrl = (url) => {
             id = id.split("v")[0];
         }
         return `Biorxiv-${id}`;
+    } else if (is.pmlr) {
+        const key = url.split("/").reverse()[0].split(".")[0];
+        const year = "20" + key.match(/\d+/)[0];
+        return `PMLR-${year}-${key}`;
     } else {
         throw Error("unknown paper url");
     }
@@ -774,6 +786,10 @@ const paperToAbs = (paper) => {
             abs = pdf.replace(".full.pdf", "");
             break;
 
+        case "pmlr":
+            abs = pdf.split("/").slice(0, -1).join("/") + ".html";
+            break;
+
         default:
             abs = "https://xkcd.com/1969/";
             break;
@@ -806,6 +822,9 @@ const paperToPDF = (paper) => {
             pdf = cleanBiorxivURL(pdf) + ".full.pdf";
             break;
 
+        case "pmlr":
+            break;
+
         default:
             pdf = "https://xkcd.com/1969/";
             break;

diff --git a/src/shared/utils/parsers.js b/src/shared/utils/parsers.js
@@ -1,3 +1,27 @@
+// -------------------
+// -----  Utils  -----
+// -------------------
+
+/**
+ * Reads the value of a `key = {value},` field from a BibTeX entry.
+ *
+ * The key is matched case-insensitively and as a whole word, so looking up
+ * "title" does not pick up "booktitle". Since `.` does not match line
+ * breaks, the field and its closing `},` must sit on a single line.
+ *
+ * @param {string} bibtex - BibTeX entry to search.
+ * @param {string} key - Field name; inserted into the regular expression as is.
+ * @returns {string} The text between the braces, or "" when no field matches.
+ */
+const extractBibtexValue = (bibtex, key) => {
+    const match = new RegExp(`\\b${key}\\s?=\\s?{(.+)},`, "i").exec(bibtex);
+    return match ? match[1] : "";
+};
+
+/**
+ * Extracts the `author` field of a BibTeX entry as "First Last and First Last".
+ *
+ * Braces and backslashes are removed from the field, then every
+ * " and "-separated name has its ", "-separated parts put in reverse order
+ * ("Last, First" becomes "First Last").
+ *
+ * @param {string} bibtex - BibTeX entry to read the authors from.
+ * @returns {string} Author names joined with " and ".
+ */
+const extractAuthor = (bibtex) => {
+    const rawAuthors = extractBibtexValue(bibtex, "author");
+    const withoutMarkup = rawAuthors
+        .replaceAll("{", "")
+        .replaceAll("}", "")
+        .replaceAll("\\", "");
+    const names = [];
+    for (const person of withoutMarkup.split(" and ")) {
+        names.push(person.split(", ").reverse().join(" "));
+    }
+    return names.join(" and ");
+};
+
 // -------------------
 // -----  Fetch  -----
 // -------------------
@@ -335,17 +359,7 @@ const parseBiorxivJSON = async (url) => {
     const bibtextLink = dom.querySelector(".bibtext a").href;
     const bibtex = await (await fetch(bibtextLink)).text();
 
-    const author = bibtex
-        .match(/author\ ?=\ ?{.+}/)[0]
-        .replace(/author\ ?=\ ?{/, "")
-        .trim()
-        .slice(0, -1)
-        .replaceAll("{", "")
-        .replaceAll("}", "")
-        .replaceAll("\\", "")
-        .split(" and ")
-        .map((a) => a.split(", ").reverse().join(" "))
-        .join(" and ");
+    const author = extractAuthor(bibtex);
 
     const conf = "BioRxiv";
     const id = parseIdFromUrl(url);
@@ -358,6 +372,61 @@ const parseBiorxivJSON = async (url) => {
     return { author, bibtex, conf, id, key, note, pdfLink, title, year };
 };
 
+/**
+ * Builds a paper record from a PMLR (proceedings.mlr.press) url.
+ *
+ * Fetches the paper's abstract page, reads the BibTeX entry held by the
+ * element with id "bibtex" and derives the paper's metadata from it.
+ *
+ * @param {string} url - Url of the paper's abstract (.html) page or of its pdf.
+ * @returns {Promise<object>} Resolves to
+ *     `{ author, bibtex, conf, id, key, note, pdfLink, title, year }`.
+ * @throws When the page cannot be fetched, or when it lacks the "bibtex"
+ *     element or an <h1> title.
+ */
+const parsePMLRHTML = async (url) => {
+    // Last path segment without its extension, e.g. ".../smith21a.pdf" -> "smith21a".
+    const key = url.split("/").reverse()[0].split(".")[0];
+    const id = parseIdFromUrl(url);
+
+    // A non-html url is treated as `<volume>/<key>/<key>.pdf`; its abstract
+    // page is then `<volume>/<key>.html`, hence the "/" before the key.
+    const absURL = url.includes(".html")
+        ? url
+        : url.split("/").slice(0, -2).join("/") + `/${key}.html`;
+
+    const pdfLink = absURL.replace(".html", "") + `/${key}.pdf`;
+
+    const html = await (await fetch(absURL)).text();
+    const doc = new DOMParser().parseFromString(html.replaceAll("\n", ""), "text/html");
+
+    const bibtexRaw = doc
+        .getElementById("bibtex")
+        .innerText.replaceAll("\t", " ")
+        .replaceAll(/\s\s+/g, " ");
+
+    // Put every `, key = {` field on its own indented line.
+    // `match` returns null when nothing matches, so fall back to no fields.
+    let bibtex = bibtexRaw;
+    const items = bibtexRaw.match(/,\ ?\w+ ?= ?{/g) || [];
+    for (const item of items) {
+        bibtex = bibtex.replace(
+            item,
+            item.replace(", ", ",\n    ").replace(" = ", "=")
+        );
+    }
+    if (bibtex.endsWith("}}")) {
+        bibtex = bibtex.slice(0, -2) + "}\n}";
+    }
+
+    const author = extractAuthor(bibtex);
+    const title = doc.getElementsByTagName("h1")[0].innerText;
+    const year = extractBibtexValue(bibtex, "year");
+
+    // Trim the space left behind once "Proceedings of the" is removed.
+    let conf = extractBibtexValue(bibtex, "booktitle")
+        .replaceAll("Proceedings of the", "")
+        .trim();
+    let note = `Accepted @ ${conf} (${year})`;
+    // Use the short venue name when the booktitle contains a known conference.
+    for (const [long, short] of Object.entries(global.overridePMLRConfs)) {
+        if (conf.includes(long)) {
+            conf = `${short} ${year}`;
+            note = `Accepted @ ${conf}`;
+            break;
+        }
+    }
+
+    return { author, bibtex, conf, id, key, note, pdfLink, title, year };
+};
+
 // ----------------------------------------------
 // -----  Papers With Code: non functional  -----
 // ----------------------------------------------