diff --git a/app/__init__.py b/app/__init__.py
index 7bf6a7e..da06d20 100644
--- a/app/__init__.py
+++ b/app/__init__.py
@@ -23,3 +23,25 @@
     ],
 )
 logging.debug(f"Logging to {log_path}")
+
+
+def strtobool(val) -> bool:
+    """
+    Replaces deprecated https://docs.python.org/3.9/distutils/apiref.html#distutils.util.strtobool
+    The deprecation recommendation is to re-implement the function https://peps.python.org/pep-0632/
+
+    ------------------------------------------------------------
+
+    Convert a string representation of truth to a bool (True or False).
+    `val` is passed through str() and lower-cased, so matching is case-insensitive.
+    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
+    are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
+    'val' is anything else (surrounding whitespace is not stripped).
+    """
+    val_str = str(val).lower()
+    if val_str in ("y", "yes", "t", "true", "on", "1"):
+        return True
+    elif val_str in ("n", "no", "f", "false", "off", "0"):
+        return False
+    else:
+        raise ValueError(f"invalid truth value {val!r}")
diff --git a/app/downloads/.gitignore b/app/downloads/.gitignore
index a1b5658..2c09a62 100644
--- a/app/downloads/.gitignore
+++ b/app/downloads/.gitignore
@@ -1,2 +1,2 @@
 *.ipynb
-cache/MultiQC
+sources/MultiQC
diff --git a/app/downloads/daily.py b/app/downloads/daily.py
index 9e3f3a3..3ebe840 100644
--- a/app/downloads/daily.py
+++ b/app/downloads/daily.py
@@ -33,7 +33,10 @@
 
 logger = logging.getLogger(__name__)
 
-PYPI_HISTORIC_PATH = Path(__file__).parent / "sources" / "pypi-historic.csv"
+SOURCES_DIR = Path(__file__).parent / "sources"
+# Whether we can write daily.csv and other pulled stats back to SOURCES_DIR, which is kept under version control.
+# The code directory is usually not writable in the container environment.
+SOURCES_IS_WRITABLE = os.access(SOURCES_DIR, os.W_OK)
 
 
 @click.command()
@@ -45,20 +48,18 @@ def main(days: int | None):
     collect_daily_download_stats(days=days)
 
 
-def collect_daily_download_stats(cache_dir: Path | None = None, days: int | None = None) -> pd.DataFrame:
-    cache_dir = cache_dir or Path(__file__).parent / "cache"
-
-    df = _collect_daily_download_stats(cache_dir, days=days)
+def collect_daily_download_stats(days: int | None = None) -> pd.DataFrame:
+    df = _collect_daily_download_stats(days=days)
 
     # Update the existing CSV.
     # Only add data where it's not already present.
     # For existing data, check that it's the same as the new data.
     keys = [k for k in df.keys() if k != "date"]
-    cache_path = cache_dir / "daily.csv"
-    if cache_path.exists():
-        logger.info(f"Loading existing daily downloads stats from cache location {cache_path}")
+    csv_path = SOURCES_DIR / "daily.csv"
+    if csv_path.exists():
+        logger.info(f"Loading existing daily downloads stats from cache location {csv_path}")
         existing_df = pd.read_csv(
-            cache_path,
+            csv_path,
             dtype={k: "date" if k == "date" else "Int64" for k in keys},  # Int64 is a nullable integer version of int64
         ).set_index("date")
         # Fixing <NA> as nan and converting values to int
@@ -68,12 +69,13 @@ def collect_daily_download_stats(cache_dir: Path | None = None, days: int | None
         full_df = existing_df.combine_first(df)
     else:
         full_df = df
-    logger.info(f"Saving daily downloads stats to {cache_path}")
-    full_df.to_csv(cache_path, index=True)
+    if SOURCES_IS_WRITABLE:
+        logger.info(f"Saving daily downloads stats to {csv_path}")
+        full_df.to_csv(csv_path, index=True)
     return df
 
 
-def _collect_daily_download_stats(cache_dir: Path, days: int | None = None) -> pd.DataFrame:
+def _collect_daily_download_stats(days: int | None = None) -> pd.DataFrame:
     logger.info("Collecting PyPI stats...")
     df = get_pypi(days=days)
 
@@ -82,7 +84,7 @@ def _collect_daily_download_stats(cache_dir: Path, days: int | None = None) -> p
         df = df.merge(df_bioconda, on="date", how="outer").sort_values("date")
 
     logger.info("Collecting BioContainers (Quay mirror) stats...")
-    df_quay = get_biocontainers_quay(cache_dir, days=days)
+    df_quay = get_biocontainers_quay(days=days)
     df = df.merge(df_quay, on="date", how="outer").sort_values("date")
 
     logger.info("Collecting GitHub PRs...")
@@ -90,7 +92,7 @@ def _collect_daily_download_stats(cache_dir: Path, days: int | None = None) -> p
     df = df.merge(df_prs, on="date", how="outer").sort_values("date")
 
     logger.info("Collecting GitHub modules...")
-    df_modules = github_modules(cache_dir, days=days)
+    df_modules = github_modules(days=days)
     df = df.merge(df_modules, on="date", how="outer").sort_values("date")
 
     today = pd.to_datetime("today").strftime("%Y-%m-%d")
@@ -168,6 +170,7 @@ def get_pypi_historic():
     ''', project_id=os.environ["GCP_PROJECT"])
     ```
     """
+    PYPI_HISTORIC_PATH = SOURCES_DIR / "pypi-historic.csv"
     logger.info(f"Loading historic PyPI stats from {PYPI_HISTORIC_PATH}")
     df = pd.read_csv(
         PYPI_HISTORIC_PATH,
@@ -238,7 +241,7 @@ def biocontainers_aws_total():
     return count
 
 
-def get_biocontainers_quay(cache_dir: Path, days: int | None = None):
+def get_biocontainers_quay(days: int | None = None):
     """
     For the last 3 months, total numbers of BioContainers Quay.io mirror downloads.
     """
@@ -254,7 +257,7 @@ def get_biocontainers_quay(cache_dir: Path, days: int | None = None):
     df.sort_values("date", inplace=True)
     df = df.set_index("date")
     if days is None or days > 90:
-        path = cache_dir / "biocontainers-quay-historic.csv"
+        path = SOURCES_DIR / "biocontainers-quay-historic.csv"
         if path.exists():
             print(f"Previous Quay stats found at {path}, appending")
             existing_df = pd.read_csv(path, index_col="date", dtype={"count": "Int64"})
@@ -264,7 +267,8 @@ def get_biocontainers_quay(cache_dir: Path, days: int | None = None):
             df = df[~df.index.duplicated(keep="last")]
             # sort by date
             df.sort_index(inplace=True)
-        df.to_csv(path)
+        if SOURCES_IS_WRITABLE:
+            df.to_csv(path)
         df = pd.read_csv(path)
         df = df.set_index("date")
         print(f"Saved {path}")
@@ -359,7 +363,7 @@ def get_github_prs(days: int | None = None):
     return df.set_index("date")
 
 
-def github_modules(cache_dir: Path, days: int | None = None):
+def github_modules(days: int | None = None):
     """
     Daily and total new MultiQC modules.
     """
@@ -369,7 +373,8 @@ def github_modules(cache_dir: Path, days: int | None = None):
     from git import Repo
 
     repo_url = "https://github.com/MultiQC/MultiQC.git"
-    clone_path = cache_dir / "MultiQC"
+    tmp_dir = Path(os.getenv("TMPDIR", "/tmp"))
+    clone_path = tmp_dir / "MultiQC"
     if not clone_path.exists():
         Repo.clone_from(repo_url, clone_path)
         logger.debug(f"{repo_url} cloned at {clone_path}")
diff --git a/app/downloads/cache/biocontainers-quay-historic.csv b/app/downloads/sources/biocontainers-quay-historic.csv
similarity index 98%
rename from app/downloads/cache/biocontainers-quay-historic.csv
rename to app/downloads/sources/biocontainers-quay-historic.csv
index 37b39dd..8719fd9 100644
--- a/app/downloads/cache/biocontainers-quay-historic.csv
+++ b/app/downloads/sources/biocontainers-quay-historic.csv
@@ -180,3 +180,6 @@ date,count
 2024-02-16,3556
 2024-02-17,623
 2024-02-18,455
+2024-02-19,2011
+2024-02-20,2054
+2024-02-21,1958
diff --git a/app/downloads/cache/daily.csv b/app/downloads/sources/daily.csv
similarity index 100%
rename from app/downloads/cache/daily.csv
rename to app/downloads/sources/daily.csv
diff --git a/app/main.py b/app/main.py
index 0b048b9..29d1bbb 100644
--- a/app/main.py
+++ b/app/main.py
@@ -25,7 +25,7 @@
 from plotly.graph_objs import Layout
 from sqlalchemy.exc import ProgrammingError
 
-from app import __version__, db, models
+from app import __version__, db, models, strtobool
 from app.downloads import daily
 
 logger = logging.getLogger(__name__)
@@ -266,17 +266,16 @@ def _update_download_stats():
     except ProgrammingError:
         logger.error("The table does not exist, will create and populate with historical data")
         existing_downloads = []
-    cache_dir = Path(os.getenv("MULTIQC_API_CACHE_DIR", os.getenv("TMPDIR", "/tmp")))
     if len(existing_downloads) == 0:  # first time, populate historical data
         logger.info("Collecting historical downloads data...")
-        df = daily.collect_daily_download_stats(cache_dir=cache_dir)
+        df = daily.collect_daily_download_stats()
         logger.info(f"Adding {len(df)} historical entries to the table...")
         db.insert_download_stats(df)
         logger.info(f"Successfully populated {len(df)} historical entries")
     else:  # recent days only
         n_days = 4
-        logger.info(f"Updating data for the last {n_days} days...")
-        df = daily.collect_daily_download_stats(cache_dir=cache_dir, days=n_days)
+        logger.info(f"Updating downloads data for the last {n_days} days...")
+        df = daily.collect_daily_download_stats(days=n_days)
         logger.info(f"Adding {len(df)} recent entries to the table. Will update existing entries at the same date")
         db.insert_download_stats(df)
         logger.info(f"Successfully updated {len(df)} new daily download statistics")
@@ -467,27 +466,5 @@ def plotly_image_response(plot, format: PlotlyImageFormats = PlotlyImageFormats.
     return Response(content=plot)
 
 
-def strtobool(val) -> bool:
-    """
-    Replaces deprecated https://docs.python.org/3.9/distutils/apiref.html#distutils.util.strtobool
-    The deprecation recommendation is to re-implement the function https://peps.python.org/pep-0632/
-
-    ------------------------------------------------------------
-
-    Convert a string representation of truth to true (1) or false (0).
-
-    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
-    are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
-    'val' is anything else.
-    """
-    val_str = str(val).lower()
-    if val_str in ("y", "yes", "t", "true", "on", "1"):
-        return True
-    elif val_str in ("n", "no", "f", "false", "off", "0"):
-        return False
-    else:
-        raise ValueError(f"invalid truth value {val!r}")
-
-
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/docker-compose.yml b/docker-compose.yml
index 7b13d82..3ef857b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -12,7 +12,6 @@ services:
     environment:
       # Set in .env
       GITHUB_TOKEN: $GITHUB_TOKEN
-      MULTIQC_API_CACHE_DIR: /code/app/downloads/cache
       # Matches the "db" service below
       DATABASE_URL: mysql+pymysql://root:1@db:3306/multiqc
     depends_on: