Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/retain comments #4

Merged
merged 2 commits into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added clean_dotenv/__init__.py
Empty file.
6 changes: 6 additions & 0 deletions clean_dotenv/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from __future__ import annotations

from clean_dotenv._main import main

if __name__ == "__main__":
    # Run the CLI and hand its return value to the interpreter as the
    # process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
23 changes: 18 additions & 5 deletions clean_dotenv.py → clean_dotenv/_main.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,35 @@
import os
import argparse
from typing import Iterator
import dotenv.main
import clean_dotenv._parser as DotEnvParser


def _clean_env(path_to_env: str):
    """Write a value-free copy of a .env file to ``<path_to_env>.example``.

    Leading whitespace/blank lines, ``export`` prefixes, keys, comments and
    line endings reported by the parser are copied over; every value is
    replaced by an empty one that keeps the original quote style
    (e.g. ``KEY="secret"`` becomes ``KEY=""``). Lines the parser flags as
    malformed are left out of the example file.

    Args:
        path_to_env: Path of the .env file whose values should be removed.
    """
    # Open the .env file and remove the sensitive data.
    # parse_stream is a lazy generator, so it is drained inside the ``with``
    # block; that way the source file handle is closed deterministically.
    with open(path_to_env) as env_f:
        dotenv_elements = list(DotEnvParser.parse_stream(env_f))

    # Create new filename for the .env file --> test.env becomes test.env.example
    path_to_example_file = path_to_env + ".example"

    # Write .example file
    with open(path_to_example_file, "w") as example_env_f:
        # Iterate through the original .env file and write everything except
        # for the value into the new file.
        for dotenv_element in dotenv_elements:
            if dotenv_element.error:
                # A malformed line only carries an optional ``export`` prefix;
                # writing it would glue a dangling "export " onto the next
                # line, so the whole line is skipped.
                continue

            # ``separator`` is None when the key had no "=" at all; fall back
            # to an empty quote so "KEY=" is written instead of "KEY=NoneNone".
            quote = dotenv_element.separator or ""
            pieces = (
                dotenv_element.multiline_whitespace,
                dotenv_element.export,  # e.g. export AWS_KEY=...
                f"{dotenv_element.key}={quote}{quote}"
                if dotenv_element.key
                else None,
                dotenv_element.comment,
                dotenv_element.end_of_line,
            )
            example_env_f.write("".join(piece for piece in pieces if piece))


def _find_dotenv_files(path_to_root: str) -> Iterator[str]:
Expand Down
235 changes: 235 additions & 0 deletions clean_dotenv/_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
# This is the original python-dotenv parser with minor changes to work in clean-dotenv
# Find the copyright and license below:

# Copyright (c) 2014, Saurabh Kumar (python-dotenv), 2013, Ted Tieken (django-dotenv-rw), 2013, Jacob Kaplan-Moss (django-dotenv)

# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:

# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.

# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.

# - Neither the name of django-dotenv nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import codecs
import re
from typing import (
IO,
Iterator,
Match,
NamedTuple,
Optional, # noqa:F401
Pattern,
Sequence,
Tuple,
)


def make_regex(string: str, extra_flags: int = 0) -> Pattern[str]:
    """Compile *string* with ``re.UNICODE`` combined with *extra_flags*."""
    flags = re.UNICODE | extra_flags
    return re.compile(string, flags)


# One line terminator: CRLF, LF or CR.
_newline = make_regex(r"(\r\n|\n|\r)")
# Any (possibly empty) run of whitespace, line terminators included.
_multiline_whitespace = make_regex(r"(\s*)", extra_flags=re.MULTILINE)
# Any (possibly empty) run of whitespace that is not CR or LF.
_whitespace = make_regex(r"([^\S\r\n]*)")
# Optional "export" keyword followed by at least one non-newline whitespace.
_export = make_regex(r"(export[^\S\r\n]+)?")
# Non-empty key wrapped in single quotes; the group excludes the quotes.
_single_quoted_key = make_regex(r"'([^']+)'")
# Non-empty key made of characters other than "=", "#" and whitespace.
_unquoted_key = make_regex(r"([^=\#\s]+)")
# "=" together with any non-newline whitespace that follows it.
_equal_sign = make_regex(r"(=[^\S\r\n]*)")
# Single-quoted value; \' is allowed inside, the group excludes the quotes.
_single_quoted_value = make_regex(r"'((?:\\'|[^'])*)'")
# Double-quoted value; \" is allowed inside, the group excludes the quotes.
_double_quoted_value = make_regex(r'"((?:\\"|[^"])*)"')
# Everything up to (not including) the next CR or LF.
_unquoted_value = make_regex(r"([^\r\n]*)")
# Optional comment: non-newline whitespace, "#", then the rest of the line.
_comment = make_regex(r"([^\S\r\n]*#[^\r\n]*)?")
# Trailing non-newline whitespace followed by a line terminator or "$".
_end_of_line = make_regex(r"[^\S\r\n]*(\r\n|\n|\r|$)")
# Remainder of the current line plus at most one terminator character.
# NOTE(review): "\r" is tried before "\r\n", so on a CRLF-terminated line
# only the "\r" is consumed by this pattern.
_rest_of_line = make_regex(r"[^\r\n]*(?:\r|\n|\r\n)?")
# Backslash escapes that get decoded inside double-quoted values.
_double_quote_escapes = make_regex(r"\\[\\'\"abfnrtv]")
# Backslash escapes that get decoded inside single-quoted values.
_single_quote_escapes = make_regex(r"\\[\\']")


class Original(NamedTuple):
    """Raw slice of the input together with the line it starts on."""

    # Exact source text covered by the slice.
    string: str
    # Line number of the position where the slice begins.
    line: int


class Binding(NamedTuple):
    """One parsed element of a .env file, as produced by ``parse_binding``."""

    # Whitespace (including blank lines) matched before the element.
    multiline_whitespace: Optional[str]
    # Text matched by the optional "export" prefix pattern.
    export: Optional[str]
    # Variable name; None for comment-only lines and error bindings.
    key: Optional[str]
    # Decoded value; None when no "=" followed the key.
    value: Optional[str]
    # Quote character reported by ``parse_value`` ("'", '"' or "");
    # None when no "=" followed the key.
    separator: Optional[str]
    # Raw text and starting line of the element.
    original: Original
    # Text matched by the optional comment pattern.
    comment: Optional[str]
    # Line terminator captured at the end of the element.
    end_of_line: Optional[str]
    # True when the element could not be parsed.
    error: bool


class Position:
    """Mutable cursor made of a character offset and a line number."""

    def __init__(self, chars: int, line: int) -> None:
        self.chars = chars
        self.line = line

    @classmethod
    def start(cls) -> "Position":
        """Return a cursor at offset 0 on line 1."""
        return cls(line=1, chars=0)

    def set(self, other: "Position") -> None:
        """Copy offset and line number from *other* into this cursor."""
        self.chars, self.line = other.chars, other.line

    def advance(self, string: str) -> None:
        """Move the cursor past *string*, counting the newlines it contains."""
        newline_count = len(_newline.findall(string))
        self.chars += len(string)
        self.line += newline_count


class Error(Exception):
    """Raised by ``Reader`` when a read or pattern match cannot be satisfied."""


class Reader:
    """Cursor over the complete text of a stream.

    The whole stream is read up front. ``position`` tracks how far the text
    has been consumed; ``mark`` remembers an earlier position so the text
    consumed since then can be retrieved with ``get_marked``.
    """

    def __init__(self, stream: IO[str]) -> None:
        self.string = stream.read()
        self.position = Position.start()
        self.mark = Position.start()

    def has_next(self) -> bool:
        """Return True while unconsumed characters remain."""
        return len(self.string) > self.position.chars

    def set_mark(self) -> None:
        """Remember the current position as the mark."""
        self.mark.set(self.position)

    def get_marked(self) -> Original:
        """Return the text between the mark and the current position."""
        begin, end = self.mark.chars, self.position.chars
        return Original(string=self.string[begin:end], line=self.mark.line)

    def peek(self, count: int) -> str:
        """Return up to *count* upcoming characters without consuming them."""
        offset = self.position.chars
        return self.string[offset : offset + count]

    def read(self, count: int) -> str:
        """Consume and return exactly *count* characters.

        Raises:
            Error: If fewer than *count* characters remain.
        """
        offset = self.position.chars
        chunk = self.string[offset : offset + count]
        if len(chunk) < count:
            raise Error("read: End of string")
        self.position.advance(chunk)
        return chunk

    def read_regex(self, regex: Pattern[str]) -> Sequence[str]:
        """Match *regex* at the current position, consume it, return its groups.

        Raises:
            Error: If *regex* does not match at the current position.
        """
        found = regex.match(self.string, self.position.chars)
        if found is None:
            raise Error("read_regex: Pattern not found")
        # group(0) is the full matched text, i.e. string[start():end()].
        self.position.advance(found.group(0))
        return found.groups()


def decode_escapes(regex: Pattern[str], string: str) -> str:
    """Replace every match of *regex* in *string* by its unicode-escape decoding."""
    return regex.sub(
        lambda found: codecs.decode(found.group(0), "unicode-escape"),  # type: ignore
        string,
    )


def parse_key(reader: Reader) -> Optional[str]:
    """Consume and return the key at the reader's position.

    Returns None, consuming nothing, when the next character is "#".
    A key starting with a single quote is read with the quoted-key pattern,
    anything else with the unquoted-key pattern.
    """
    first = reader.peek(1)
    if first == "#":
        return None
    key_pattern = _single_quoted_key if first == "'" else _unquoted_key
    (key,) = reader.read_regex(key_pattern)
    return key


def parse_unquoted_value(reader: Reader) -> str:
    """Consume the rest of the line and return it as an unquoted value.

    Everything from the first "whitespace + #" onwards is dropped from the
    returned value, and trailing whitespace is stripped. The dropped text is
    still consumed from the reader.
    """
    (rest_of_line,) = reader.read_regex(_unquoted_value)
    without_comment = re.sub(r"\s+#.*", "", rest_of_line)
    return without_comment.rstrip()


def parse_value(reader: Reader) -> Tuple[str, str]:
    """Consume a value and return ``(value, quote)``.

    ``quote`` is the quote character the value was wrapped in ("'" or '"'),
    or "" for unquoted and empty values. Escapes inside quoted values are
    decoded.
    """
    first = reader.peek(1)

    # Quote character -> (pattern for the quoted value, escapes to decode).
    quoted = {
        "'": (_single_quoted_value, _single_quote_escapes),
        '"': (_double_quoted_value, _double_quote_escapes),
    }
    if first in quoted:
        value_pattern, escape_pattern = quoted[first]
        (raw,) = reader.read_regex(value_pattern)
        return decode_escapes(escape_pattern, raw), first

    # End of input or end of line right after "=": empty value.
    if first in ("", "\n", "\r"):
        return "", ""

    return parse_unquoted_value(reader), ""


def parse_binding(reader: Reader) -> Binding:
    """Parse the next element of the input into a ``Binding``.

    Reads, in order: leading whitespace, an optional "export" prefix, the
    key, an optional "=" with its value, an optional comment and the end of
    the line. If only whitespace was left, a Binding carrying just that
    whitespace is returned. If any read raises ``Error``, the rest of the
    line is consumed and a Binding with ``error=True`` is returned.
    """
    reader.set_mark()
    try:
        (leading_whitespace,) = reader.read_regex(_multiline_whitespace)
        if not reader.has_next():
            # Nothing but whitespace remained in the input.
            return Binding(
                multiline_whitespace=leading_whitespace,
                export=None,
                key=None,
                value=None,
                separator=None,
                original=reader.get_marked(),
                comment=None,
                end_of_line=None,
                error=False,
            )

        (export,) = reader.read_regex(_export)
        key = parse_key(reader)
        reader.read_regex(_whitespace)

        # Value and quote stay None unless an "=" follows the key.
        value, separator = None, None
        if reader.peek(1) == "=":
            reader.read_regex(_equal_sign)
            value, separator = parse_value(reader)

        (comment,) = reader.read_regex(_comment)
        (end_of_line,) = reader.read_regex(_end_of_line)
    except Error:
        # Skip what is left of the malformed line and report it as an error.
        reader.read_regex(_rest_of_line)
        return Binding(
            multiline_whitespace=None,
            export=export,
            key=None,
            value=None,
            separator=None,
            original=reader.get_marked(),
            comment=None,
            end_of_line=None,
            error=True,
        )

    return Binding(
        multiline_whitespace=leading_whitespace,
        export=export,
        key=key,
        value=value,
        separator=separator,
        original=reader.get_marked(),
        comment=comment,
        end_of_line=end_of_line,
        error=False,
    )


def parse_stream(stream: IO[str]) -> Iterator[Binding]:
    """Lazily yield one ``Binding`` per element found in *stream*.

    The stream is read in full when iteration starts.
    """
    reader = Reader(stream)
    while True:
        if not reader.has_next():
            return
        yield parse_binding(reader)
13 changes: 9 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,25 @@ classifiers =
Programming Language :: Python :: Implementation :: PyPy

[options]
py_modules = clean_dotenv
install_requires =
python-dotenv>=1.0
packages = find:
python_requires = >=3.8

[options.packages.find]
exclude =
tests*

[options.entry_points]
console_scripts =
clean-dotenv = clean_dotenv:main
clean-dotenv = clean_dotenv._main:main

[bdist_wheel]
universal = True

[coverage:run]
plugins = covdefaults
omit =
# We aren't responsible for this file, since it comes from python-dotenv
_parser.py

[mypy]
check_untyped_defs = true
Expand Down
Loading
Loading