Skip to content

Commit

Permalink
Merge pull request #1 from Mic92/python2.7-removal
Browse files Browse the repository at this point in the history
Only support Python versions that receive security updates
  • Loading branch information
ashleysommer authored Sep 25, 2024
2 parents fd4f032 + 29d3072 commit 86f0d21
Show file tree
Hide file tree
Showing 60 changed files with 45 additions and 173 deletions.
29 changes: 0 additions & 29 deletions .appveyor.yml

This file was deleted.

3 changes: 0 additions & 3 deletions .github/workflows/python-tox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ jobs:
os: [ubuntu-latest, windows-latest]
deps: [base, optional]
include:
- python: "pypy-2.7"
os: ubuntu-latest
deps: base
- python: "pypy-3.10"
os: ubuntu-latest
deps: base
Expand Down
16 changes: 2 additions & 14 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ or:
By default, the ``document`` will be an ``xml.etree`` element instance.
Whenever possible, html5lib chooses the accelerated ``ElementTree``
implementation (i.e. ``xml.etree.cElementTree`` on Python 2.x).
implementation.

Two other tree types are supported: ``xml.dom.minidom`` and
``lxml.etree``. To use an alternative format, specify the name of
Expand All @@ -41,18 +41,6 @@ a treebuilder:
with open("mydocument.html", "rb") as f:
lxml_etree_document = html5lib.parse(f, treebuilder="lxml")
When using with ``urllib2`` (Python 2), the charset from HTTP should be
pass into html5lib as follows:

.. code-block:: python
from contextlib import closing
from urllib2 import urlopen
import html5lib
with closing(urlopen("http://example.com/")) as f:
document = html5lib.parse(f, transport_encoding=f.info().getparam("charset"))
When used with ``urllib.request`` (Python 3), the charset from HTTP
should be passed into html5lib as follows:

Expand Down Expand Up @@ -90,7 +78,7 @@ More documentation is available at https://html5lib.readthedocs.io/.
Installation
------------

html5lib works on CPython 2.7+, CPython 3.5+ and PyPy. To install:
html5lib works on CPython 3.8+ and PyPy. To install:

.. code-block:: bash
Expand Down
1 change: 0 additions & 1 deletion debug-info.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import print_function, unicode_literals

import platform
import sys
Expand Down
3 changes: 1 addition & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# html5lib documentation build configuration file, created by
# sphinx-quickstart on Wed May 8 00:04:49 2013.
Expand Down Expand Up @@ -100,7 +99,7 @@
}


class CExtMock(object):
class CExtMock:
"""Required for autodoc on readthedocs.org where you cannot build C extensions."""
def __init__(self, *args, **kwargs):
pass
Expand Down
1 change: 0 additions & 1 deletion html5lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
* :func:`~.serializer.serialize`
"""

from __future__ import absolute_import, division, unicode_literals

from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
Expand Down
3 changes: 1 addition & 2 deletions html5lib/_ihatexml.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

import re
import warnings
Expand Down Expand Up @@ -181,7 +180,7 @@ def escapeRegexp(string):
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")


class InfosetFilter(object):
class InfosetFilter:
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")

def __init__(self,
Expand Down
9 changes: 4 additions & 5 deletions html5lib/_inputstream.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from six import text_type
from six.moves import http_client, urllib
Expand Down Expand Up @@ -48,7 +47,7 @@
charsUntilRegEx = {}


class BufferedStream(object):
class BufferedStream:
"""Buffering for streams that do not have buffering of their own
The buffer is implemented as a list of chunks on the assumption that
Expand Down Expand Up @@ -145,7 +144,7 @@ def HTMLInputStream(source, **kwargs):
return HTMLBinaryInputStream(source, **kwargs)


class HTMLUnicodeInputStream(object):
class HTMLUnicodeInputStream:
"""Provides a unicode stream of characters to the HTMLTokenizer.
This class takes care of character encoding and removing or replacing
Expand Down Expand Up @@ -673,7 +672,7 @@ def jumpTo(self, bytes):
return True


class EncodingParser(object):
class EncodingParser:
"""Mini parser for detecting character encoding from meta elements"""

def __init__(self, data):
Expand Down Expand Up @@ -861,7 +860,7 @@ def getAttribute(self):
attrValue.append(c)


class ContentAttrParser(object):
class ContentAttrParser:
def __init__(self, data):
assert isinstance(data, bytes)
self.data = data
Expand Down
3 changes: 1 addition & 2 deletions html5lib/_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from six import unichr as chr

Expand All @@ -24,7 +23,7 @@
attributeMap = OrderedDict


class HTMLTokenizer(object):
class HTMLTokenizer:
""" This class takes care of tokenizing HTML.
* self.currentToken
Expand Down
1 change: 0 additions & 1 deletion html5lib/_trie/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from .py import Trie

Expand Down
6 changes: 1 addition & 5 deletions html5lib/_trie/_base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
from __future__ import absolute_import, division, unicode_literals

try:
from collections.abc import Mapping
except ImportError: # Python 2.7
from collections import Mapping
from collections.abc import Mapping


class Trie(Mapping):
Expand Down
1 change: 0 additions & 1 deletion html5lib/_trie/py.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type

from bisect import bisect_left
Expand Down
10 changes: 3 additions & 7 deletions html5lib/_utils.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
from __future__ import absolute_import, division, unicode_literals

from types import ModuleType

try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
from collections.abc import Mapping

from six import text_type, PY3

if PY3:
import xml.etree.ElementTree as default_etree
else:
try:
import xml.etree.cElementTree as default_etree
import xml.etree.ElementTree as default_etree
except ImportError:
import xml.etree.ElementTree as default_etree

Expand Down Expand Up @@ -122,7 +118,7 @@ def moduleFactoryFactory(factory):
moduleCache = {}

def moduleFactory(baseModule, *args, **kwargs):
if isinstance(ModuleType.__name__, type("")):
if isinstance(ModuleType.__name__, str):
name = "_%s_factory" % baseModule.__name__
else:
name = b"_%s_factory" % baseModule.__name__
Expand Down
1 change: 0 additions & 1 deletion html5lib/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

import string

Expand Down
1 change: 0 additions & 1 deletion html5lib/filters/alphabeticalattributes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from . import base

Expand Down
3 changes: 1 addition & 2 deletions html5lib/filters/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import absolute_import, division, unicode_literals


class Filter(object):
class Filter:
def __init__(self, source):
self.source = source

Expand Down
1 change: 0 additions & 1 deletion html5lib/filters/inject_meta_charset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from . import base

Expand Down
1 change: 0 additions & 1 deletion html5lib/filters/lint.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from six import text_type

Expand Down
1 change: 0 additions & 1 deletion html5lib/filters/optionaltags.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from . import base

Expand Down
1 change: 0 additions & 1 deletion html5lib/filters/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
if Bleach is unsuitable for your needs.
"""
from __future__ import absolute_import, division, unicode_literals

import re
import warnings
Expand Down
1 change: 0 additions & 1 deletion html5lib/filters/whitespace.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

import re

Expand Down
9 changes: 4 additions & 5 deletions html5lib/html5parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals
from six import viewkeys

from . import _inputstream
Expand Down Expand Up @@ -69,7 +68,7 @@ def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElemen
return p.parseFragment(doc, container=container, **kwargs)


class HTMLParser(object):
class HTMLParser:
"""HTML parser
Generates a tree structure from a stream of (possibly malformed) HTML.
Expand Down Expand Up @@ -397,7 +396,7 @@ def parseRCDataRawtext(self, token, contentType):
self.phase = self.phases["text"]


class Phase(object):
class Phase:
"""Base class for helper object that implements each phase of processing
"""
__slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
Expand Down Expand Up @@ -428,7 +427,7 @@ def processSpaceCharacters(self, token):
def processStartTag(self, token):
# Note the caching is done here rather than BoundMethodDispatcher as doing it there
# requires a circular reference to the Phase, and this ends up with a significant
# (CPython 2.7, 3.8) GC cost when parsing many short inputs
# (CPython 3.8) GC cost when parsing many short inputs
name = token["name"]
# In Py2, using `in` is quicker in general than try/except KeyError
# In Py3, `in` is quicker when there are few cache hits (typically short inputs)
Expand All @@ -455,7 +454,7 @@ def startTagHtml(self, token):
def processEndTag(self, token):
# Note the caching is done here rather than BoundMethodDispatcher as doing it there
# requires a circular reference to the Phase, and this ends up with a significant
# (CPython 2.7, 3.8) GC cost when parsing many short inputs
# (CPython 3.8) GC cost when parsing many short inputs
name = token["name"]
# In Py2, using `in` is quicker in general than try/except KeyError
# In Py3, `in` is quicker when there are few cache hits (typically short inputs)
Expand Down
3 changes: 1 addition & 2 deletions html5lib/serializer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type

import re
Expand Down Expand Up @@ -101,7 +100,7 @@ def serialize(input, tree="etree", encoding=None, **serializer_opts):
return s.render(walker(input), encoding)


class HTMLSerializer(object):
class HTMLSerializer:

# attribute quoting options
quote_attr_values = "legacy" # be secure by default
Expand Down
1 change: 0 additions & 1 deletion html5lib/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
5 changes: 2 additions & 3 deletions html5lib/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import print_function
import os.path
import sys

Expand Down Expand Up @@ -54,7 +53,7 @@ def pytest_configure(config):
# Check for optional requirements
req_file = os.path.join(_root, "requirements-optional.txt")
if os.path.exists(req_file):
with open(req_file, "r") as fp:
with open(req_file) as fp:
for line in fp:
if (line.strip() and
not (line.startswith("-r") or
Expand All @@ -79,7 +78,7 @@ def pytest_configure(config):
import xml.etree.ElementTree as ElementTree

try:
import xml.etree.cElementTree as cElementTree
import xml.etree.ElementTree as cElementTree
except ImportError:
msgs.append("cElementTree unable to be imported")
else:
Expand Down
1 change: 0 additions & 1 deletion html5lib/tests/sanitizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

import codecs
import json
Expand Down
3 changes: 1 addition & 2 deletions html5lib/tests/support.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

# pylint:disable=wrong-import-position

Expand Down Expand Up @@ -86,7 +85,7 @@ def __getitem__(self, key):
return dict.get(self, key, self.default)


class TestData(object):
class TestData:
def __init__(self, filename, newTestHeading="data", encoding="utf8"):
if encoding is None:
self.f = open(filename, mode="rb")
Expand Down
1 change: 0 additions & 1 deletion html5lib/tests/test_alphabeticalattributes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from collections import OrderedDict

Expand Down
5 changes: 2 additions & 3 deletions html5lib/tests/test_encoding.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

import os

Expand All @@ -9,7 +8,7 @@


def test_basic_prescan_length():
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode()
pad = 1024 - len(data) + 1
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
assert len(data) == 1024 # Sanity
Expand All @@ -18,7 +17,7 @@ def test_basic_prescan_length():


def test_parser_reparse():
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode()
pad = 10240 - len(data) + 1
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
assert len(data) == 10240 # Sanity
Expand Down
Loading

0 comments on commit 86f0d21

Please sign in to comment.