Skip to content

Commit

Permalink
Fix an issue where TDK servers were blocking Python urllib requests
Browse files Browse the repository at this point in the history
Requests with user agent strings that contained "Python" were blocked.

Using a common user agent string solved the issue.

Also added some other common headers to avoid being blocked by heuristics
in the future.

Closes: #1
  • Loading branch information
emreozcan committed Apr 6, 2022
1 parent 7583bb1 commit e287b73
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 6 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="tdk-py",
version="1.1.0.post1",
version="1.1.1",
author="Emre Özcan",
author_email="[email protected]",
description="Python API for the Turkish Language Foundation",
Expand Down
10 changes: 5 additions & 5 deletions src/tdk/gts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
import json
import urllib.request
from typing import List

from . import remote_paths
from .parsers import parse_index
from ..exceptions import TdkIdLookupErrorException, TdkIdLookupUnexpectedResponseException, \
TdkSearchUnexpectedResponseException, TdkSearchErrorException
from ..models import Entry
from ..networking import make_request
from ..tools import lowercase


def index() -> List[str]:
    """Fetch the remote autocomplete index and return it as a list of strings.

    The request goes through ``make_request`` (rather than a bare
    ``urllib.request.urlopen``) so that the default browser-like headers are
    sent; the JSON body is decoded and handed to ``parse_index``.
    """
    # Only one request is issued: the superseded urlopen() call is dropped,
    # since it would hit the server a second time with the blocked
    # "Python-urllib" user agent.
    with make_request(url=remote_paths.autocomplete_index()) as response:
        autocomplete_index = json.loads(response.read())
    return parse_index(autocomplete_index)


def search(query: str) -> List[Entry]:
query = lowercase(query, remove_unknown_characters=False)
with urllib.request.urlopen(url=remote_paths.general_search(query)) as response:
with make_request(url=remote_paths.general_search(query)) as response:
words = json.loads(response.read())
if not isinstance(words, list):
if "error" in words:
Expand All @@ -31,7 +31,7 @@ def search(query: str) -> List[Entry]:


def get(_id: int) -> Entry:
with urllib.request.urlopen(url=remote_paths.get_with_id(_id)) as response:
with make_request(url=remote_paths.get_with_id(_id)) as response:
word = json.loads(response.read())
if not isinstance(word, list):
if "error" in word:
Expand All @@ -44,6 +44,6 @@ def get(_id: int) -> Entry:


def suggest(query: str) -> List[str]:
    """Fetch suggestions for *query* and return them as a list of strings.

    The request goes through ``make_request`` so that the default
    browser-like headers are sent; the JSON body is decoded and handed to
    ``parse_index``.
    """
    # Only one request is issued: the superseded urlopen() call is dropped.
    with make_request(url=remote_paths.suggest(query)) as response:
        # Named "suggestions" so the local does not shadow the sibling
        # index() function.
        suggestions = json.loads(response.read())
    return parse_index(suggestions)
23 changes: 23 additions & 0 deletions src/tdk/networking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import urllib.request

# Default headers attached to outgoing requests.
# They imitate a desktop Chrome browser issuing an XMLHttpRequest, because
# requests whose User-Agent string contained "Python" were being blocked by
# the TDK servers; the extra headers are there to avoid being caught by
# similar heuristics later on.
_http_headers = {
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Referer": "https://sozluk.gov.tr/",
    "User-Agent": " ".join(
        (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/99.0.4844.51",
            "Safari/537.36",
        )
    ),
    "X-Requested-With": "XMLHttpRequest",
}


def make_request(*args, **kwargs):
    """
    Open a URL with urllib, adding default headers when none are supplied.

    All arguments are passed down to ``urllib.request.Request``; the
    resulting request is opened with ``urllib.request.urlopen`` and the
    response object is returned (usable as a context manager).

    The default headers are added only if the caller did not provide
    headers, either through the ``headers`` keyword argument or as the
    third positional argument of ``Request`` (``url, data, headers``).
    Caller-supplied headers replace the defaults entirely; they are not
    merged.
    """
    # Request's positional order is (url, data, headers, ...). If headers
    # arrived positionally, injecting headers= as a keyword as well would
    # raise "TypeError: got multiple values for argument 'headers'".
    headers_given = "headers" in kwargs or len(args) >= 3
    if not headers_given:
        kwargs["headers"] = _http_headers
    return urllib.request.urlopen(urllib.request.Request(*args, **kwargs))

0 comments on commit e287b73

Please sign in to comment.