Commit
Merge pull request #1 from LABHDUFBA/custom-logger
Change logging system to use custom logger
vmussa authored Aug 24, 2021
2 parents 608a73e + a3d8e28 commit 1cd8326
Showing 6 changed files with 182 additions and 20 deletions.
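The gist of the change: instead of configuring the root logger with logging.basicConfig() inside main(), the package now defines a single named logger in qualitube/log.py and attaches a file handler and a stdout handler to it once at startup (set_logger() in main.py); the other modules import that object instead of calling logging.* directly. A minimal sketch of the pattern, for orientation only; the actual lines are in the diffs below:

    # qualitube/log.py: one shared Logger object for the whole package
    import logging

    logger = logging.getLogger('qualitube')

    # elsewhere (e.g. qualitube/videos.py): reuse the shared object
    # instead of the root logger
    from .log import logger

    logger.info("Got Video -> ...")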
154 changes: 153 additions & 1 deletion poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -11,6 +11,7 @@ pandas = "^1.2.4"
 requests = "^2.25.1"

 [tool.poetry.dev-dependencies]
+black = "^21.7b0"

 [tool.poetry.scripts]
 qualitube = 'qualitube.main:main'
3 changes: 3 additions & 0 deletions qualitube/log.py
@@ -0,0 +1,3 @@
+import logging
+
+logger = logging.getLogger('qualitube')
32 changes: 19 additions & 13 deletions qualitube/main.py
@@ -2,48 +2,54 @@
 from configparser import ConfigParser
 from .videos import Videos
 import pandas as pd
+from .log import logger
 import logging
 import sys


 config = ConfigParser()
 config.read("config.ini")
-CHANNEL_IDS = config['channels']['ids'].split("\n")[1:]
+CHANNEL_IDS = config["channels"]["ids"].split("\n")[1:]


+def set_logger(logger):
+    logger.setLevel("INFO")
+    logger.addHandler(logging.FileHandler("pipeline.log"))
+    logger.addHandler(logging.StreamHandler(sys.stdout))
+
+
 def get_playlist_items_objs(channel_ids: str) -> PlaylistItems:
     """Get list of PlaylistItems objects from channel ids."""
     objs = [PlaylistItems(channel_id) for channel_id in channel_ids]
     return objs


 def chunks(lst: list, n) -> list:
     """Yield successive n-sized chunks from lst."""
     for i in range(0, len(lst), n):
         yield lst[i:i + n]

-def main():
-    logging.basicConfig(
-        level=logging.INFO,
-        handlers=[
-            logging.FileHandler('pipeline.log'),
-            logging.StreamHandler(sys.stdout)
-        ]
-    )

-    logging.info('Beginning of pipeline.')
+def main():
+    """Main function."""
+    set_logger(logger)
+    logger.info("Beginning of pipeline.")
+
     playlists = get_playlist_items_objs(channel_ids=CHANNEL_IDS)
     dfs = [playlist.to_df() for playlist in playlists]
     raw = pd.concat(dfs)
-    videos_ids_lst = raw['id'].to_list()
+    videos_ids_lst = raw["id"].to_list()
     chunked = list(chunks(videos_ids_lst, 50))

     dfs = []
     for chunk in chunked:
         dfs.append(Videos(chunk).to_df())

     df = pd.concat(dfs).reset_index(drop=True)
-    df.to_csv('corpus.csv', index=False)
-    logging.info('End of pipeline.')
+    df.to_csv("corpus.csv", index=False)
+
+    logger.info("End of pipeline.")
+
+
 if __name__ == "__main__":
     main()
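A side note on the design (standard-library behavior rather than anything this commit adds): handlers attached to the 'qualitube' logger are also reached by any logger named under it, since records propagate to ancestor loggers. Per-module loggers could therefore be introduced later without touching set_logger(). A hypothetical example:

    import logging

    # hypothetical per-module logger, not part of this commit
    log = logging.getLogger('qualitube.videos')
    log.info("propagates up to the handlers set on the 'qualitube' logger")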
6 changes: 3 additions & 3 deletions qualitube/playlist_items.py
@@ -2,7 +2,7 @@
 import json
 import pandas as pd
 from configparser import ConfigParser
-import logging
+from .log import logger
 from .exceptions import QualitubeException


@@ -24,7 +24,7 @@ def _try_parse(self, item, key):
         try:
             parsed = item[key]
         except KeyError:
-            logging.warn(
+            logger.warn(
                 f"YouTube Data API v3 does not provide the `{key}` parameter fo"
                 f"r the requested playlist item. Setting it as `None`"
             )
@@ -55,7 +55,7 @@ def _parse_response(self, data):
                 'description': self._try_parse(item['snippet'], 'description'),
                 'published_at': self._try_parse(item['snippet'], 'publishedAt')
             })
-            logging.info(f"Got PlaylistItem -> id: {item['id']} / title: {item['snippet']['title']}")
+            logger.info(f"Got PlaylistItem -> id: {item['id']} / title: {item['snippet']['title']}")
             try:
                 next_page_token = raw["nextPageToken"]
             except KeyError:
6 changes: 3 additions & 3 deletions qualitube/videos.py
@@ -2,7 +2,7 @@
 import json
 import pandas as pd
 from configparser import ConfigParser
-import logging
+from .log import logger
 from .exceptions import QualitubeException


@@ -24,7 +24,7 @@ def _try_parse(self, item, key):
         try:
             parsed = item[key]
         except KeyError:
-            logging.warn(
+            logger.warn(
                 f"YouTube Data API v3 does not provide the `{key}` parameter fo"
                 f"r the requested video. Setting it as `None`"
             )
@@ -60,7 +60,7 @@ def _parse_response(self, data):
                 'video_favorite_count': self._try_parse(item['statistics'], 'favoriteCount'),
                 'video_comment_count': self._try_parse(item['statistics'], 'commentCount')
             })
-            logging.info(f"Got Video -> id: {item['id']} title: {item['snippet']['title']}")
+            logger.info(f"Got Video -> id: {item['id']} title: {item['snippet']['title']}")

             try:
                 next_page_token = raw["nextPageToken"]
