Skip to content

Commit

Permalink
Isolate log handlers setup
Browse files Browse the repository at this point in the history
  • Loading branch information
vmussa committed Aug 24, 2021
1 parent 22e5aa2 commit a3d8e28
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 10 deletions.
154 changes: 153 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pandas = "^1.2.4"
requests = "^2.25.1"

[tool.poetry.dev-dependencies]
black = "^21.7b0"

[tool.poetry.scripts]
qualitube = 'qualitube.main:main'
Expand Down
4 changes: 0 additions & 4 deletions qualitube/log.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
import logging
import sys

logger = logging.getLogger('qualitube')
logger.setLevel('INFO')
logger.addHandler(logging.FileHandler('pipeline.log'))
logger.addHandler(logging.StreamHandler(sys.stdout))
23 changes: 18 additions & 5 deletions qualitube/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,53 @@
from .videos import Videos
import pandas as pd
from .log import logger
import logging
import sys


# Module-level configuration: read config.ini once at import time.
config = ConfigParser()
config.read("config.ini")
# Channel ids are listed one per line under [channels]/ids; the value starts
# with a newline, so the first split element is empty and is skipped.
CHANNEL_IDS = config["channels"]["ids"].split("\n")[1:]


def set_logger(logger):
    """Configure *logger* with INFO level, a file handler and a stdout handler.

    Guards against duplicate handlers: without the check, calling this more
    than once in the same interpreter (e.g. re-running main()) would attach
    a second pair of handlers and emit every record twice, and would open a
    fresh file handle on "pipeline.log" each time.
    """
    logger.setLevel("INFO")
    if not logger.handlers:
        logger.addHandler(logging.FileHandler("pipeline.log"))
        logger.addHandler(logging.StreamHandler(sys.stdout))


def get_playlist_items_objs(channel_ids: list) -> list:
    """Build one PlaylistItems object per channel id.

    Args:
        channel_ids: list of YouTube channel id strings.
        (The original annotation said ``str`` and a ``PlaylistItems`` return,
        but the body iterates the argument and returns a list — fixed here.)

    Returns:
        list of PlaylistItems objects, in the same order as ``channel_ids``.
    """
    return [PlaylistItems(channel_id) for channel_id in channel_ids]


def chunks(lst: list, n) -> list:
    """Lazily yield consecutive slices of ``lst``, each at most ``n`` long."""
    start = 0
    total = len(lst)
    while start < total:
        yield lst[start:start + n]
        start += n


def main():
    """Run the qualitube pipeline end to end.

    Steps: configure logging, fetch playlist items for every configured
    channel, collect all video ids, fetch video stats in API-sized batches
    of 50 ids, and write the combined result to corpus.csv.
    """
    set_logger(logger)
    logger.info("Beginning of pipeline.")

    playlists = get_playlist_items_objs(channel_ids=CHANNEL_IDS)
    dfs = [playlist.to_df() for playlist in playlists]
    raw = pd.concat(dfs)
    videos_ids_lst = raw["id"].to_list()
    # The YouTube Data API accepts at most 50 video ids per request.
    chunked = list(chunks(videos_ids_lst, 50))

    dfs = []
    for chunk in chunked:
        dfs.append(Videos(chunk).to_df())

    df = pd.concat(dfs).reset_index(drop=True)
    df.to_csv("corpus.csv", index=False)

    logger.info("End of pipeline.")


if __name__ == "__main__":
    main()

0 comments on commit a3d8e28

Please sign in to comment.