diff --git a/placement/.gitignore b/placement/.gitignore new file mode 100644 index 0000000..c465821 --- /dev/null +++ b/placement/.gitignore @@ -0,0 +1,105 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +/*/__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/placement/.vscode/settings.json b/placement/.vscode/settings.json new file mode 100644 index 0000000..5e97901 --- /dev/null +++ b/placement/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "spider/bin/python3.6" +} \ No newline at end of file diff --git a/placement/__init__.py b/placement/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/placement/items.py b/placement/items.py new file mode 100644 index 0000000..a4a776c --- /dev/null +++ b/placement/items.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- + +# Define here the models for your scraped items +# +# See documentation in: +# https://doc.scrapy.org/en/latest/topics/items.html + +import scrapy + +class Product(scrapy.Item): + link = scrapy.Field() + name = scrapy.Field() + year = scrapy.Field() diff --git a/placement/middlewares.py b/placement/middlewares.py new file mode 100644 index 0000000..df86216 --- /dev/null +++ b/placement/middlewares.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +# Define here the models for your spider middleware +# +# See documentation in: +# https://doc.scrapy.org/en/latest/topics/spider-middleware.html + +from scrapy import signals + + +class PlacementSpiderMiddleware(object): + # Not all methods need to be defined. If a method is not defined, + # scrapy acts as if the spider middleware does not modify the + # passed objects. + + @classmethod + def from_crawler(cls, crawler): + # This method is used by Scrapy to create your spiders. + s = cls() + crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) + return s + + def process_spider_input(self, response, spider): + # Called for each response that goes through the spider + # middleware and into the spider. + + # Should return None or raise an exception. + return None + + def process_spider_output(self, response, result, spider): + # Called with the results returned from the Spider, after + # it has processed the response. + + # Must return an iterable of Request, dict or Item objects. + for i in result: + yield i + + def process_spider_exception(self, response, exception, spider): + # Called when a spider or process_spider_input() method + # (from other spider middleware) raises an exception. + + # Should return either None or an iterable of Response, dict + # or Item objects. + pass + + def process_start_requests(self, start_requests, spider): + # Called with the start requests of the spider, and works + # similarly to the process_spider_output() method, except + # that it doesn’t have a response associated. + + # Must return only requests (not items). + for r in start_requests: + yield r + + def spider_opened(self, spider): + spider.logger.info('Spider opened: %s' % spider.name) + + +class PlacementDownloaderMiddleware(object): + # Not all methods need to be defined. If a method is not defined, + # scrapy acts as if the downloader middleware does not modify the + # passed objects. + + @classmethod + def from_crawler(cls, crawler): + # This method is used by Scrapy to create your spiders. + s = cls() + crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) + return s + + def process_request(self, request, spider): + # Called for each request that goes through the downloader + # middleware. + + # Must either: + # - return None: continue processing this request + # - or return a Response object + # - or return a Request object + # - or raise IgnoreRequest: process_exception() methods of + # installed downloader middleware will be called + return None + + def process_response(self, request, response, spider): + # Called with the response returned from the downloader. + + # Must either; + # - return a Response object + # - return a Request object + # - or raise IgnoreRequest + return response + + def process_exception(self, request, exception, spider): + # Called when a download handler or a process_request() + # (from other downloader middleware) raises an exception. + + # Must either: + # - return None: continue processing this exception + # - return a Response object: stops process_exception() chain + # - return a Request object: stops process_exception() chain + pass + + def spider_opened(self, spider): + spider.logger.info('Spider opened: %s' % spider.name) diff --git a/placement/monitors.py b/placement/monitors.py new file mode 100644 index 0000000..ca1431c --- /dev/null +++ b/placement/monitors.py @@ -0,0 +1,38 @@ +from spidermon import Monitor, MonitorSuite, monitors +from spidermon.contrib.monitors.mixins import StatsMonitorMixin +# @monitors.name('Item count') +# class ItemCountMonitor(Monitor): + +# @monitors.name('Minimum number of items') +# def test_minimum_number_of_items(self): +# item_extracted = getattr( +# self.data.stats, 'item_scraped_count', 0) +# minimum_threshold = 10 + +# msg = 'Extracted less than {} items'.format( +# minimum_threshold) +# self.assertTrue( +# item_extracted >= minimum_threshold, msg=msg +# ) + +@monitors.name('Item validation') +class ItemValidationMonitor(Monitor, StatsMonitorMixin): +# class ItemValidationMonitor(Monitor): + + @monitors.name('No item validation errors') + def test_no_item_validation_errors(self): + validation_errors = getattr( + self.stats, 'spidermon/validation/fields/errors', 0 + ) + self.assertEqual( + validation_errors, + 0, + msg='Found validation errors in {} fields'.format( + validation_errors) + ) + +class SpiderCloseMonitorSuite(MonitorSuite): + monitors = [ + # ItemCountMonitor, + ItemValidationMonitor, + ] diff --git a/placement/pipelines.py b/placement/pipelines.py new file mode 100644 index 0000000..d352283 --- /dev/null +++ b/placement/pipelines.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- + +# Define your item pipelines here +# +# Don't forget to add your pipeline to the ITEM_PIPELINES setting +# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html + + +class PlacementPipeline(object): + def process_item(self, item, spider): + return item diff --git a/placement/schema.json b/placement/schema.json new file mode 100644 index 0000000..f2e29df --- /dev/null +++ b/placement/schema.json @@ -0,0 +1,15 @@ +{ + "link": { + "type": "string", + "required": true + }, + "name": { + "type": "Integer", + "required": true + }, + "year": { + "type": "integer", + "max": 2021, + "min": 2018 + } +} diff --git a/placement/schemax.json b/placement/schemax.json new file mode 100644 index 0000000..a5d0067 --- /dev/null +++ b/placement/schemax.json @@ -0,0 +1,23 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Placement Opportunities", + "description": "Placement opportunities at Amity University", + "type": "object", + "properties": { + "link": { + "description": "URL to more information about opportunity", + "type": "integer" + }, + "name": { + "description": "Title of the opportunity", + "type": "string" + }, + "year": { + "description": "For students of which year", + "type": "integer", + "maximum": 2021, + "minimum": 2018 + } + }, + "required": ["link", "name"] +} diff --git a/placement/settings.py b/placement/settings.py new file mode 100644 index 0000000..b92eda0 --- /dev/null +++ b/placement/settings.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- + +# Scrapy settings for placement project +# +# For simplicity, this file contains only settings considered important or +# commonly used. You can find more settings consulting the documentation: +# +# https://doc.scrapy.org/en/latest/topics/settings.html +# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html +# https://doc.scrapy.org/en/latest/topics/spider-middleware.html + +BOT_NAME = 'placement' + +SPIDER_MODULES = ['placement.spiders'] +NEWSPIDER_MODULE = 'placement.spiders' + + +# Crawl responsibly by identifying yourself (and your website) on the user-agent +#USER_AGENT = 'placement (+http://www.yourdomain.com)' + +# Obey robots.txt rules +ROBOTSTXT_OBEY = True + +# Configure maximum concurrent requests performed by Scrapy (default: 16) +#CONCURRENT_REQUESTS = 32 + +# Configure a delay for requests for the same website (default: 0) +# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay +# See also autothrottle settings and docs +#DOWNLOAD_DELAY = 3 +# The download delay setting will honor only one of: +#CONCURRENT_REQUESTS_PER_DOMAIN = 16 +#CONCURRENT_REQUESTS_PER_IP = 16 + +# Disable cookies (enabled by default) +#COOKIES_ENABLED = False + +# Disable Telnet Console (enabled by default) +#TELNETCONSOLE_ENABLED = False + +# Override the default request headers: +#DEFAULT_REQUEST_HEADERS = { +# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', +# 'Accept-Language': 'en', +#} + +# Enable or disable spider middlewares +# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html +#SPIDER_MIDDLEWARES = { +# 'placement.middlewares.PlacementSpiderMiddleware': 543, +#} + +# Enable or disable downloader middlewares +# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html +#DOWNLOADER_MIDDLEWARES = { +# 'placement.middlewares.PlacementDownloaderMiddleware': 543, +#} + +# Enable or disable extensions +# See https://doc.scrapy.org/en/latest/topics/extensions.html +EXTENSIONS = { + 'spidermon.contrib.scrapy.extensions.Spidermon': 500, +} + +# Configure item pipelines +# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html +SPIDERMON_ENABLED = True + +ITEM_PIPELINES = { + 'spidermon.contrib.scrapy.pipelines.ItemValidationPipeline': 800, +} + +SPIDERMON_VALIDATION_CERBERUS = { + '/home/vipulgupta2048/placement/placement/schema.json' +} +# SPIDERMON_VALIDATION_SCHEMAS = { +# '/home/vipulgupta2048/placement/placement/jsonx.schema' +# } + +# SPIDERMON_VALIDATION_MODELS = ( +# 'placement.validators.Product', +# ) + +# SPIDERMON_SPIDER_CLOSE_MONITORS = ( +# 'placement.monitors.SpiderCloseMonitorSuite', +# ) + +# SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS = True + +# Enable and configure the AutoThrottle extension (disabled by default) +# See https://doc.scrapy.org/en/latest/topics/autothrottle.html +#AUTOTHROTTLE_ENABLED = True +# The initial download delay +#AUTOTHROTTLE_START_DELAY = 5 +# The maximum download delay to be set in case of high latencies +#AUTOTHROTTLE_MAX_DELAY = 60 +# The average number of requests Scrapy should be sending in parallel to +# each remote server +#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 +# Enable showing throttling stats for every response received: +#AUTOTHROTTLE_DEBUG = False + +# Enable and configure HTTP caching (disabled by default) +# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings +#HTTPCACHE_ENABLED = True +#HTTPCACHE_EXPIRATION_SECS = 0 +#HTTPCACHE_DIR = 'httpcache' +#HTTPCACHE_IGNORE_HTTP_CODES = [] +#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' diff --git a/placement/spider/bin/activate b/placement/spider/bin/activate new file mode 100644 index 0000000..f8ed00b --- /dev/null +++ b/placement/spider/bin/activate @@ -0,0 +1,78 @@ +# This file must be used with "source bin/activate" *from bash* +# you cannot run it directly + +deactivate () { + unset -f pydoc >/dev/null 2>&1 + + # reset old environment variables + # ! [ -z ${VAR+_} ] returns true if VAR is declared at all + if ! [ -z "${_OLD_VIRTUAL_PATH+_}" ] ; then + PATH="$_OLD_VIRTUAL_PATH" + export PATH + unset _OLD_VIRTUAL_PATH + fi + if ! [ -z "${_OLD_VIRTUAL_PYTHONHOME+_}" ] ; then + PYTHONHOME="$_OLD_VIRTUAL_PYTHONHOME" + export PYTHONHOME + unset _OLD_VIRTUAL_PYTHONHOME + fi + + # This should detect bash and zsh, which have a hash command that must + # be called to get it to forget past commands. Without forgetting + # past commands the $PATH changes we made may not be respected + if [ -n "${BASH-}" ] || [ -n "${ZSH_VERSION-}" ] ; then + hash -r 2>/dev/null + fi + + if ! [ -z "${_OLD_VIRTUAL_PS1+_}" ] ; then + PS1="$_OLD_VIRTUAL_PS1" + export PS1 + unset _OLD_VIRTUAL_PS1 + fi + + unset VIRTUAL_ENV + if [ ! "${1-}" = "nondestructive" ] ; then + # Self destruct! + unset -f deactivate + fi +} + +# unset irrelevant variables +deactivate nondestructive + +VIRTUAL_ENV="/home/vipulgupta2048/placement/placement/spider" +export VIRTUAL_ENV + +_OLD_VIRTUAL_PATH="$PATH" +PATH="$VIRTUAL_ENV/bin:$PATH" +export PATH + +# unset PYTHONHOME if set +if ! [ -z "${PYTHONHOME+_}" ] ; then + _OLD_VIRTUAL_PYTHONHOME="$PYTHONHOME" + unset PYTHONHOME +fi + +if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT-}" ] ; then + _OLD_VIRTUAL_PS1="$PS1" + if [ "x" != x ] ; then + PS1="$PS1" + else + PS1="(`basename \"$VIRTUAL_ENV\"`) $PS1" + fi + export PS1 +fi + +# Make sure to unalias pydoc if it's already there +alias pydoc 2>/dev/null >/dev/null && unalias pydoc + +pydoc () { + python -m pydoc "$@" +} + +# This should detect bash and zsh, which have a hash command that must +# be called to get it to forget past commands. Without forgetting +# past commands the $PATH changes we made may not be respected +if [ -n "${BASH-}" ] || [ -n "${ZSH_VERSION-}" ] ; then + hash -r 2>/dev/null +fi diff --git a/placement/spider/bin/activate.csh b/placement/spider/bin/activate.csh new file mode 100644 index 0000000..c6f72a0 --- /dev/null +++ b/placement/spider/bin/activate.csh @@ -0,0 +1,42 @@ +# This file must be used with "source bin/activate.csh" *from csh*. +# You cannot run it directly. +# Created by Davide Di Blasi . + +set newline='\ +' + +alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH:q" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT:q" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate && unalias pydoc' + +# Unset irrelevant variables. +deactivate nondestructive + +setenv VIRTUAL_ENV "/home/vipulgupta2048/placement/placement/spider" + +set _OLD_VIRTUAL_PATH="$PATH:q" +setenv PATH "$VIRTUAL_ENV:q/bin:$PATH:q" + + + +if ("" != "") then + set env_name = "" +else + set env_name = "$VIRTUAL_ENV:t:q" +endif + +# Could be in a non-interactive environment, +# in which case, $prompt is undefined and we wouldn't +# care about the prompt anyway. +if ( $?prompt ) then + set _OLD_VIRTUAL_PROMPT="$prompt:q" +if ( "$prompt:q" =~ *"$newline:q"* ) then + : +else + set prompt = "[$env_name:q] $prompt:q" +endif +endif + +unset env_name + +alias pydoc python -m pydoc + +rehash diff --git a/placement/spider/bin/activate.fish b/placement/spider/bin/activate.fish new file mode 100644 index 0000000..b34502f --- /dev/null +++ b/placement/spider/bin/activate.fish @@ -0,0 +1,76 @@ +# This file must be used using `source bin/activate.fish` *within a running fish ( http://fishshell.com ) session*. +# Do not run it directly. + +function deactivate -d 'Exit virtualenv mode and return to the normal environment.' + # reset old environment variables + if test -n "$_OLD_VIRTUAL_PATH" + set -gx PATH $_OLD_VIRTUAL_PATH + set -e _OLD_VIRTUAL_PATH + end + + if test -n "$_OLD_VIRTUAL_PYTHONHOME" + set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME + set -e _OLD_VIRTUAL_PYTHONHOME + end + + if test -n "$_OLD_FISH_PROMPT_OVERRIDE" + # Set an empty local `$fish_function_path` to allow the removal of `fish_prompt` using `functions -e`. + set -l fish_function_path + + # Erase virtualenv's `fish_prompt` and restore the original. + functions -e fish_prompt + functions -c _old_fish_prompt fish_prompt + functions -e _old_fish_prompt + set -e _OLD_FISH_PROMPT_OVERRIDE + end + + set -e VIRTUAL_ENV + + if test "$argv[1]" != 'nondestructive' + # Self-destruct! + functions -e pydoc + functions -e deactivate + end +end + +# Unset irrelevant variables. +deactivate nondestructive + +set -gx VIRTUAL_ENV "/home/vipulgupta2048/placement/placement/spider" + +set -gx _OLD_VIRTUAL_PATH $PATH +set -gx PATH "$VIRTUAL_ENV/bin" $PATH + +# Unset `$PYTHONHOME` if set. +if set -q PYTHONHOME + set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME + set -e PYTHONHOME +end + +function pydoc + python -m pydoc $argv +end + +if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" + # Copy the current `fish_prompt` function as `_old_fish_prompt`. + functions -c fish_prompt _old_fish_prompt + + function fish_prompt + # Save the current $status, for fish_prompts that display it. + set -l old_status $status + + # Prompt override provided? + # If not, just prepend the environment name. + if test -n "" + printf '%s%s' "" (set_color normal) + else + printf '%s(%s) ' (set_color normal) (basename "$VIRTUAL_ENV") + end + + # Restore the original $status + echo "exit $old_status" | source + _old_fish_prompt + end + + set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" +end diff --git a/placement/spider/bin/activate.ps1 b/placement/spider/bin/activate.ps1 new file mode 100644 index 0000000..89dbec6 --- /dev/null +++ b/placement/spider/bin/activate.ps1 @@ -0,0 +1,150 @@ +# This file must be dot sourced from PoSh; you cannot run it +# directly. Do this: . ./activate.ps1 + +# FIXME: clean up unused vars. +$script:THIS_PATH = $myinvocation.mycommand.path +$script:BASE_DIR = split-path (resolve-path "$THIS_PATH/..") -Parent +$script:DIR_NAME = split-path $BASE_DIR -Leaf + +function global:deactivate ( [switch] $NonDestructive ){ + + if ( test-path variable:_OLD_VIRTUAL_PATH ) { + $env:PATH = $variable:_OLD_VIRTUAL_PATH + remove-variable "_OLD_VIRTUAL_PATH" -scope global + } + + if ( test-path function:_old_virtual_prompt ) { + $function:prompt = $function:_old_virtual_prompt + remove-item function:\_old_virtual_prompt + } + + if ($env:VIRTUAL_ENV) { + $old_env = split-path $env:VIRTUAL_ENV -leaf + remove-item env:VIRTUAL_ENV -erroraction silentlycontinue + } + + if ( !$NonDestructive ) { + # Self destruct! + remove-item function:deactivate + } +} + +# unset irrelevant variables +deactivate -nondestructive + +$VIRTUAL_ENV = $BASE_DIR +$env:VIRTUAL_ENV = $VIRTUAL_ENV + +$global:_OLD_VIRTUAL_PATH = $env:PATH +$env:PATH = "$env:VIRTUAL_ENV/bin:" + $env:PATH +if (! $env:VIRTUAL_ENV_DISABLE_PROMPT) { + function global:_old_virtual_prompt { "" } + $function:_old_virtual_prompt = $function:prompt + function global:prompt { + # Add a prefix to the current prompt, but don't discard it. + write-host "($(split-path $env:VIRTUAL_ENV -leaf)) " -nonewline + & $function:_old_virtual_prompt + } +} + +# SIG # Begin signature block +# MIISeAYJKoZIhvcNAQcCoIISaTCCEmUCAQExCzAJBgUrDgMCGgUAMGkGCisGAQQB +# gjcCAQSgWzBZMDQGCisGAQQBgjcCAR4wJgIDAQAABBAfzDtgWUsITrck0sYpfvNR +# AgEAAgEAAgEAAgEAAgEAMCEwCQYFKw4DAhoFAAQUS5reBwSg3zOUwhXf2jPChZzf +# yPmggg6tMIIGcDCCBFigAwIBAgIBJDANBgkqhkiG9w0BAQUFADB9MQswCQYDVQQG +# EwJJTDEWMBQGA1UEChMNU3RhcnRDb20gTHRkLjErMCkGA1UECxMiU2VjdXJlIERp +# Z2l0YWwgQ2VydGlmaWNhdGUgU2lnbmluZzEpMCcGA1UEAxMgU3RhcnRDb20gQ2Vy +# dGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMDcxMDI0MjIwMTQ2WhcNMTcxMDI0MjIw +# MTQ2WjCBjDELMAkGA1UEBhMCSUwxFjAUBgNVBAoTDVN0YXJ0Q29tIEx0ZC4xKzAp +# BgNVBAsTIlNlY3VyZSBEaWdpdGFsIENlcnRpZmljYXRlIFNpZ25pbmcxODA2BgNV +# BAMTL1N0YXJ0Q29tIENsYXNzIDIgUHJpbWFyeSBJbnRlcm1lZGlhdGUgT2JqZWN0 +# IENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyiOLIjUemqAbPJ1J +# 0D8MlzgWKbr4fYlbRVjvhHDtfhFN6RQxq0PjTQxRgWzwFQNKJCdU5ftKoM5N4YSj +# Id6ZNavcSa6/McVnhDAQm+8H3HWoD030NVOxbjgD/Ih3HaV3/z9159nnvyxQEckR +# ZfpJB2Kfk6aHqW3JnSvRe+XVZSufDVCe/vtxGSEwKCaNrsLc9pboUoYIC3oyzWoU +# TZ65+c0H4paR8c8eK/mC914mBo6N0dQ512/bkSdaeY9YaQpGtW/h/W/FkbQRT3sC +# pttLVlIjnkuY4r9+zvqhToPjxcfDYEf+XD8VGkAqle8Aa8hQ+M1qGdQjAye8OzbV +# uUOw7wIDAQABo4IB6TCCAeUwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMC +# AQYwHQYDVR0OBBYEFNBOD0CZbLhLGW87KLjg44gHNKq3MB8GA1UdIwQYMBaAFE4L +# 7xqkQFulF2mHMMo0aEPQQa7yMD0GCCsGAQUFBwEBBDEwLzAtBggrBgEFBQcwAoYh +# aHR0cDovL3d3dy5zdGFydHNzbC5jb20vc2ZzY2EuY3J0MFsGA1UdHwRUMFIwJ6Al +# oCOGIWh0dHA6Ly93d3cuc3RhcnRzc2wuY29tL3Nmc2NhLmNybDAnoCWgI4YhaHR0 +# cDovL2NybC5zdGFydHNzbC5jb20vc2ZzY2EuY3JsMIGABgNVHSAEeTB3MHUGCysG +# AQQBgbU3AQIBMGYwLgYIKwYBBQUHAgEWImh0dHA6Ly93d3cuc3RhcnRzc2wuY29t +# L3BvbGljeS5wZGYwNAYIKwYBBQUHAgEWKGh0dHA6Ly93d3cuc3RhcnRzc2wuY29t +# L2ludGVybWVkaWF0ZS5wZGYwEQYJYIZIAYb4QgEBBAQDAgABMFAGCWCGSAGG+EIB +# DQRDFkFTdGFydENvbSBDbGFzcyAyIFByaW1hcnkgSW50ZXJtZWRpYXRlIE9iamVj +# dCBTaWduaW5nIENlcnRpZmljYXRlczANBgkqhkiG9w0BAQUFAAOCAgEAcnMLA3Va +# N4OIE9l4QT5OEtZy5PByBit3oHiqQpgVEQo7DHRsjXD5H/IyTivpMikaaeRxIv95 +# baRd4hoUcMwDj4JIjC3WA9FoNFV31SMljEZa66G8RQECdMSSufgfDYu1XQ+cUKxh +# D3EtLGGcFGjjML7EQv2Iol741rEsycXwIXcryxeiMbU2TPi7X3elbwQMc4JFlJ4B +# y9FhBzuZB1DV2sN2irGVbC3G/1+S2doPDjL1CaElwRa/T0qkq2vvPxUgryAoCppU +# FKViw5yoGYC+z1GaesWWiP1eFKAL0wI7IgSvLzU3y1Vp7vsYaxOVBqZtebFTWRHt +# XjCsFrrQBngt0d33QbQRI5mwgzEp7XJ9xu5d6RVWM4TPRUsd+DDZpBHm9mszvi9g +# VFb2ZG7qRRXCSqys4+u/NLBPbXi/m/lU00cODQTlC/euwjk9HQtRrXQ/zqsBJS6U +# J+eLGw1qOfj+HVBl/ZQpfoLk7IoWlRQvRL1s7oirEaqPZUIWY/grXq9r6jDKAp3L +# ZdKQpPOnnogtqlU4f7/kLjEJhrrc98mrOWmVMK/BuFRAfQ5oDUMnVmCzAzLMjKfG +# cVW/iMew41yfhgKbwpfzm3LBr1Zv+pEBgcgW6onRLSAn3XHM0eNtz+AkxH6rRf6B +# 2mYhLEEGLapH8R1AMAo4BbVFOZR5kXcMCwowggg1MIIHHaADAgECAgIEuDANBgkq +# hkiG9w0BAQUFADCBjDELMAkGA1UEBhMCSUwxFjAUBgNVBAoTDVN0YXJ0Q29tIEx0 +# ZC4xKzApBgNVBAsTIlNlY3VyZSBEaWdpdGFsIENlcnRpZmljYXRlIFNpZ25pbmcx +# ODA2BgNVBAMTL1N0YXJ0Q29tIENsYXNzIDIgUHJpbWFyeSBJbnRlcm1lZGlhdGUg +# T2JqZWN0IENBMB4XDTExMTIwMzE1MzQxOVoXDTEzMTIwMzE0NTgwN1owgYwxIDAe +# BgNVBA0TFzU4MTc5Ni1HaDd4Zkp4a3hRU0lPNEUwMQswCQYDVQQGEwJERTEPMA0G +# A1UECBMGQmVybGluMQ8wDQYDVQQHEwZCZXJsaW4xFjAUBgNVBAMTDUphbm5pcyBM +# ZWlkZWwxITAfBgkqhkiG9w0BCQEWEmphbm5pc0BsZWlkZWwuaW5mbzCCAiIwDQYJ +# KoZIhvcNAQEBBQADggIPADCCAgoCggIBAMcPeABYdN7nPq/AkZ/EkyUBGx/l2Yui +# Lfm8ZdLG0ulMb/kQL3fRY7sUjYPyn9S6PhqqlFnNoGHJvbbReCdUC9SIQYmOEjEA +# raHfb7MZU10NjO4U2DdGucj2zuO5tYxKizizOJF0e4yRQZVxpUGdvkW/+GLjCNK5 +# L7mIv3Z1dagxDKHYZT74HXiS4VFUwHF1k36CwfM2vsetdm46bdgSwV+BCMmZICYT +# IJAS9UQHD7kP4rik3bFWjUx08NtYYFAVOd/HwBnemUmJe4j3IhZHr0k1+eDG8hDH +# KVvPgLJIoEjC4iMFk5GWsg5z2ngk0LLu3JZMtckHsnnmBPHQK8a3opUNd8hdMNJx +# gOwKjQt2JZSGUdIEFCKVDqj0FmdnDMPfwy+FNRtpBMl1sz78dUFhSrnM0D8NXrqa +# 4rG+2FoOXlmm1rb6AFtpjAKksHRpYcPk2DPGWp/1sWB+dUQkS3gOmwFzyqeTuXpT +# 0juqd3iAxOGx1VRFQ1VHLLf3AzV4wljBau26I+tu7iXxesVucSdsdQu293jwc2kN +# xK2JyHCoZH+RyytrwS0qw8t7rMOukU9gwP8mn3X6mgWlVUODMcHTULjSiCEtvyZ/ +# aafcwjUbt4ReEcnmuZtWIha86MTCX7U7e+cnpWG4sIHPnvVTaz9rm8RyBkIxtFCB +# nQ3FnoQgyxeJAgMBAAGjggOdMIIDmTAJBgNVHRMEAjAAMA4GA1UdDwEB/wQEAwIH +# gDAuBgNVHSUBAf8EJDAiBggrBgEFBQcDAwYKKwYBBAGCNwIBFQYKKwYBBAGCNwoD +# DTAdBgNVHQ4EFgQUWyCgrIWo8Ifvvm1/YTQIeMU9nc8wHwYDVR0jBBgwFoAU0E4P +# QJlsuEsZbzsouODjiAc0qrcwggIhBgNVHSAEggIYMIICFDCCAhAGCysGAQQBgbU3 +# AQICMIIB/zAuBggrBgEFBQcCARYiaHR0cDovL3d3dy5zdGFydHNzbC5jb20vcG9s +# aWN5LnBkZjA0BggrBgEFBQcCARYoaHR0cDovL3d3dy5zdGFydHNzbC5jb20vaW50 +# ZXJtZWRpYXRlLnBkZjCB9wYIKwYBBQUHAgIwgeowJxYgU3RhcnRDb20gQ2VydGlm +# aWNhdGlvbiBBdXRob3JpdHkwAwIBARqBvlRoaXMgY2VydGlmaWNhdGUgd2FzIGlz +# c3VlZCBhY2NvcmRpbmcgdG8gdGhlIENsYXNzIDIgVmFsaWRhdGlvbiByZXF1aXJl +# bWVudHMgb2YgdGhlIFN0YXJ0Q29tIENBIHBvbGljeSwgcmVsaWFuY2Ugb25seSBm +# b3IgdGhlIGludGVuZGVkIHB1cnBvc2UgaW4gY29tcGxpYW5jZSBvZiB0aGUgcmVs +# eWluZyBwYXJ0eSBvYmxpZ2F0aW9ucy4wgZwGCCsGAQUFBwICMIGPMCcWIFN0YXJ0 +# Q29tIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MAMCAQIaZExpYWJpbGl0eSBhbmQg +# d2FycmFudGllcyBhcmUgbGltaXRlZCEgU2VlIHNlY3Rpb24gIkxlZ2FsIGFuZCBM +# aW1pdGF0aW9ucyIgb2YgdGhlIFN0YXJ0Q29tIENBIHBvbGljeS4wNgYDVR0fBC8w +# LTAroCmgJ4YlaHR0cDovL2NybC5zdGFydHNzbC5jb20vY3J0YzItY3JsLmNybDCB +# iQYIKwYBBQUHAQEEfTB7MDcGCCsGAQUFBzABhitodHRwOi8vb2NzcC5zdGFydHNz +# bC5jb20vc3ViL2NsYXNzMi9jb2RlL2NhMEAGCCsGAQUFBzAChjRodHRwOi8vYWlh +# LnN0YXJ0c3NsLmNvbS9jZXJ0cy9zdWIuY2xhc3MyLmNvZGUuY2EuY3J0MCMGA1Ud +# EgQcMBqGGGh0dHA6Ly93d3cuc3RhcnRzc2wuY29tLzANBgkqhkiG9w0BAQUFAAOC +# AQEAhrzEV6zwoEtKjnFRhCsjwiPykVpo5Eiye77Ve801rQDiRKgSCCiW6g3HqedL +# OtaSs65Sj2pm3Viea4KR0TECLcbCTgsdaHqw2x1yXwWBQWZEaV6EB05lIwfr94P1 +# SFpV43zkuc+bbmA3+CRK45LOcCNH5Tqq7VGTCAK5iM7tvHwFlbQRl+I6VEL2mjpF +# NsuRjDOVrv/9qw/a22YJ9R7Y1D0vUSs3IqZx2KMUaYDP7H2mSRxJO2nADQZBtriF +# gTyfD3lYV12MlIi5CQwe3QC6DrrfSMP33i5Wa/OFJiQ27WPxmScYVhiqozpImFT4 +# PU9goiBv9RKXdgTmZE1PN0NQ5jGCAzUwggMxAgEBMIGTMIGMMQswCQYDVQQGEwJJ +# TDEWMBQGA1UEChMNU3RhcnRDb20gTHRkLjErMCkGA1UECxMiU2VjdXJlIERpZ2l0 +# YWwgQ2VydGlmaWNhdGUgU2lnbmluZzE4MDYGA1UEAxMvU3RhcnRDb20gQ2xhc3Mg +# MiBQcmltYXJ5IEludGVybWVkaWF0ZSBPYmplY3QgQ0ECAgS4MAkGBSsOAwIaBQCg +# eDAYBgorBgEEAYI3AgEMMQowCKACgAChAoAAMBkGCSqGSIb3DQEJAzEMBgorBgEE +# AYI3AgEEMBwGCisGAQQBgjcCAQsxDjAMBgorBgEEAYI3AgEVMCMGCSqGSIb3DQEJ +# BDEWBBRVGw0FDSiaIi38dWteRUAg/9Pr6DANBgkqhkiG9w0BAQEFAASCAgCInvOZ +# FdaNFzbf6trmFDZKMojyx3UjKMCqNjHVBbuKY0qXwFC/ElYDV1ShJ2CBZbdurydO +# OQ6cIQ0KREOCwmX/xB49IlLHHUxNhEkVv7HGU3EKAFf9IBt9Yr7jikiR9cjIsfHK +# 4cjkoKJL7g28yEpLLkHt1eo37f1Ga9lDWEa5Zq3U5yX+IwXhrUBm1h8Xr033FhTR +# VEpuSz6LHtbrL/zgJnCzJ2ahjtJoYevdcWiNXffosJHFaSfYDDbiNsPRDH/1avmb +# 5j/7BhP8BcBaR6Fp8tFbNGIcWHHGcjqLMnTc4w13b7b4pDhypqElBa4+lCmwdvv9 +# GydYtRgPz8GHeoBoKj30YBlMzRIfFYaIFGIC4Ai3UEXkuH9TxYohVbGm/W0Kl4Lb +# RJ1FwiVcLcTOJdgNId2vQvKc+jtNrjcg5SP9h2v/C4aTx8tyc6tE3TOPh2f9b8DL +# S+SbVArJpuJqrPTxDDoO1QNjTgLcdVYeZDE+r/NjaGZ6cMSd8db3EaG3ijD/0bud +# SItbm/OlNVbQOFRR76D+ZNgPcU5iNZ3bmvQQIg6aSB9MHUpIE/SeCkNl9YeVk1/1 +# GFULgNMRmIYP4KLvu9ylh5Gu3hvD5VNhH6+FlXANwFy07uXks5uF8mfZVxVCnodG +# xkNCx+6PsrA5Z7WP4pXcmYnMn97npP/Q9EHJWw== +# SIG # End signature block diff --git a/placement/spider/bin/activate_this.py b/placement/spider/bin/activate_this.py new file mode 100644 index 0000000..444d3fd --- /dev/null +++ b/placement/spider/bin/activate_this.py @@ -0,0 +1,36 @@ +"""By using execfile(this_file, dict(__file__=this_file)) you will +activate this virtualenv environment. + +This can be used when you must use an existing Python interpreter, not +the virtualenv bin/python +""" + +try: + __file__ +except NameError: + raise AssertionError( + "You must run this like execfile('path/to/activate_this.py', dict(__file__='path/to/activate_this.py'))" + ) +import os +import site +import sys + +old_os_path = os.environ.get("PATH", "") +os.environ["PATH"] = os.path.dirname(os.path.abspath(__file__)) + os.pathsep + old_os_path +base = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if sys.platform == "win32": + site_packages = os.path.join(base, "Lib", "site-packages") +else: + site_packages = os.path.join(base, "lib", "python%s" % sys.version[:3], "site-packages") +prev_sys_path = list(sys.path) + +site.addsitedir(site_packages) +sys.real_prefix = sys.prefix +sys.prefix = base +# Move the added items to the front of the path: +new_sys_path = [] +for item in list(sys.path): + if item not in prev_sys_path: + new_sys_path.append(item) + sys.path.remove(item) +sys.path[:0] = new_sys_path diff --git a/placement/spider/bin/easy_install b/placement/spider/bin/easy_install new file mode 100755 index 0000000..01db361 --- /dev/null +++ b/placement/spider/bin/easy_install @@ -0,0 +1,11 @@ +#!/home/vipulgupta2048/placement/placement/spider/bin/python3 + +# -*- coding: utf-8 -*- +import re +import sys + +from setuptools.command.easy_install import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/placement/spider/bin/easy_install-3.6 b/placement/spider/bin/easy_install-3.6 new file mode 100755 index 0000000..01db361 --- /dev/null +++ b/placement/spider/bin/easy_install-3.6 @@ -0,0 +1,11 @@ +#!/home/vipulgupta2048/placement/placement/spider/bin/python3 + +# -*- coding: utf-8 -*- +import re +import sys + +from setuptools.command.easy_install import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/placement/spider/bin/jsonpointer b/placement/spider/bin/jsonpointer new file mode 100755 index 0000000..d01c60c --- /dev/null +++ b/placement/spider/bin/jsonpointer @@ -0,0 +1,69 @@ +#!/home/vipulgupta2048/placement/placement/spider/bin/python3 +# -*- coding: utf-8 -*- + +from __future__ import print_function + +import sys +import os.path +import json +import jsonpointer +import argparse + + +parser = argparse.ArgumentParser( + description='Resolve a JSON pointer on JSON files') + +# Accept pointer as argument or as file +ptr_group = parser.add_mutually_exclusive_group(required=True) + +ptr_group.add_argument('-f', '--pointer-file', type=argparse.FileType('r'), + nargs='?', + help='File containing a JSON pointer expression') + +ptr_group.add_argument('POINTER', type=str, nargs='?', + help='A JSON pointer expression') + +parser.add_argument('FILE', type=argparse.FileType('r'), nargs='+', + help='Files for which the pointer should be resolved') +parser.add_argument('--indent', type=int, default=None, + help='Indent output by n spaces') +parser.add_argument('-v', '--version', action='version', + version='%(prog)s ' + jsonpointer.__version__) + + +def main(): + try: + resolve_files() + except KeyboardInterrupt: + sys.exit(1) + + +def parse_pointer(args): + if args.POINTER: + ptr = args.POINTER + elif args.pointer_file: + ptr = args.pointer_file.read().strip() + else: + parser.print_usage() + sys.exit(1) + + return ptr + + +def resolve_files(): + """ Resolve a JSON pointer on JSON files """ + args = parser.parse_args() + + ptr = parse_pointer(args) + + for f in args.FILE: + doc = json.load(f) + try: + result = jsonpointer.resolve_pointer(doc, ptr) + print(json.dumps(result, indent=args.indent)) + except jsonpointer.JsonPointerException as e: + print('Could not resolve pointer: %s' % str(e), file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/placement/spider/bin/jsonschema b/placement/spider/bin/jsonschema new file mode 100755 index 0000000..cc67809 --- /dev/null +++ b/placement/spider/bin/jsonschema @@ -0,0 +1,10 @@ +#!/home/vipulgupta2048/placement/placement/spider/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys + +from jsonschema.cli import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/placement/spider/bin/pip b/placement/spider/bin/pip new file mode 100755 index 0000000..314741b --- /dev/null +++ b/placement/spider/bin/pip @@ -0,0 +1,11 @@ +#!/home/vipulgupta2048/placement/placement/spider/bin/python3 + +# -*- coding: utf-8 -*- +import re +import sys + +from pip._internal import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/placement/spider/bin/pip3 b/placement/spider/bin/pip3 new file mode 100755 index 0000000..314741b --- /dev/null +++ b/placement/spider/bin/pip3 @@ -0,0 +1,11 @@ +#!/home/vipulgupta2048/placement/placement/spider/bin/python3 + +# -*- coding: utf-8 -*- +import re +import sys + +from pip._internal import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/placement/spider/bin/pip3.6 b/placement/spider/bin/pip3.6 new file mode 100755 index 0000000..314741b --- /dev/null +++ b/placement/spider/bin/pip3.6 @@ -0,0 +1,11 @@ +#!/home/vipulgupta2048/placement/placement/spider/bin/python3 + +# -*- coding: utf-8 -*- +import re +import sys + +from pip._internal import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/placement/spider/bin/python b/placement/spider/bin/python new file mode 120000 index 0000000..b8a0adb --- /dev/null +++ b/placement/spider/bin/python @@ -0,0 +1 @@ +python3 \ No newline at end of file diff --git a/placement/spider/bin/python-config b/placement/spider/bin/python-config new file mode 100755 index 0000000..30262f8 --- /dev/null +++ b/placement/spider/bin/python-config @@ -0,0 +1,78 @@ +#!/home/vipulgupta2048/placement/placement/spider/bin/python + +import sys +import getopt +import sysconfig + +valid_opts = ['prefix', 'exec-prefix', 'includes', 'libs', 'cflags', + 'ldflags', 'help'] + +if sys.version_info >= (3, 2): + valid_opts.insert(-1, 'extension-suffix') + valid_opts.append('abiflags') +if sys.version_info >= (3, 3): + valid_opts.append('configdir') + + +def exit_with_usage(code=1): + sys.stderr.write("Usage: {0} [{1}]\n".format( + sys.argv[0], '|'.join('--'+opt for opt in valid_opts))) + sys.exit(code) + +try: + opts, args = getopt.getopt(sys.argv[1:], '', valid_opts) +except getopt.error: + exit_with_usage() + +if not opts: + exit_with_usage() + +pyver = sysconfig.get_config_var('VERSION') +getvar = sysconfig.get_config_var + +opt_flags = [flag for (flag, val) in opts] + +if '--help' in opt_flags: + exit_with_usage(code=0) + +for opt in opt_flags: + if opt == '--prefix': + print(sysconfig.get_config_var('prefix')) + + elif opt == '--exec-prefix': + print(sysconfig.get_config_var('exec_prefix')) + + elif opt in ('--includes', '--cflags'): + flags = ['-I' + sysconfig.get_path('include'), + '-I' + sysconfig.get_path('platinclude')] + if opt == '--cflags': + flags.extend(getvar('CFLAGS').split()) + print(' '.join(flags)) + + elif opt in ('--libs', '--ldflags'): + abiflags = getattr(sys, 'abiflags', '') + libs = ['-lpython' + pyver + abiflags] + libs += getvar('LIBS').split() + libs += getvar('SYSLIBS').split() + # add the prefix/lib/pythonX.Y/config dir, but only if there is no + # shared library in prefix/lib/. + if opt == '--ldflags': + if not getvar('Py_ENABLE_SHARED'): + libs.insert(0, '-L' + getvar('LIBPL')) + if not getvar('PYTHONFRAMEWORK'): + libs.extend(getvar('LINKFORSHARED').split()) + print(' '.join(libs)) + + elif opt == '--extension-suffix': + ext_suffix = sysconfig.get_config_var('EXT_SUFFIX') + if ext_suffix is None: + ext_suffix = sysconfig.get_config_var('SO') + print(ext_suffix) + + elif opt == '--abiflags': + if not getattr(sys, 'abiflags', None): + exit_with_usage() + print(sys.abiflags) + + elif opt == '--configdir': + print(sysconfig.get_config_var('LIBPL')) diff --git a/placement/spider/bin/python3 b/placement/spider/bin/python3 new file mode 100755 index 0000000..72aec15 Binary files /dev/null and b/placement/spider/bin/python3 differ diff --git a/placement/spider/bin/python3.6 b/placement/spider/bin/python3.6 new file mode 120000 index 0000000..b8a0adb --- /dev/null +++ b/placement/spider/bin/python3.6 @@ -0,0 +1 @@ +python3 \ No newline at end of file diff --git a/placement/spider/bin/slugify b/placement/spider/bin/slugify new file mode 100755 index 0000000..6bcbf77 --- /dev/null +++ b/placement/spider/bin/slugify @@ -0,0 +1,10 @@ +#!/home/vipulgupta2048/placement/placement/spider/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys + +from slugify.slugify import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/placement/spider/bin/wheel b/placement/spider/bin/wheel new file mode 100755 index 0000000..be88322 --- /dev/null +++ b/placement/spider/bin/wheel @@ -0,0 +1,11 @@ +#!/home/vipulgupta2048/placement/placement/spider/bin/python3 + +# -*- coding: utf-8 -*- +import re +import sys + +from wheel.cli import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/placement/spider/include/python3.6m b/placement/spider/include/python3.6m new file mode 120000 index 0000000..ba3a1bf --- /dev/null +++ b/placement/spider/include/python3.6m @@ -0,0 +1 @@ +/usr/include/python3.6m \ No newline at end of file diff --git a/placement/spidermon-1.9.0.linux-x86_64.tar.gz b/placement/spidermon-1.9.0.linux-x86_64.tar.gz new file mode 100644 index 0000000..15988fd Binary files /dev/null and b/placement/spidermon-1.9.0.linux-x86_64.tar.gz differ diff --git a/placement/spiders/__init__.py b/placement/spiders/__init__.py new file mode 100644 index 0000000..ebd689a --- /dev/null +++ b/placement/spiders/__init__.py @@ -0,0 +1,4 @@ +# This package will contain the spiders of your Scrapy project +# +# Please refer to the documentation for information on how to create and manage +# your spiders. diff --git a/placement/spiders/new.py b/placement/spiders/new.py new file mode 100644 index 0000000..584571a --- /dev/null +++ b/placement/spiders/new.py @@ -0,0 +1,36 @@ +import re +# import os +# import json +import scrapy +# from cerberus import Validator +from placement.items import Product + +class PlacementSpider(scrapy.Spider): + name = "new" + start_urls = ['https://amity.edu/placement/upcoming-recruitment.asp'] + + def parse(self, response): + lists = response.xpath("//div[@class='content-wrapper']//li") + + x = Product() + for ilist in lists: + x['link'] = "https://amity.edu/placement/" + ilist.xpath("./a/@href").extract_first() + x['name'] = ilist.xpath(".//strong/text()").extract_first().strip() + year = re.findall(r'(20\w{2})', ilist.xpath(".//strong/text()").extract_first()) + x['year'] = int(year[0]) + + yield x + + # Validation with Cerberus + # with open(os.path.abspath("schema.json")) as f: + # schema = json.loads(f.read()) + + # validator = Validator() + # validator.validate(x, schema) + + # if validator.errors: + # exit(f"Validation failed: {validator.errors}") + # print() + # else: + # print("No validation errors") + # print() diff --git a/placement/validators.py b/placement/validators.py new file mode 100644 index 0000000..0518539 --- /dev/null +++ b/placement/validators.py @@ -0,0 +1,9 @@ +from schematics.models import Model +from schematics.types import URLType, StringType, NumberType +# from schematics.types import URLType, StringType, IntType + +class Product(Model): + year = NumberType(min_value=2018, max_value=2021, strict=False) + # year = IntType() + name = StringType(required=True) + link = URLType(required=True)