diff --git a/README.md b/README.md index eba61ad..62c4f32 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,9 @@ cd # Initialise git repo git init +# Activate shell for this repo +pipenv shell + # Install dependencies pipenv install --dev diff --git a/bls.ipynb b/bls.ipynb new file mode 100644 index 0000000..4e0f221 --- /dev/null +++ b/bls.ipynb @@ -0,0 +1,444 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Web Scraper for BLS.gov\n", + "\n", + "https://www.bls.gov/ maintains webpages of outlook of jobs in the US. Each page has a lot of good information, but it is not possible to see the whole information at one place.\n", + "\n", + "The code below is a web-scraper that getches the webpages, extracts the information of interests, and collects it in a dictionary." 
+ ], + "metadata": { + "id": "53TBMpJFk51U" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "21m1Y6zsjyhd", + "outputId": "b1b9bae1-45d6-4267-dbce-c497bfed99a7" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{\n", + " \"role\": \"Aerospace Engineering and Operations Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/aerospace-engineering-and-operations-technicians.htm\",\n", + " \"pay2021\": 73580,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 11300,\n", + " \"growth10\": 6\n", + "}\n", + "{\n", + " \"role\": \"Aerospace Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/aerospace-engineers.htm\",\n", + " \"pay2021\": 122270,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 58800,\n", + " \"growth10\": 6\n", + "}\n", + "{\n", + " \"role\": \"Agricultural Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/agricultural-engineers.htm\",\n", + " \"pay2021\": 82640,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 1200,\n", + " \"growth10\": 1\n", + "}\n", + "{\n", + " \"role\": \"Architects\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/architects.htm\",\n", + " \"pay2021\": 80180,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"Internship/residency\",\n", + " \"jobs2021\": 125500,\n", + " \"growth10\": 3\n", + "}\n", + "{\n", + " \"role\": \"Bioengineers and Biomedical Engineers\",\n", + " \"ref\": 
\"https://www.bls.gov//ooh/architecture-and-engineering/biomedical-engineers.htm\",\n", + " \"pay2021\": 97410,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 17900,\n", + " \"growth10\": 10\n", + "}\n", + "{\n", + " \"role\": \"Cartographers and Photogrammetrists\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/cartographers-and-photogrammetrists.htm\",\n", + " \"pay2021\": 68900,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 13400,\n", + " \"growth10\": 3\n", + "}\n", + "{\n", + " \"role\": \"Chemical Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/chemical-engineers.htm\",\n", + " \"pay2021\": 105550,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 26900,\n", + " \"growth10\": 14\n", + "}\n", + "{\n", + " \"role\": \"Civil Engineering Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/civil-engineering-technicians.htm\",\n", + " \"pay2021\": 58320,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 66300,\n", + " \"growth10\": 0\n", + "}\n", + "{\n", + " \"role\": \"Civil Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/civil-engineers.htm\",\n", + " \"pay2021\": 88050,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 318300,\n", + " \"growth10\": 7\n", + "}\n", + "{\n", + " \"role\": \"Computer Hardware Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/computer-hardware-engineers.htm\",\n", + " \"pay2021\": 128170,\n", + " \"education\": \"Bachelor's 
degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 76900,\n", + " \"growth10\": 5\n", + "}\n", + "{\n", + " \"role\": \"Drafters\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/drafters.htm\",\n", + " \"pay2021\": 60290,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 192200,\n", + " \"growth10\": -3\n", + "}\n", + "{\n", + " \"role\": \"Electrical and Electronic Engineering Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/electrical-and-electronics-engineering-technicians.htm\",\n", + " \"pay2021\": 63640,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 105000,\n", + " \"growth10\": 0\n", + "}\n", + "{\n", + " \"role\": \"Electrical and Electronics Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/electrical-and-electronics-engineers.htm\",\n", + " \"pay2021\": 101780,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 303800,\n", + " \"growth10\": 3\n", + "}\n", + "{\n", + " \"role\": \"Electro-mechanical and Mechatronics Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/electro-mechanical-technicians.htm\",\n", + " \"pay2021\": 60360,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 12100,\n", + " \"growth10\": -4\n", + "}\n", + "{\n", + " \"role\": \"Environmental Engineering Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/environmental-engineering-technicians.htm\",\n", + " \"pay2021\": 48390,\n", + " \"education\": \"Associate's degree\",\n", + " 
\"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 15500,\n", + " \"growth10\": 4\n", + "}\n", + "{\n", + " \"role\": \"Environmental Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/environmental-engineers.htm\",\n", + " \"pay2021\": 96820,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 44000,\n", + " \"growth10\": 4\n", + "}\n", + "{\n", + " \"role\": \"Health and Safety Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/health-and-safety-engineers.htm\",\n", + " \"pay2021\": 99040,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 23600,\n", + " \"growth10\": 4\n", + "}\n", + "{\n", + " \"role\": \"Industrial Engineering Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/industrial-engineering-technicians.htm\",\n", + " \"pay2021\": 60220,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 64200,\n", + " \"growth10\": 3\n", + "}\n", + "{\n", + " \"role\": \"Industrial Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/industrial-engineers.htm\",\n", + " \"pay2021\": 95300,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 301000,\n", + " \"growth10\": 10\n", + "}\n", + "{\n", + " \"role\": \"Landscape Architects\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/landscape-architects.htm\",\n", + " \"pay2021\": 67950,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"Internship/residency\",\n", + " \"jobs2021\": 19800,\n", + " \"growth10\": 0\n", + "}\n", + "{\n", + 
" \"role\": \"Marine Engineers and Naval Architects\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/marine-engineers-and-naval-architects.htm\",\n", + " \"pay2021\": 93370,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 7600,\n", + " \"growth10\": 4\n", + "}\n", + "{\n", + " \"role\": \"Materials Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/materials-engineers.htm\",\n", + " \"pay2021\": 98300,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 22100,\n", + " \"growth10\": 6\n", + "}\n", + "{\n", + " \"role\": \"Mechanical Engineering Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/mechanical-engineering-technicians.htm\",\n", + " \"pay2021\": 60460,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 41700,\n", + " \"growth10\": 2\n", + "}\n", + "{\n", + " \"role\": \"Mechanical Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/mechanical-engineers.htm\",\n", + " \"pay2021\": 95300,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 284900,\n", + " \"growth10\": 2\n", + "}\n", + "{\n", + " \"role\": \"Mining and Geological Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/mining-and-geological-engineers.htm\",\n", + " \"pay2021\": 97090,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 7500,\n", + " \"growth10\": 2\n", + "}\n", + "{\n", + " \"role\": \"Nuclear Engineers\",\n", + " \"ref\": 
import json
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Seconds to wait for the server before giving up. requests applies no timeout
# by default, so without this a stalled connection would hang the cell forever.
REQUEST_TIMEOUT = 30


def _fetch_soup(url: str) -> BeautifulSoup:
    """Download ``url`` and return it parsed with the built-in HTML parser.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or a timeout.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly here instead of trying to scrape an error page further down.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")


def _first_table(soup: BeautifulSoup, url: str):
    """Return the first ``<table>`` in ``soup`` or raise a descriptive error."""
    table = soup.find("table")
    if table is None:
        raise ValueError(f"no <table> element found on {url}")
    return table


def get_quick_facts(url1: str) -> dict:
    """Scrape the first table of an occupation page into a raw ``dict``.

    The result always contains ``"role"`` (the page title up to the first
    ``":"``) and ``"Reference"`` (``url1`` itself). Every table row after the
    first one adds an entry mapping the row's ``<th>`` text to its ``<td>``
    text, both stripped of surrounding whitespace.

    Args:
        url1: Address of the occupation page to download.

    Returns:
        Mapping of fact heading -> raw text value (all values are strings).

    Raises:
        requests.RequestException: if the page cannot be downloaded.
        ValueError: if the page contains no table.
    """
    soup1 = _fetch_soup(url1)
    title1 = soup1.title.string
    role1 = title1.split(":")[0].strip()

    facts1 = {"role": role1, "Reference": url1}
    # The first <tr> is skipped, exactly as in the original implementation.
    for row in _first_table(soup1, url1).find_all("tr")[1:]:
        heading = row.find("th")
        value = row.find("td")
        if heading is None or value is None:
            # Rows without a heading/value pair carry no fact; ignore them.
            continue
        facts1[heading.text.strip()] = value.text.strip()

    return facts1


def parse_pay(pay_str: str) -> int:
    """Return the first dollar amount in ``pay_str`` as an ``int``.

    Args:
        pay_str: Raw pay text, e.g. the annual figure followed by the hourly
            figure on a second line.

    Returns:
        The first dollar amount with ``$`` and thousands separators removed.

    Raises:
        ValueError: if no dollar amount is present, or if the first amount is
            not a whole number (e.g. an hourly-only figure such as ``$48.93``).
    """
    # "$101,780 per year\r\n $48.93 per hour" -> 101780
    # Searching for the amount (rather than taking the first word) also copes
    # with text that precedes the dollar sign.
    match = re.search(r"\$\s*([\d,]+(?:\.\d+)?)", pay_str)
    if match is None:
        raise ValueError(f"no dollar amount found in {pay_str!r}")
    return int(match.group(1).replace(",", ""))


def parse_nop(x):
    """Identity parser for facts that are kept as raw text."""
    return x


def parse_jobs(job_str: str) -> int:
    """Convert a job count with thousands separators to an ``int``.

    Raises:
        ValueError: if the text is not an integer once commas are removed.
    """
    # '303,800' -> 303800
    return int(job_str.replace(",", ""))


def parse_outlook(outlook_str: str) -> int:
    """Return the percentage at the start of an outlook string as an ``int``.

    Raises:
        ValueError: if the text before the first ``%`` is not an integer.
    """
    # '3% (Slower than average)' -> 3
    return int(outlook_str.split("%")[0])


# Raw fact heading -> (output label, parser). Built once at import time; the
# insertion order is the key order of the dictionaries parse_facts returns.
_FACT_PARSERS = {
    "role": ("role", parse_nop),
    "Reference": ("ref", parse_nop),
    "2021 Median Pay": ("pay2021", parse_pay),
    "Typical Entry-Level Education": ("education", parse_nop),
    "Work Experience in a Related Occupation": ("experience", parse_nop),
    "On-the-job Training": ("training", parse_nop),
    "Number of Jobs, 2021": ("jobs2021", parse_jobs),
    "Job Outlook, 2021-31": ("growth10", parse_outlook),
}


def parse_facts(facts1: dict) -> dict:
    """Turn the raw facts of one occupation into a compact, typed record.

    Args:
        facts1: Raw mapping as produced by :func:`get_quick_facts`.

    Returns:
        ``dict`` with the keys ``role``, ``ref``, ``pay2021``, ``education``,
        ``experience``, ``training``, ``jobs2021`` and ``growth10``; the pay,
        jobs and growth values are integers, the rest are the raw strings.

    Raises:
        KeyError: if ``facts1`` lacks one of the expected headings.
        ValueError: if a numeric fact cannot be parsed.
    """
    return {
        label: parser(facts1[heading])
        for heading, (label, parser) in _FACT_PARSERS.items()
    }


def get_role_url(group_url: str) -> list:
    """Collect the absolute URLs of all occupation pages listed on a group page.

    The link is taken from the first ``<td>`` of every row after the first one
    in the first table on the page.

    Args:
        group_url: Address of the occupation-group overview page.

    Returns:
        List of absolute page URLs, in table order.

    Raises:
        requests.RequestException: if the page cannot be downloaded.
        ValueError: if the page contains no table.
    """
    soup1 = _fetch_soup(group_url)

    roles = []
    for row in _first_table(soup1, group_url).find_all("tr")[1:]:
        cell = row.find("td")
        link = cell.find("a") if cell is not None else None
        href = link.get("href") if link is not None else None
        if not href:
            # Row without a usable link (e.g. a spacer row): nothing to follow.
            continue
        # urljoin resolves the href against the page it came from. The previous
        # f"{base_url}/{rel_url}" produced "https://www.bls.gov//ooh/..."
        # because the hrefs already start with "/".
        roles.append(urljoin(group_url, href))

    return roles


def get_group_facts(group_url1: str) -> list:
    """Scrape and parse every occupation linked from a group overview page.

    Args:
        group_url1: Address of the occupation-group overview page.

    Returns:
        List with one :func:`parse_facts` record per occupation, in page order.
    """
    return [
        parse_facts(get_quick_facts(role_url))
        for role_url in get_role_url(group_url1)
    ]


# To debug a single occupation, call parse_facts(get_quick_facts(<page url>)).
group_url1 = "https://www.bls.gov/ooh/architecture-and-engineering/home.htm"
group_facts = get_group_facts(group_url1)
for f in group_facts:
    print(json.dumps(f, indent=4))
+ ], + "metadata": { + "id": "53TBMpJFk51U" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "21m1Y6zsjyhd", + "outputId": "6388d89b-1a84-45f8-f6c7-aa51bd784ce8" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{\n", + " \"role\": \"Aerospace Engineering and Operations Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/aerospace-engineering-and-operations-technicians.htm\",\n", + " \"pay2021\": 73580,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 11300,\n", + " \"growth10\": 6\n", + "}\n", + "{\n", + " \"role\": \"Aerospace Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/aerospace-engineers.htm\",\n", + " \"pay2021\": 122270,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 58800,\n", + " \"growth10\": 6\n", + "}\n", + "{\n", + " \"role\": \"Agricultural Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/agricultural-engineers.htm\",\n", + " \"pay2021\": 82640,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 1200,\n", + " \"growth10\": 1\n", + "}\n", + "{\n", + " \"role\": \"Architects\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/architects.htm\",\n", + " \"pay2021\": 80180,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"Internship/residency\",\n", + " \"jobs2021\": 125500,\n", + " \"growth10\": 3\n", + "}\n", + "{\n", + " \"role\": \"Bioengineers and Biomedical Engineers\",\n", + " \"ref\": 
\"https://www.bls.gov//ooh/architecture-and-engineering/biomedical-engineers.htm\",\n", + " \"pay2021\": 97410,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 17900,\n", + " \"growth10\": 10\n", + "}\n", + "{\n", + " \"role\": \"Cartographers and Photogrammetrists\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/cartographers-and-photogrammetrists.htm\",\n", + " \"pay2021\": 68900,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 13400,\n", + " \"growth10\": 3\n", + "}\n", + "{\n", + " \"role\": \"Chemical Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/chemical-engineers.htm\",\n", + " \"pay2021\": 105550,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 26900,\n", + " \"growth10\": 14\n", + "}\n", + "{\n", + " \"role\": \"Civil Engineering Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/civil-engineering-technicians.htm\",\n", + " \"pay2021\": 58320,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 66300,\n", + " \"growth10\": 0\n", + "}\n", + "{\n", + " \"role\": \"Civil Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/civil-engineers.htm\",\n", + " \"pay2021\": 88050,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 318300,\n", + " \"growth10\": 7\n", + "}\n", + "{\n", + " \"role\": \"Computer Hardware Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/computer-hardware-engineers.htm\",\n", + " \"pay2021\": 128170,\n", + " \"education\": \"Bachelor's 
degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 76900,\n", + " \"growth10\": 5\n", + "}\n", + "{\n", + " \"role\": \"Drafters\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/drafters.htm\",\n", + " \"pay2021\": 60290,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 192200,\n", + " \"growth10\": -3\n", + "}\n", + "{\n", + " \"role\": \"Electrical and Electronic Engineering Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/electrical-and-electronics-engineering-technicians.htm\",\n", + " \"pay2021\": 63640,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 105000,\n", + " \"growth10\": 0\n", + "}\n", + "{\n", + " \"role\": \"Electrical and Electronics Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/electrical-and-electronics-engineers.htm\",\n", + " \"pay2021\": 101780,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 303800,\n", + " \"growth10\": 3\n", + "}\n", + "{\n", + " \"role\": \"Electro-mechanical and Mechatronics Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/electro-mechanical-technicians.htm\",\n", + " \"pay2021\": 60360,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 12100,\n", + " \"growth10\": -4\n", + "}\n", + "{\n", + " \"role\": \"Environmental Engineering Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/environmental-engineering-technicians.htm\",\n", + " \"pay2021\": 48390,\n", + " \"education\": \"Associate's degree\",\n", + " 
\"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 15500,\n", + " \"growth10\": 4\n", + "}\n", + "{\n", + " \"role\": \"Environmental Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/environmental-engineers.htm\",\n", + " \"pay2021\": 96820,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 44000,\n", + " \"growth10\": 4\n", + "}\n", + "{\n", + " \"role\": \"Health and Safety Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/health-and-safety-engineers.htm\",\n", + " \"pay2021\": 99040,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 23600,\n", + " \"growth10\": 4\n", + "}\n", + "{\n", + " \"role\": \"Industrial Engineering Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/industrial-engineering-technicians.htm\",\n", + " \"pay2021\": 60220,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 64200,\n", + " \"growth10\": 3\n", + "}\n", + "{\n", + " \"role\": \"Industrial Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/industrial-engineers.htm\",\n", + " \"pay2021\": 95300,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 301000,\n", + " \"growth10\": 10\n", + "}\n", + "{\n", + " \"role\": \"Landscape Architects\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/landscape-architects.htm\",\n", + " \"pay2021\": 67950,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"Internship/residency\",\n", + " \"jobs2021\": 19800,\n", + " \"growth10\": 0\n", + "}\n", + "{\n", + 
" \"role\": \"Marine Engineers and Naval Architects\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/marine-engineers-and-naval-architects.htm\",\n", + " \"pay2021\": 93370,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 7600,\n", + " \"growth10\": 4\n", + "}\n", + "{\n", + " \"role\": \"Materials Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/materials-engineers.htm\",\n", + " \"pay2021\": 98300,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 22100,\n", + " \"growth10\": 6\n", + "}\n", + "{\n", + " \"role\": \"Mechanical Engineering Technologists and Technicians\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/mechanical-engineering-technicians.htm\",\n", + " \"pay2021\": 60460,\n", + " \"education\": \"Associate's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 41700,\n", + " \"growth10\": 2\n", + "}\n", + "{\n", + " \"role\": \"Mechanical Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/mechanical-engineers.htm\",\n", + " \"pay2021\": 95300,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 284900,\n", + " \"growth10\": 2\n", + "}\n", + "{\n", + " \"role\": \"Mining and Geological Engineers\",\n", + " \"ref\": \"https://www.bls.gov//ooh/architecture-and-engineering/mining-and-geological-engineers.htm\",\n", + " \"pay2021\": 97090,\n", + " \"education\": \"Bachelor's degree\",\n", + " \"experience\": \"None\",\n", + " \"training\": \"None\",\n", + " \"jobs2021\": 7500,\n", + " \"growth10\": 2\n", + "}\n", + "{\n", + " \"role\": \"Nuclear Engineers\",\n", + " \"ref\": 
import json
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Seconds to wait for the server before giving up. requests applies no timeout
# by default, so without this a stalled connection would hang the cell forever.
REQUEST_TIMEOUT = 30


def _fetch_soup(url: str) -> BeautifulSoup:
    """Download ``url`` and return it parsed with the built-in HTML parser.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or a timeout.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly here instead of trying to scrape an error page further down.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")


def _first_table(soup: BeautifulSoup, url: str):
    """Return the first ``<table>`` in ``soup`` or raise a descriptive error."""
    table = soup.find("table")
    if table is None:
        raise ValueError(f"no <table> element found on {url}")
    return table


def get_quick_facts(url1: str) -> dict:
    """Scrape the first table of an occupation page into a raw ``dict``.

    The result always contains ``"role"`` (the page title up to the first
    ``":"``) and ``"Reference"`` (``url1`` itself). Every table row after the
    first one adds an entry mapping the row's ``<th>`` text to its ``<td>``
    text, both stripped of surrounding whitespace.

    Args:
        url1: Address of the occupation page to download.

    Returns:
        Mapping of fact heading -> raw text value (all values are strings).

    Raises:
        requests.RequestException: if the page cannot be downloaded.
        ValueError: if the page contains no table.
    """
    soup1 = _fetch_soup(url1)
    title1 = soup1.title.string
    role1 = title1.split(":")[0].strip()

    facts1 = {"role": role1, "Reference": url1}
    # The first <tr> is skipped, exactly as in the original implementation.
    for row in _first_table(soup1, url1).find_all("tr")[1:]:
        heading = row.find("th")
        value = row.find("td")
        if heading is None or value is None:
            # Rows without a heading/value pair carry no fact; ignore them.
            continue
        facts1[heading.text.strip()] = value.text.strip()

    return facts1


def parse_pay(pay_str: str) -> int:
    """Return the first dollar amount in ``pay_str`` as an ``int``.

    Args:
        pay_str: Raw pay text, e.g. the annual figure followed by the hourly
            figure on a second line.

    Returns:
        The first dollar amount with ``$`` and thousands separators removed.

    Raises:
        ValueError: if no dollar amount is present, or if the first amount is
            not a whole number (e.g. an hourly-only figure such as ``$48.93``).
    """
    # "$101,780 per year\r\n $48.93 per hour" -> 101780
    # Searching for the amount (rather than taking the first word) also copes
    # with text that precedes the dollar sign.
    match = re.search(r"\$\s*([\d,]+(?:\.\d+)?)", pay_str)
    if match is None:
        raise ValueError(f"no dollar amount found in {pay_str!r}")
    return int(match.group(1).replace(",", ""))


def parse_nop(x):
    """Identity parser for facts that are kept as raw text."""
    return x


def parse_jobs(job_str: str) -> int:
    """Convert a job count with thousands separators to an ``int``.

    Raises:
        ValueError: if the text is not an integer once commas are removed.
    """
    # '303,800' -> 303800
    return int(job_str.replace(",", ""))


def parse_outlook(outlook_str: str) -> int:
    """Return the percentage at the start of an outlook string as an ``int``.

    Raises:
        ValueError: if the text before the first ``%`` is not an integer.
    """
    # '3% (Slower than average)' -> 3
    return int(outlook_str.split("%")[0])


# Raw fact heading -> (output label, parser). Built once at import time; the
# insertion order is the key order of the dictionaries parse_facts returns.
_FACT_PARSERS = {
    "role": ("role", parse_nop),
    "Reference": ("ref", parse_nop),
    "2021 Median Pay": ("pay2021", parse_pay),
    "Typical Entry-Level Education": ("education", parse_nop),
    "Work Experience in a Related Occupation": ("experience", parse_nop),
    "On-the-job Training": ("training", parse_nop),
    "Number of Jobs, 2021": ("jobs2021", parse_jobs),
    "Job Outlook, 2021-31": ("growth10", parse_outlook),
}


def parse_facts(facts1: dict) -> dict:
    """Turn the raw facts of one occupation into a compact, typed record.

    Args:
        facts1: Raw mapping as produced by :func:`get_quick_facts`.

    Returns:
        ``dict`` with the keys ``role``, ``ref``, ``pay2021``, ``education``,
        ``experience``, ``training``, ``jobs2021`` and ``growth10``; the pay,
        jobs and growth values are integers, the rest are the raw strings.

    Raises:
        KeyError: if ``facts1`` lacks one of the expected headings.
        ValueError: if a numeric fact cannot be parsed.
    """
    return {
        label: parser(facts1[heading])
        for heading, (label, parser) in _FACT_PARSERS.items()
    }


def get_role_url(group_url: str) -> list:
    """Collect the absolute URLs of all occupation pages listed on a group page.

    The link is taken from the first ``<td>`` of every row after the first one
    in the first table on the page.

    Args:
        group_url: Address of the occupation-group overview page.

    Returns:
        List of absolute page URLs, in table order.

    Raises:
        requests.RequestException: if the page cannot be downloaded.
        ValueError: if the page contains no table.
    """
    soup1 = _fetch_soup(group_url)

    roles = []
    for row in _first_table(soup1, group_url).find_all("tr")[1:]:
        cell = row.find("td")
        link = cell.find("a") if cell is not None else None
        href = link.get("href") if link is not None else None
        if not href:
            # Row without a usable link (e.g. a spacer row): nothing to follow.
            continue
        # urljoin resolves the href against the page it came from. The previous
        # f"{base_url}/{rel_url}" produced "https://www.bls.gov//ooh/..."
        # because the hrefs already start with "/".
        roles.append(urljoin(group_url, href))

    return roles


def get_group_facts(group_url1: str) -> list:
    """Scrape and parse every occupation linked from a group overview page.

    Args:
        group_url1: Address of the occupation-group overview page.

    Returns:
        List with one :func:`parse_facts` record per occupation, in page order.
    """
    return [
        parse_facts(get_quick_facts(role_url))
        for role_url in get_role_url(group_url1)
    ]


# To debug a single occupation, call parse_facts(get_quick_facts(<page url>)).
group_url1 = "https://www.bls.gov/ooh/architecture-and-engineering/home.htm"
group_facts = get_group_facts(group_url1)
for f in group_facts:
    print(json.dumps(f, indent=4))