{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"import os\n",
"import datetime\n",
"import time\n",
"import requests\n",
"import json\n",
"from dotenv import load_dotenv\n",
"from firecrawl import FirecrawlApp\n",
"from pydantic import BaseModel, Field\n",
"from typing import List\n",
"\n",
"# Load environment variables\n",
"load_dotenv()\n",
"\n",
"# Retrieve API keys from environment variables\n",
"firecrawl_api_key = os.getenv(\"FIRECRAWL_API_KEY\")\n",
"\n",
"# Initialize the FirecrawlApp with your API key\n",
"app = FirecrawlApp(api_key=firecrawl_api_key)\n",
"\n",
"# Set the jobs page URL\n",
"jobs_page_url = \"https://openai.com/careers\"\n"
]
},
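{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Optional sanity check (added sketch): fail fast if a required key is\n",
"# missing. Assumes the .env file defines FIRECRAWL_API_KEY and\n",
"# OPENAI_API_KEY, the two names used later in this notebook.\n",
"for key_name in (\"FIRECRAWL_API_KEY\", \"OPENAI_API_KEY\"):\n",
"    if not os.getenv(key_name):\n",
"        raise ValueError(f\"{key_name} is not set; add it to your .env file\")\n"
]
},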
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total pages mapped (excluding original URL): 14\n",
"['https://openai.com/careers/research-scientist', 'https://openai.com/careers/analytics-engineer', 'https://openai.com/careers/solutions-architect', 'https://openai.com/careers/iam-engineer', 'https://openai.com/careers/talent-partnerships', 'https://openai.com/careers/product-designer', 'https://openai.com/careers/recruiting-coordinator', 'https://openai.com/careers/av-specialist', 'https://openai.com/careers/it-support', 'https://openai.com/careers/director-edu', 'https://openai.com/careers/research-engineer', 'https://openai.com/careers/solutions-engineer', 'https://openai.com/careers/software-engineer-networking', 'https://openai.com/careers/revenue-operations-leader']\n"
]
}
],
"source": [
"# %%\n",
"# Use the Firecrawl Map API to get the sitemap\n",
"api_url = \"https://api.firecrawl.dev/v1/map\"\n",
"payload = {\n",
"    \"url\": jobs_page_url,\n",
"    \"search\": \"\",  # Empty search term to get all pages\n",
"    \"limit\": 15\n",
"}\n",
"headers = {\n",
"    \"Authorization\": f\"Bearer {firecrawl_api_key}\",\n",
"    \"Content-Type\": \"application/json\"\n",
"}\n",
"response = requests.post(api_url, json=payload, headers=headers)\n",
"\n",
"if response.status_code == 200:\n",
"    map_result = response.json()\n",
"    if map_result.get('success'):\n",
"        links = [link for link in map_result.get('links', []) if link != jobs_page_url]\n",
"        print(f\"Total pages mapped (excluding original URL): {len(links)}\")\n",
"        print(links)\n",
"    else:\n",
"        print(\"Map API request was not successful\")\n",
"        exit(1)\n",
"else:\n",
"    print(f\"Error: {response.status_code}\")\n",
"    print(response.text)\n",
"    exit(1)\n"
]
},
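{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Optional helper (added sketch): retry a raw HTTP call on transient 5xx\n",
"# errors, like the single 500 seen in the extraction loop below. The attempt\n",
"# count and backoff values are arbitrary assumptions; tune them as needed.\n",
"def post_with_retries(url, payload, headers, attempts=3, backoff=2.0):\n",
"    response = None\n",
"    for attempt in range(attempts):\n",
"        response = requests.post(url, json=payload, headers=headers)\n",
"        if response.status_code < 500:\n",
"            return response\n",
"        time.sleep(backoff * (attempt + 1))  # simple linear backoff\n",
"    return response\n"
]
},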
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Error 500 for page 0: {\"success\":false,\"error\":\"(Internal server error) - JSON parsing error(s): must be object\\n\\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support. - Could be due to LLM parsing issues\"}\n",
"Data extracted for page 1\n",
"Data extracted for page 2\n",
"Data extracted for page 3\n",
"Data extracted for page 4\n",
"Data extracted for page 5\n",
"Data extracted for page 6\n",
"Data extracted for page 7\n",
"Data extracted for page 8\n",
"Data extracted for page 9\n",
"Data extracted for page 10\n",
"Data extracted for page 11\n",
"Data extracted for page 12\n",
"Data extracted for page 13\n"
]
}
],
"source": [
"# %%\n",
"# Define the extraction schema\n",
"extract_schema = {\n",
"    \"type\": \"object\",\n",
"    \"properties\": {\n",
"        \"job_title\": {\n",
"            \"type\": \"string\"\n",
"        },\n",
"        \"sub_division_of_organization\": {\n",
"            \"type\": \"string\"\n",
"        },\n",
"        \"key_skills\": {\n",
"            \"type\": \"array\",\n",
"            \"items\": {\n",
"                \"type\": \"string\"\n",
"            }\n",
"        },\n",
"        \"compensation\": {\n",
"            \"type\": \"string\"\n",
"        },\n",
"        \"apply_link\": {\n",
"            \"type\": \"string\"\n",
"        }\n",
"    },\n",
"    \"required\": [\"job_title\", \"sub_division_of_organization\", \"key_skills\", \"compensation\", \"apply_link\"]\n",
"}\n",
"\n",
"# Initialize a list to store the extracted data\n",
"extracted_data = []\n",
"\n",
"# Process each link in the map result\n",
"for index, link in enumerate(links):\n",
"    try:\n",
"        response = requests.post(\n",
"            \"https://api.firecrawl.dev/v1/scrape\",\n",
"            headers={\n",
"                \"Content-Type\": \"application/json\",\n",
"                \"Authorization\": f\"Bearer {firecrawl_api_key}\"\n",
"            },\n",
"            json={\n",
"                \"url\": link,\n",
"                \"formats\": [\"extract\"],\n",
"                \"extract\": {\n",
"                    \"schema\": extract_schema\n",
"                }\n",
"            }\n",
"        )\n",
"\n",
"        if response.status_code == 200:\n",
"            result = response.json()\n",
"            if result.get('success'):\n",
"                extracted_data.append(result['data']['extract'])\n",
"                print(f\"Data extracted for page {index}\")\n",
"            else:\n",
"                print(f\"No data extracted for page {index}\")\n",
"        else:\n",
"            print(f\"Error {response.status_code} for page {index}: {response.text}\")\n",
"    except Exception as e:\n",
"        print(f\"An error occurred for page {index}: {str(e)}\")\n"
]
},
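{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Alternative sketch: the same schema expressed with the Pydantic imports\n",
"# from the first cell, which the notebook otherwise leaves unused. Assumes\n",
"# Pydantic v2 (model_json_schema); on v1 use JobPosting.schema() instead.\n",
"class JobPosting(BaseModel):\n",
"    job_title: str\n",
"    sub_division_of_organization: str\n",
"    key_skills: List[str]\n",
"    compensation: str = Field(description=\"Pay range as listed; empty if absent\")\n",
"    apply_link: str\n",
"\n",
"# JobPosting.model_json_schema() yields a JSON Schema close to extract_schema\n",
"# above, so either form could be passed to the /v1/scrape extract option.\n"
]
},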
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracted data:\n",
"{\n",
"  \"job_title\": \"Analytics Engineer\",\n",
"  \"sub_division_of_organization\": \"Growth\",\n",
"  \"key_skills\": [\n",
"    \"SQL\",\n",
"    \"Python\",\n",
"    \"business intelligence tools\",\n",
"    \"ETL workflows\",\n",
"    \"data analysis\",\n",
"    \"dashboards\",\n",
"    \"data storytelling\"\n",
"  ],\n",
"  \"compensation\": \"$245K \\u2013 $385K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/340ef89c-a746-439a-888a-19580eb8c881/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Solutions Architect\",\n",
"  \"sub_division_of_organization\": \"Technical Success\",\n",
"  \"key_skills\": [\n",
"    \"technical consulting\",\n",
"    \"Generative AI\",\n",
"    \"ML solutions\",\n",
"    \"network architecture\",\n",
"    \"cloud architecture\",\n",
"    \"Python\",\n",
"    \"Javascript\"\n",
"  ],\n",
"  \"compensation\": \"\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/51721dfd-7bf5-4112-bb28-da5e4fd86e36/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"IAM Engineer\",\n",
"  \"sub_division_of_organization\": \"IT\",\n",
"  \"key_skills\": [\n",
"    \"AzureAD\",\n",
"    \"Python\",\n",
"    \"PowerShell\",\n",
"    \"identity governance\",\n",
"    \"automation\",\n",
"    \"Terraform\"\n",
"  ],\n",
"  \"compensation\": \"$245K \\u2013 $385K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/e798aa62-74f9-4f53-a890-716310926b70/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Talent Partnerships\",\n",
"  \"sub_division_of_organization\": \"Communications\",\n",
"  \"key_skills\": [\n",
"    \"relationship management\",\n",
"    \"communication\",\n",
"    \"adaptability\",\n",
"    \"creativity\",\n",
"    \"collaboration\",\n",
"    \"transparency\"\n",
"  ],\n",
"  \"compensation\": \"$171K \\u2013 $240K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/84a4a8bb-7d5a-4989-9b5c-bd841db2698e/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"404 Error Page\",\n",
"  \"sub_division_of_organization\": \"Web Development\",\n",
"  \"key_skills\": [\n",
"    \"Error Handling\",\n",
"    \"Web Design\",\n",
"    \"User Experience\"\n",
"  ],\n",
"  \"compensation\": \"N/A\",\n",
"  \"apply_link\": \"N/A\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"\",\n",
"  \"sub_division_of_organization\": \"\",\n",
"  \"key_skills\": [],\n",
"  \"compensation\": \"\",\n",
"  \"apply_link\": \"\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"AV Specialist\",\n",
"  \"sub_division_of_organization\": \"IT\",\n",
"  \"key_skills\": [\n",
"    \"AV support\",\n",
"    \"Google Meet\",\n",
"    \"Zoom\",\n",
"    \"Cisco\",\n",
"    \"ticket management\",\n",
"    \"IT troubleshooting\",\n",
"    \"problem-solving\",\n",
"    \"interpersonal skills\"\n",
"  ],\n",
"  \"compensation\": \"$110K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/20fd0ff8-dd5e-4bec-a401-dd3f8263fe24/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"IT Support\",\n",
"  \"sub_division_of_organization\": \"IT\",\n",
"  \"key_skills\": [\n",
"    \"Intermediate-to-expert understanding of IDP and MDM solutions\",\n",
"    \"Familiarity with Windows or Linux\",\n",
"    \"Understanding of Python, Bash, or Apple Script\",\n",
"    \"Experience with collaboration software\",\n",
"    \"Hands-on expertise implementing and managing AV and telecom systems\",\n",
"    \"Complete Mac and macOS troubleshooting skills\",\n",
"    \"Adept in orchestrating high-production events\"\n",
"  ],\n",
"  \"compensation\": \"$110K \\u2013 $140K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/ca263679-08d5-4492-9a56-32fbcb7318a5/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"404\",\n",
"  \"sub_division_of_organization\": \"OpenAI\",\n",
"  \"key_skills\": [],\n",
"  \"compensation\": \"\",\n",
"  \"apply_link\": \"\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Research Engineer\",\n",
"  \"sub_division_of_organization\": \"Research\",\n",
"  \"key_skills\": [\n",
"    \"strong programming skills\",\n",
"    \"experience working in large distributed systems\"\n",
"  ],\n",
"  \"compensation\": \"$295K \\u2013 $440K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/240d459b-696d-43eb-8497-fab3e56ecd9b/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Solutions Engineer\",\n",
"  \"sub_division_of_organization\": \"Technical Success\",\n",
"  \"key_skills\": [\n",
"    \"7+ years of experience in a technical pre-sales role\",\n",
"    \"Understanding of IT security principles\",\n",
"    \"Experience with programming languages like Python or Javascript\",\n",
"    \"Knowledge of network/cloud architecture\",\n",
"    \"Effective presentation and communication skills\",\n",
"    \"Ability to manage C-level technical and business relationships\"\n",
"  ],\n",
"  \"compensation\": \"\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/dbfef1b0-9a77-46bd-ad36-67f3d0286924/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Software Engineer, Networking\",\n",
"  \"sub_division_of_organization\": \"Platform\",\n",
"  \"key_skills\": [\n",
"    \"C++\",\n",
"    \"CUDA\",\n",
"    \"distributed algorithms\",\n",
"    \"RDMA\",\n",
"    \"network simulation techniques\"\n",
"  ],\n",
"  \"compensation\": \"$360K \\u2013 $530K\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/340c0c22-8d8f-4232-b17e-f642b64c25c3/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Revenue Operations Leader\",\n",
"  \"sub_division_of_organization\": \"Revenue Operations\",\n",
"  \"key_skills\": [\n",
"    \"Extensive experience in revenue operations or strategy at a high-growth, technology company\",\n",
"    \"Proficiency with GTM systems, namely SFDC, Gong\",\n",
"    \"Experience managing a large team of 15+ operational team members\",\n",
"    \"Highly analytical\",\n",
"    \"Exceptional project management skills with experience leading complex, cross-functional initiatives\",\n",
"    \"Deep experience designing & executing on a territory strategy for 100+ GTM orgs\",\n",
"    \"Strong communication skills and executive presence\",\n",
"    \"An understanding of the AI landscape, our applications, and the problems they solve for our customers\",\n",
"    \"The ability to thrive in ambiguity and work autonomously\"\n",
"  ],\n",
"  \"compensation\": \"$325K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/61a484e5-4723-4031-92c1-068dfe4b069f/application\"\n",
"}\n",
"--------------------------------------------------\n",
"Extracted data saved to /Users/ericciarla/Documents/GitHub/firecrawl/examples/getting_latest_openai_jobs/openai_jobs.csv\n"
]
}
],
"source": [
"# %%\n",
"# Print the extracted data\n",
"print(\"Extracted data:\")\n",
"for job in extracted_data:\n",
"    print(json.dumps(job, indent=2))\n",
"    print(\"-\" * 50)  # Separator between jobs\n",
"\n",
"# Save as CSV\n",
"import csv\n",
"import os\n",
"\n",
"# Get the current directory\n",
"current_dir = os.getcwd()\n",
"\n",
"# Create the full path for the CSV file\n",
"csv_file = os.path.join(current_dir, \"openai_jobs.csv\")\n",
"\n",
"try:\n",
"    with open(csv_file, \"w\", newline=\"\") as f:\n",
"        if extracted_data:\n",
"            writer = csv.DictWriter(f, fieldnames=extracted_data[0].keys())\n",
"            writer.writeheader()\n",
"            for job in extracted_data:\n",
"                writer.writerow(job)\n",
"            print(f\"Extracted data saved to {csv_file}\")\n",
"        else:\n",
"            print(\"No data to save.\")\n",
"except IOError as e:\n",
"    print(f\"Error saving CSV file: {e}\")\n"
]
},
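{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Defensive variant (added sketch): some pages above extracted as 404\n",
"# placeholders, so a record could be missing a field. Pinning fieldnames and\n",
"# using restval/extrasaction makes DictWriter tolerant of such records.\n",
"fieldnames = [\"job_title\", \"sub_division_of_organization\", \"key_skills\",\n",
"              \"compensation\", \"apply_link\"]\n",
"with open(csv_file, \"w\", newline=\"\") as f:\n",
"    writer = csv.DictWriter(f, fieldnames=fieldnames, restval=\"\", extrasaction=\"ignore\")\n",
"    writer.writeheader()\n",
"    writer.writerows(extracted_data)\n"
]
},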
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Recommended jobs:\n",
"[\n",
"  {\n",
"    \"job_title\": \"Analytics Engineer\",\n",
"    \"compensation\": \"$245K \\u2013 $385K + Offers Equity\",\n",
"    \"apply_link\": \"https://jobs.ashbyhq.com/openai/340ef89c-a746-439a-888a-19580eb8c881/application\"\n",
"  },\n",
"  {\n",
"    \"job_title\": \"Solutions Architect\",\n",
"    \"compensation\": \"\",\n",
"    \"apply_link\": \"https://jobs.ashbyhq.com/openai/51721dfd-7bf5-4112-bb28-da5e4fd86e36/application\"\n",
"  },\n",
"  {\n",
"    \"job_title\": \"Research Engineer\",\n",
"    \"compensation\": \"$295K \\u2013 $440K + Offers Equity\",\n",
"    \"apply_link\": \"https://jobs.ashbyhq.com/openai/240d459b-696d-43eb-8497-fab3e56ecd9b/application\"\n",
"  },\n",
"  {\n",
"    \"job_title\": \"Solutions Engineer\",\n",
"    \"compensation\": \"\",\n",
"    \"apply_link\": \"https://jobs.ashbyhq.com/openai/dbfef1b0-9a77-46bd-ad36-67f3d0286924/application\"\n",
"  }\n",
"]\n"
]
}
],
"source": [
"from openai import OpenAI\n",
"\n",
"# Resume\n",
"resume_paste = \"\"\"\n",
"Eric Ciarla\n",
"Co-Founder @ Firecrawl\n",
"San Francisco, California, United States\n",
"Summary\n",
"Building…\n",
"Experience\n",
"Firecrawl\n",
"Co-Founder\n",
"April 2024 - Present (6 months)\n",
"San Francisco, California, United States\n",
"Firecrawl by Mendable. Building data extraction infrastructure for AI. Used by\n",
"Amazon, Zapier, and Nvidia (YC S22)\n",
"Mendable\n",
"2 years 7 months\n",
"Co-Founder @ Mendable.ai\n",
"March 2022 - Present (2 years 7 months)\n",
"San Francisco, California, United States\n",
"- Built an AI powered search platform that served millions of queries for\n",
"hundreds of customers (YC S22)\n",
"- We were one of the first LLM powered apps adopted by industry leaders like\n",
"Coinbase, Snap, DoorDash, and MongoDB\n",
"Co-Founder @ SideGuide\n",
"March 2022 - Present (2 years 7 months)\n",
"San Francisco, California, United States\n",
"- Built and scaled an online course platform with a community of over 50,000\n",
"developers\n",
"- Selected for Y Combinator S22 batch, 2% acceptance rate\n",
"Fracta\n",
"Data Engineer\n",
"2022 - 2022 (less than a year)\n",
"Palo Alto, California, United States\n",
"- Demoed tool during sales calls and provided technical support during the\n",
"entire customer lifecycle\n",
"- Mined, wrangled, & visualized geospatial and water utility data for predictive\n",
"analytics & ML workflows (Python, QGIS)\n",
"Ford Motor Company\n",
"Data Scientist\n",
"2021 - 2021 (less than a year)\n",
"Dearborn, Michigan, United States\n",
"- Extracted, cleaned, and joined data from multiple sources using SQL,\n",
"Hadoop, and Alteryx\n",
"- Used Bayesian Network Structure Learning (BNLearn, R) to uncover the\n",
"relationships between survey free response verbatim topics (derived from\n",
"natural language processing models) and numerical customer experience\n",
"scores\n",
"MDRemindME\n",
"Co-Founder\n",
"2018 - 2020 (2 years)\n",
"Durham, New Hampshire, United States\n",
"- Founded and led a healthtech startup aimed at improving patient adherence\n",
"to treatment plans through an innovative engagement and retention tool\n",
"- Piloted the product with healthcare providers and patients, gathering critical\n",
"insights to refine functionality and enhance user experience\n",
"- Secured funding through National Science Foundation I-CORPS Grant and\n",
"UNH Entrepreneurship Center Seed Grant\n",
"Education\n",
"Y Combinator\n",
"S22\n",
"University of New Hampshire\n",
"Economics and Philosophy\n",
"\"\"\"\n",
"\n",
"# Use o1-preview to choose which jobs should be applied to based on the resume\n",
"client = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
"\n",
"prompt = f\"\"\"\n",
"Please analyze the resume and job listings, and return a JSON list of the top 3 roles that best fit the candidate's experience and skills. Include only the job title, compensation, and apply link for each recommended role. The output should be a valid JSON array of objects in the following format, with no additional text:\n",
"\n",
"[\n",
"  {{\n",
"    \"job_title\": \"Job Title\",\n",
"    \"compensation\": \"Compensation (if available, otherwise empty string)\",\n",
"    \"apply_link\": \"Application URL\"\n",
"  }},\n",
"  ...\n",
"]\n",
"\n",
"Based on the following resume:\n",
"{resume_paste}\n",
"\n",
"And the following job listings:\n",
"{json.dumps(extracted_data, indent=2)}\n",
"\"\"\"\n",
"\n",
"completion = client.chat.completions.create(\n",
"    model=\"o1-preview\",\n",
"    messages=[\n",
"        {\n",
"            \"role\": \"user\",\n",
"            \"content\": [\n",
"                {\n",
"                    \"type\": \"text\",\n",
"                    \"text\": prompt\n",
"                }\n",
"            ]\n",
"        }\n",
"    ]\n",
")\n",
"\n",
"recommended_jobs = json.loads(completion.choices[0].message.content.strip())\n",
"\n",
"print(\"Recommended jobs:\")\n",
"print(json.dumps(recommended_jobs, indent=2))"
]
},
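{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Robust JSON parsing (added sketch): models sometimes wrap their answer in\n",
"# ```json fences despite the prompt, which would make the bare json.loads\n",
"# above raise. This helper strips fences first and could replace that call.\n",
"def parse_json_reply(text):\n",
"    text = text.strip()\n",
"    if text.startswith(\"```\"):\n",
"        text = text.split(\"\\n\", 1)[1]   # drop the opening fence line\n",
"        text = text.rsplit(\"```\", 1)[0]  # drop the closing fence\n",
"    return json.loads(text)\n"
]
},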
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scrape each of the apply links with Firecrawl /v1/scrape\n",
"import requests\n",
"\n",
"firecrawl_api_key = os.getenv(\"FIRECRAWL_API_KEY\")\n",
"\n",
"def scrape_apply_link(url):\n",
"    api_url = \"https://api.firecrawl.dev/v1/scrape\"\n",
"    headers = {\n",
"        \"Authorization\": f\"Bearer {firecrawl_api_key}\",\n",
"        \"Content-Type\": \"application/json\"\n",
"    }\n",
"    payload = {\n",
"        \"url\": url\n",
"    }\n",
"\n",
"    response = requests.post(api_url, json=payload, headers=headers)\n",
"    if response.status_code == 200:\n",
"        return response.json()\n",
"    else:\n",
"        print(f\"Error scraping {url}: {response.status_code}\")\n",
"        return None\n",
"\n",
"scraped_job_data = []\n",
"for job in recommended_jobs:\n",
"    apply_link = job.get('apply_link')\n",
"    if apply_link:\n",
"        scraped_data = scrape_apply_link(apply_link)\n",
"        if scraped_data:\n",
"            scraped_job_data.append({\n",
"                'job_title': job['job_title'],\n",
"                'compensation': job['compensation'],\n",
"                'apply_link': apply_link,\n",
"                'scraped_content': scraped_data\n",
"            })\n",
"\n",
"print(f\"Scraped {len(scraped_job_data)} job application pages\")"
]
},
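{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Optional guard (added sketch): some extracted records use placeholder links\n",
"# like \"N/A\", which are truthy and would still be scraped by the loop above.\n",
"# A simple URL check plus a short delay keeps requests valid and politely\n",
"# paced; the 1-second delay is an arbitrary assumption.\n",
"def is_scrapable_link(url):\n",
"    return isinstance(url, str) and url.startswith(\"http\")\n",
"\n",
"# Example usage inside the loop above:\n",
"#     if is_scrapable_link(apply_link):\n",
"#         time.sleep(1)  # pace requests\n",
"#         scraped_data = scrape_apply_link(apply_link)\n"
]
},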
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use o1 to write the application for you and return it as JSON\n",
"import json\n",
"\n",
"def generate_application(job_data, resume_paste):\n",
"    # Extract relevant information from scraped content. The v1 scrape\n",
"    # response nests content under 'data' with markdown as the default\n",
"    # format, so read that rather than a top-level 'text' key.\n",
"    scraped_text = job_data['scraped_content'].get('data', {}).get('markdown', '')\n",
"\n",
"    prompt = f\"\"\"\n",
"    Based on the following job information, scraped content from the application page, and the provided resume, write a tailored job application:\n",
"\n",
"    Job Title: {job_data['job_title']}\n",
"    Compensation: {job_data['compensation']}\n",
"    Scraped Content: {scraped_text[:1000]}  # Limit to first 1000 characters to avoid token limits\n",
"\n",
"    Resume:\n",
"    {resume_paste}\n",
"\n",
"    Please format the application as a JSON object with the following fields:\n",
"    - cover_letter: A personalized cover letter addressing key points from the scraped content and highlighting relevant experience from the resume\n",
"    - resume_highlights: Key points from the resume that align with the job requirements mentioned in the scraped content\n",
"    - questions: Any questions you have about the position, derived from the available information\n",
"\n",
"    Ensure the content is specifically tailored to the information provided in the scraped content and leverages the experience detailed in the resume.\n",
"    \"\"\"\n",
"\n",
"    try:\n",
"        completion = client.chat.completions.create(\n",
"            model=\"o1-preview\",\n",
"            messages=[\n",
"                {\"role\": \"user\", \"content\": prompt}\n",
"            ]\n",
"        )\n",
"        return json.loads(completion.choices[0].message.content)\n",
"    except Exception as e:\n",
"        print(f\"Error generating application: {str(e)}\")\n",
"        return None\n",
"\n",
"applications = []\n",
"for job in scraped_job_data:\n",
"    application = generate_application(job, resume_paste)\n",
"    if application:\n",
"        applications.append({\n",
"            \"job_title\": job[\"job_title\"],\n",
"            \"apply_link\": job[\"apply_link\"],\n",
"            \"application\": application\n",
"        })\n",
"\n",
"print(f\"Generated {len(applications)} job applications based on scraped content and resume\")\n",
"print(json.dumps(applications, indent=2))\n",
"\n",
"# Save the JSON to a file\n",
"output_file = \"generated_applications.json\"\n",
"with open(output_file, \"w\") as f:\n",
"    json.dump(applications, f, indent=2)\n",
"\n",
"print(f\"Saved generated applications to {output_file}\")"
]
},
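{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Optional follow-up (added sketch): write each cover letter to its own text\n",
"# file for easier review. Assumes the model returned the 'cover_letter' field\n",
"# requested in the prompt above; records without it are skipped.\n",
"for entry in applications:\n",
"    letter = entry.get(\"application\", {}).get(\"cover_letter\")\n",
"    if not letter:\n",
"        continue\n",
"    safe_title = entry[\"job_title\"].replace(\"/\", \"-\").replace(\" \", \"_\")\n",
"    with open(f\"cover_letter_{safe_title}.txt\", \"w\") as f:\n",
"        f.write(letter)\n",
"    print(f\"Wrote cover_letter_{safe_title}.txt\")\n"
]
}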
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}