{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"import os\n",
"import datetime\n",
"import time\n",
"import requests\n",
"import json\n",
"from dotenv import load_dotenv\n",
"from firecrawl import FirecrawlApp\n",
"from pydantic import BaseModel, Field\n",
"from typing import List\n",
"\n",
"# Load environment variables\n",
"load_dotenv()\n",
"\n",
"# Retrieve API keys from environment variables\n",
"firecrawl_api_key = os.getenv(\"FIRECRAWL_API_KEY\")\n",
"\n",
"# Initialize the FirecrawlApp with your API key\n",
"app = FirecrawlApp(api_key=firecrawl_api_key)\n",
"\n",
"# Set the jobs page URL\n",
"jobs_page_url = \"https://openai.com/careers\"\n"
]
},
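{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Optional sanity check (added sketch): fail fast if a required key is\n",
"# missing. Assumes the .env file defines FIRECRAWL_API_KEY and\n",
"# OPENAI_API_KEY, the two names used later in this notebook.\n",
"for key_name in (\"FIRECRAWL_API_KEY\", \"OPENAI_API_KEY\"):\n",
"    if not os.getenv(key_name):\n",
"        raise ValueError(f\"{key_name} is not set; add it to your .env file\")\n"
]
},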
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total pages mapped (excluding original URL): 14\n",
"['https://openai.com/careers/research-scientist', 'https://openai.com/careers/analytics-engineer', 'https://openai.com/careers/solutions-architect', 'https://openai.com/careers/iam-engineer', 'https://openai.com/careers/talent-partnerships', 'https://openai.com/careers/product-designer', 'https://openai.com/careers/recruiting-coordinator', 'https://openai.com/careers/av-specialist', 'https://openai.com/careers/it-support', 'https://openai.com/careers/director-edu', 'https://openai.com/careers/research-engineer', 'https://openai.com/careers/solutions-engineer', 'https://openai.com/careers/software-engineer-networking', 'https://openai.com/careers/revenue-operations-leader']\n"
]
}
],
"source": [
"# %%\n",
"# Use the Firecrawl Map API to get the sitemap\n",
"api_url = \"https://api.firecrawl.dev/v1/map\"\n",
"payload = {\n",
"    \"url\": jobs_page_url,\n",
"    \"search\": \"\",  # Empty search term to get all pages\n",
"    \"limit\": 15\n",
"}\n",
"headers = {\n",
"    \"Authorization\": f\"Bearer {firecrawl_api_key}\",\n",
"    \"Content-Type\": \"application/json\"\n",
"}\n",
"response = requests.post(api_url, json=payload, headers=headers)\n",
"\n",
"if response.status_code == 200:\n",
"    map_result = response.json()\n",
"    if map_result.get('success'):\n",
"        links = [link for link in map_result.get('links', []) if link != jobs_page_url]\n",
"        print(f\"Total pages mapped (excluding original URL): {len(links)}\")\n",
"        print(links)\n",
"    else:\n",
"        print(\"Map API request was not successful\")\n",
"        exit(1)\n",
"else:\n",
"    print(f\"Error: {response.status_code}\")\n",
"    print(response.text)\n",
"    exit(1)\n"
]
},
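{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Optional helper (added sketch): retry a raw HTTP call on transient 5xx\n",
"# errors, like the single 500 seen in the extraction loop below. The attempt\n",
"# count and backoff values are arbitrary assumptions; tune them as needed.\n",
"def post_with_retries(url, payload, headers, attempts=3, backoff=2.0):\n",
"    response = None\n",
"    for attempt in range(attempts):\n",
"        response = requests.post(url, json=payload, headers=headers)\n",
"        if response.status_code < 500:\n",
"            return response\n",
"        time.sleep(backoff * (attempt + 1))  # simple linear backoff\n",
"    return response\n"
]
},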
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Error 500 for page 0: {\"success\":false,\"error\":\"(Internal server error) - JSON parsing error(s): must be object\\n\\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support. - Could be due to LLM parsing issues\"}\n",
"Data extracted for page 1\n",
"Data extracted for page 2\n",
"Data extracted for page 3\n",
"Data extracted for page 4\n",
"Data extracted for page 5\n",
"Data extracted for page 6\n",
"Data extracted for page 7\n",
"Data extracted for page 8\n",
"Data extracted for page 9\n",
"Data extracted for page 10\n",
"Data extracted for page 11\n",
"Data extracted for page 12\n",
"Data extracted for page 13\n"
]
}
],
"source": [
"# %%\n",
"# Define the extraction schema\n",
"extract_schema = {\n",
"    \"type\": \"object\",\n",
"    \"properties\": {\n",
"        \"job_title\": {\n",
"            \"type\": \"string\"\n",
"        },\n",
"        \"sub_division_of_organization\": {\n",
"            \"type\": \"string\"\n",
"        },\n",
"        \"key_skills\": {\n",
"            \"type\": \"array\",\n",
"            \"items\": {\n",
"                \"type\": \"string\"\n",
"            }\n",
"        },\n",
"        \"compensation\": {\n",
"            \"type\": \"string\"\n",
"        },\n",
"        \"apply_link\": {\n",
"            \"type\": \"string\"\n",
"        }\n",
"    },\n",
"    \"required\": [\"job_title\", \"sub_division_of_organization\", \"key_skills\", \"compensation\", \"apply_link\"]\n",
"}\n",
"\n",
"# Initialize a list to store the extracted data\n",
"extracted_data = []\n",
"\n",
"# Process each link in the map result\n",
"for index, link in enumerate(links):\n",
"    try:\n",
"        response = requests.post(\n",
"            \"https://api.firecrawl.dev/v1/scrape\",\n",
"            headers={\n",
"                \"Content-Type\": \"application/json\",\n",
"                \"Authorization\": f\"Bearer {firecrawl_api_key}\"\n",
"            },\n",
"            json={\n",
"                \"url\": link,\n",
"                \"formats\": [\"extract\"],\n",
"                \"extract\": {\n",
"                    \"schema\": extract_schema\n",
"                }\n",
"            }\n",
"        )\n",
"\n",
"        if response.status_code == 200:\n",
"            result = response.json()\n",
"            if result.get('success'):\n",
"                extracted_data.append(result['data']['extract'])\n",
"                print(f\"Data extracted for page {index}\")\n",
"            else:\n",
"                print(f\"No data extracted for page {index}\")\n",
"        else:\n",
"            print(f\"Error {response.status_code} for page {index}: {response.text}\")\n",
"    except Exception as e:\n",
"        print(f\"An error occurred for page {index}: {str(e)}\")\n"
]
},
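{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Alternative sketch: the same schema expressed with the Pydantic imports\n",
"# from the first cell, which the notebook otherwise leaves unused. Assumes\n",
"# Pydantic v2 (model_json_schema); on v1 use JobPosting.schema() instead.\n",
"class JobPosting(BaseModel):\n",
"    job_title: str\n",
"    sub_division_of_organization: str\n",
"    key_skills: List[str]\n",
"    compensation: str = Field(description=\"Pay range as listed; empty if absent\")\n",
"    apply_link: str\n",
"\n",
"# JobPosting.model_json_schema() yields a JSON Schema close to extract_schema\n",
"# above, so either form could be passed to the /v1/scrape extract option.\n"
]
},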
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracted data:\n",
"{\n",
"  \"job_title\": \"Analytics Engineer\",\n",
"  \"sub_division_of_organization\": \"Growth\",\n",
"  \"key_skills\": [\n",
"    \"SQL\",\n",
"    \"Python\",\n",
"    \"business intelligence tools\",\n",
"    \"ETL workflows\",\n",
"    \"data analysis\",\n",
"    \"dashboards\",\n",
"    \"data storytelling\"\n",
"  ],\n",
"  \"compensation\": \"$245K \\u2013 $385K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/340ef89c-a746-439a-888a-19580eb8c881/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Solutions Architect\",\n",
"  \"sub_division_of_organization\": \"Technical Success\",\n",
"  \"key_skills\": [\n",
"    \"technical consulting\",\n",
"    \"Generative AI\",\n",
"    \"ML solutions\",\n",
"    \"network architecture\",\n",
"    \"cloud architecture\",\n",
"    \"Python\",\n",
"    \"Javascript\"\n",
"  ],\n",
"  \"compensation\": \"\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/51721dfd-7bf5-4112-bb28-da5e4fd86e36/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"IAM Engineer\",\n",
"  \"sub_division_of_organization\": \"IT\",\n",
"  \"key_skills\": [\n",
"    \"AzureAD\",\n",
"    \"Python\",\n",
"    \"PowerShell\",\n",
"    \"identity governance\",\n",
"    \"automation\",\n",
"    \"Terraform\"\n",
"  ],\n",
"  \"compensation\": \"$245K \\u2013 $385K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/e798aa62-74f9-4f53-a890-716310926b70/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Talent Partnerships\",\n",
"  \"sub_division_of_organization\": \"Communications\",\n",
"  \"key_skills\": [\n",
"    \"relationship management\",\n",
"    \"communication\",\n",
"    \"adaptability\",\n",
"    \"creativity\",\n",
"    \"collaboration\",\n",
"    \"transparency\"\n",
"  ],\n",
"  \"compensation\": \"$171K \\u2013 $240K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/84a4a8bb-7d5a-4989-9b5c-bd841db2698e/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"404 Error Page\",\n",
"  \"sub_division_of_organization\": \"Web Development\",\n",
"  \"key_skills\": [\n",
"    \"Error Handling\",\n",
"    \"Web Design\",\n",
"    \"User Experience\"\n",
"  ],\n",
"  \"compensation\": \"N/A\",\n",
"  \"apply_link\": \"N/A\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"\",\n",
"  \"sub_division_of_organization\": \"\",\n",
"  \"key_skills\": [],\n",
"  \"compensation\": \"\",\n",
"  \"apply_link\": \"\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"AV Specialist\",\n",
"  \"sub_division_of_organization\": \"IT\",\n",
"  \"key_skills\": [\n",
"    \"AV support\",\n",
"    \"Google Meet\",\n",
"    \"Zoom\",\n",
"    \"Cisco\",\n",
"    \"ticket management\",\n",
"    \"IT troubleshooting\",\n",
"    \"problem-solving\",\n",
"    \"interpersonal skills\"\n",
"  ],\n",
"  \"compensation\": \"$110K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/20fd0ff8-dd5e-4bec-a401-dd3f8263fe24/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"IT Support\",\n",
"  \"sub_division_of_organization\": \"IT\",\n",
"  \"key_skills\": [\n",
"    \"Intermediate-to-expert understanding of IDP and MDM solutions\",\n",
"    \"Familiarity with Windows or Linux\",\n",
"    \"Understanding of Python, Bash, or Apple Script\",\n",
"    \"Experience with collaboration software\",\n",
"    \"Hands-on expertise implementing and managing AV and telecom systems\",\n",
"    \"Complete Mac and macOS troubleshooting skills\",\n",
"    \"Adept in orchestrating high-production events\"\n",
"  ],\n",
"  \"compensation\": \"$110K \\u2013 $140K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/ca263679-08d5-4492-9a56-32fbcb7318a5/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"404\",\n",
"  \"sub_division_of_organization\": \"OpenAI\",\n",
"  \"key_skills\": [],\n",
"  \"compensation\": \"\",\n",
"  \"apply_link\": \"\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Research Engineer\",\n",
"  \"sub_division_of_organization\": \"Research\",\n",
"  \"key_skills\": [\n",
"    \"strong programming skills\",\n",
"    \"experience working in large distributed systems\"\n",
"  ],\n",
"  \"compensation\": \"$295K \\u2013 $440K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/240d459b-696d-43eb-8497-fab3e56ecd9b/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Solutions Engineer\",\n",
"  \"sub_division_of_organization\": \"Technical Success\",\n",
"  \"key_skills\": [\n",
"    \"7+ years of experience in a technical pre-sales role\",\n",
"    \"Understanding of IT security principles\",\n",
"    \"Experience with programming languages like Python or Javascript\",\n",
"    \"Knowledge of network/cloud architecture\",\n",
"    \"Effective presentation and communication skills\",\n",
"    \"Ability to manage C-level technical and business relationships\"\n",
"  ],\n",
"  \"compensation\": \"\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/dbfef1b0-9a77-46bd-ad36-67f3d0286924/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Software Engineer, Networking\",\n",
"  \"sub_division_of_organization\": \"Platform\",\n",
"  \"key_skills\": [\n",
"    \"C++\",\n",
"    \"CUDA\",\n",
"    \"distributed algorithms\",\n",
"    \"RDMA\",\n",
"    \"network simulation techniques\"\n",
"  ],\n",
"  \"compensation\": \"$360K \\u2013 $530K\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/340c0c22-8d8f-4232-b17e-f642b64c25c3/application\"\n",
"}\n",
"--------------------------------------------------\n",
"{\n",
"  \"job_title\": \"Revenue Operations Leader\",\n",
"  \"sub_division_of_organization\": \"Revenue Operations\",\n",
"  \"key_skills\": [\n",
"    \"Extensive experience in revenue operations or strategy at a high-growth, technology company\",\n",
"    \"Proficiency with GTM systems, namely SFDC, Gong\",\n",
"    \"Experience managing a large team of 15+ operational team members\",\n",
"    \"Highly analytical\",\n",
"    \"Exceptional project management skills with experience leading complex, cross-functional initiatives\",\n",
"    \"Deep experience designing & executing on a territory strategy for 100+ GTM orgs\",\n",
"    \"Strong communication skills and executive presence\",\n",
"    \"An understanding of the AI landscape, our applications, and the problems they solve for our customers\",\n",
"    \"The ability to thrive in ambiguity and work autonomously\"\n",
"  ],\n",
"  \"compensation\": \"$325K + Offers Equity\",\n",
"  \"apply_link\": \"https://jobs.ashbyhq.com/openai/61a484e5-4723-4031-92c1-068dfe4b069f/application\"\n",
"}\n",
"--------------------------------------------------\n",
"Extracted data saved to /Users/ericciarla/Documents/GitHub/firecrawl/examples/getting_latest_openai_jobs/openai_jobs.csv\n"
]
}
],
"source": [
"# %%\n",
"# Print the extracted data\n",
"print(\"Extracted data:\")\n",
"for job in extracted_data:\n",
"    print(json.dumps(job, indent=2))\n",
"    print(\"-\" * 50)  # Separator between jobs\n",
"\n",
"# Save as CSV\n",
"import csv\n",
"import os\n",
"\n",
"# Get the current directory\n",
"current_dir = os.getcwd()\n",
"\n",
"# Create the full path for the CSV file\n",
"csv_file = os.path.join(current_dir, \"openai_jobs.csv\")\n",
"\n",
"try:\n",
"    with open(csv_file, \"w\", newline=\"\") as f:\n",
"        if extracted_data:\n",
"            writer = csv.DictWriter(f, fieldnames=extracted_data[0].keys())\n",
"            writer.writeheader()\n",
"            for job in extracted_data:\n",
"                writer.writerow(job)\n",
"            print(f\"Extracted data saved to {csv_file}\")\n",
"        else:\n",
"            print(\"No data to save.\")\n",
"except IOError as e:\n",
"    print(f\"Error saving CSV file: {e}\")\n"
]
},
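{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Defensive variant (added sketch): some pages above extracted as 404\n",
"# placeholders, so a record could be missing a field. Pinning fieldnames and\n",
"# using restval/extrasaction makes DictWriter tolerant of such records.\n",
"fieldnames = [\"job_title\", \"sub_division_of_organization\", \"key_skills\",\n",
"              \"compensation\", \"apply_link\"]\n",
"with open(csv_file, \"w\", newline=\"\") as f:\n",
"    writer = csv.DictWriter(f, fieldnames=fieldnames, restval=\"\", extrasaction=\"ignore\")\n",
"    writer.writeheader()\n",
"    writer.writerows(extracted_data)\n"
]
},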
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Recommended jobs:\n",
"[\n",
"  {\n",
"    \"job_title\": \"Analytics Engineer\",\n",
"    \"compensation\": \"$245K \\u2013 $385K + Offers Equity\",\n",
"    \"apply_link\": \"https://jobs.ashbyhq.com/openai/340ef89c-a746-439a-888a-19580eb8c881/application\"\n",
"  },\n",
"  {\n",
"    \"job_title\": \"Solutions Architect\",\n",
"    \"compensation\": \"\",\n",
"    \"apply_link\": \"https://jobs.ashbyhq.com/openai/51721dfd-7bf5-4112-bb28-da5e4fd86e36/application\"\n",
"  },\n",
"  {\n",
"    \"job_title\": \"Research Engineer\",\n",
"    \"compensation\": \"$295K \\u2013 $440K + Offers Equity\",\n",
"    \"apply_link\": \"https://jobs.ashbyhq.com/openai/240d459b-696d-43eb-8497-fab3e56ecd9b/application\"\n",
"  },\n",
"  {\n",
"    \"job_title\": \"Solutions Engineer\",\n",
"    \"compensation\": \"\",\n",
"    \"apply_link\": \"https://jobs.ashbyhq.com/openai/dbfef1b0-9a77-46bd-ad36-67f3d0286924/application\"\n",
"  }\n",
"]\n"
]
}
],
"source": [
"from openai import OpenAI\n",
"\n",
"# Resume\n",
"resume_paste = \"\"\"\n",
"Eric Ciarla\n",
"Co-Founder @ Firecrawl\n",
"San Francisco, California, United States\n",
"Summary\n",
"Building…\n",
"Experience\n",
"Firecrawl\n",
"Co-Founder\n",
"April 2024 - Present (6 months)\n",
"San Francisco, California, United States\n",
"Firecrawl by Mendable. Building data extraction infrastructure for AI. Used by\n",
"Amazon, Zapier, and Nvidia (YC S22)\n",
"Mendable\n",
"2 years 7 months\n",
"Co-Founder @ Mendable.ai\n",
"March 2022 - Present (2 years 7 months)\n",
"San Francisco, California, United States\n",
"- Built an AI powered search platform that served millions of queries for\n",
"hundreds of customers (YC S22)\n",
"- We were one of the first LLM powered apps adopted by industry leaders like\n",
"Coinbase, Snap, DoorDash, and MongoDB\n",
"Co-Founder @ SideGuide\n",
"March 2022 - Present (2 years 7 months)\n",
"San Francisco, California, United States\n",
"- Built and scaled an online course platform with a community of over 50,000\n",
"developers\n",
"- Selected for Y Combinator S22 batch, 2% acceptance rate\n",
"Fracta\n",
"Data Engineer\n",
"2022 - 2022 (less than a year)\n",
"Palo Alto, California, United States\n",
"- Demoed tool during sales calls and provided technical support during the\n",
"entire customer lifecycle\n",
"- Mined, wrangled, & visualized geospatial and water utility data for predictive\n",
"analytics & ML workflows (Python, QGIS)\n",
"Ford Motor Company\n",
"Data Scientist\n",
"2021 - 2021 (less than a year)\n",
"Dearborn, Michigan, United States\n",
"- Extracted, cleaned, and joined data from multiple sources using SQL,\n",
"Hadoop, and Alteryx\n",
"- Used Bayesian Network Structure Learning (BNLearn, R) to uncover the\n",
"relationships between survey free response verbatim topics (derived from\n",
"natural language processing models) and numerical customer experience\n",
"scores\n",
"MDRemindME\n",
"Co-Founder\n",
"2018 - 2020 (2 years)\n",
"Durham, New Hampshire, United States\n",
"- Founded and led a healthtech startup aimed at improving patient adherence\n",
"to treatment plans through an innovative engagement and retention tool\n",
"- Piloted the product with healthcare providers and patients, gathering critical\n",
"insights to refine functionality and enhance user experience\n",
"- Secured funding through National Science Foundation I-CORPS Grant and\n",
"UNH Entrepreneurship Center Seed Grant\n",
"Education\n",
"Y Combinator\n",
"S22\n",
"University of New Hampshire\n",
"Economics and Philosophy\n",
"\"\"\"\n",
"\n",
"# Use o1-preview to choose which jobs should be applied to based on the resume\n",
"client = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
"\n",
"prompt = f\"\"\"\n",
"Please analyze the resume and job listings, and return a JSON list of the top 3 roles that best fit the candidate's experience and skills. Include only the job title, compensation, and apply link for each recommended role. The output should be a valid JSON array of objects in the following format, with no additional text:\n",
"\n",
"[\n",
"  {{\n",
"    \"job_title\": \"Job Title\",\n",
"    \"compensation\": \"Compensation (if available, otherwise empty string)\",\n",
"    \"apply_link\": \"Application URL\"\n",
"  }},\n",
"  ...\n",
"]\n",
"\n",
"Based on the following resume:\n",
"{resume_paste}\n",
"\n",
"And the following job listings:\n",
"{json.dumps(extracted_data, indent=2)}\n",
"\"\"\"\n",
"\n",
"completion = client.chat.completions.create(\n",
"    model=\"o1-preview\",\n",
"    messages=[\n",
"        {\n",
"            \"role\": \"user\",\n",
"            \"content\": [\n",
"                {\n",
"                    \"type\": \"text\",\n",
"                    \"text\": prompt\n",
"                }\n",
"            ]\n",
"        }\n",
"    ]\n",
")\n",
"\n",
"recommended_jobs = json.loads(completion.choices[0].message.content.strip())\n",
"\n",
"print(\"Recommended jobs:\")\n",
"print(json.dumps(recommended_jobs, indent=2))"
]
},
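{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Robust JSON parsing (added sketch): models sometimes wrap their answer in\n",
"# ```json fences despite the prompt, which would make the bare json.loads\n",
"# above raise. This helper strips fences first and could replace that call.\n",
"def parse_json_reply(text):\n",
"    text = text.strip()\n",
"    if text.startswith(\"```\"):\n",
"        text = text.split(\"\\n\", 1)[1]   # drop the opening fence line\n",
"        text = text.rsplit(\"```\", 1)[0]  # drop the closing fence\n",
"    return json.loads(text)\n"
]
},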
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scrape each of the apply links with Firecrawl /v1/scrape\n",
"import requests\n",
"\n",
"firecrawl_api_key = os.getenv(\"FIRECRAWL_API_KEY\")\n",
"\n",
"def scrape_apply_link(url):\n",
"    api_url = \"https://api.firecrawl.dev/v1/scrape\"\n",
"    headers = {\n",
"        \"Authorization\": f\"Bearer {firecrawl_api_key}\",\n",
"        \"Content-Type\": \"application/json\"\n",
"    }\n",
"    payload = {\n",
"        \"url\": url\n",
"    }\n",
"\n",
"    response = requests.post(api_url, json=payload, headers=headers)\n",
"    if response.status_code == 200:\n",
"        return response.json()\n",
"    else:\n",
"        print(f\"Error scraping {url}: {response.status_code}\")\n",
"        return None\n",
"\n",
"scraped_job_data = []\n",
"for job in recommended_jobs:\n",
"    apply_link = job.get('apply_link')\n",
"    if apply_link:\n",
"        scraped_data = scrape_apply_link(apply_link)\n",
"        if scraped_data:\n",
"            scraped_job_data.append({\n",
"                'job_title': job['job_title'],\n",
"                'compensation': job['compensation'],\n",
"                'apply_link': apply_link,\n",
"                'scraped_content': scraped_data\n",
"            })\n",
"\n",
"print(f\"Scraped {len(scraped_job_data)} job application pages\")"
]
},
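{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Optional guard (added sketch): some extracted records use placeholder links\n",
"# like \"N/A\", which are truthy and would still be scraped by the loop above.\n",
"# A simple URL check plus a short delay keeps requests valid and politely\n",
"# paced; the 1-second delay is an arbitrary assumption.\n",
"def is_scrapable_link(url):\n",
"    return isinstance(url, str) and url.startswith(\"http\")\n",
"\n",
"# Example usage inside the loop above:\n",
"#     if is_scrapable_link(apply_link):\n",
"#         time.sleep(1)  # pace requests\n",
"#         scraped_data = scrape_apply_link(apply_link)\n"
]
},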
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use o1 to write the application for you and return it as JSON\n",
"import json\n",
"\n",
"def generate_application(job_data, resume_paste):\n",
"    # Extract relevant information from scraped content. The v1 scrape\n",
"    # response nests content under 'data' with markdown as the default\n",
"    # format, so read that rather than a top-level 'text' key.\n",
"    scraped_text = job_data['scraped_content'].get('data', {}).get('markdown', '')\n",
"\n",
"    prompt = f\"\"\"\n",
"    Based on the following job information, scraped content from the application page, and the provided resume, write a tailored job application:\n",
"\n",
"    Job Title: {job_data['job_title']}\n",
"    Compensation: {job_data['compensation']}\n",
"    Scraped Content: {scraped_text[:1000]}  # Limit to first 1000 characters to avoid token limits\n",
"\n",
"    Resume:\n",
"    {resume_paste}\n",
"\n",
"    Please format the application as a JSON object with the following fields:\n",
"    - cover_letter: A personalized cover letter addressing key points from the scraped content and highlighting relevant experience from the resume\n",
"    - resume_highlights: Key points from the resume that align with the job requirements mentioned in the scraped content\n",
"    - questions: Any questions you have about the position, derived from the available information\n",
"\n",
"    Ensure the content is specifically tailored to the information provided in the scraped content and leverages the experience detailed in the resume.\n",
"    \"\"\"\n",
"\n",
"    try:\n",
"        completion = client.chat.completions.create(\n",
"            model=\"o1-preview\",\n",
"            messages=[\n",
"                {\"role\": \"user\", \"content\": prompt}\n",
"            ]\n",
"        )\n",
"        return json.loads(completion.choices[0].message.content)\n",
"    except Exception as e:\n",
"        print(f\"Error generating application: {str(e)}\")\n",
"        return None\n",
"\n",
"applications = []\n",
"for job in scraped_job_data:\n",
"    application = generate_application(job, resume_paste)\n",
"    if application:\n",
"        applications.append({\n",
"            \"job_title\": job[\"job_title\"],\n",
"            \"apply_link\": job[\"apply_link\"],\n",
"            \"application\": application\n",
"        })\n",
"\n",
"print(f\"Generated {len(applications)} job applications based on scraped content and resume\")\n",
"print(json.dumps(applications, indent=2))\n",
"\n",
"# Save the JSON to a file\n",
"output_file = \"generated_applications.json\"\n",
"with open(output_file, \"w\") as f:\n",
"    json.dump(applications, f, indent=2)\n",
"\n",
"print(f\"Saved generated applications to {output_file}\")"
]
},
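{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %%\n",
"# Optional follow-up (added sketch): write each cover letter to its own text\n",
"# file for easier review. Assumes the model returned the 'cover_letter' field\n",
"# requested in the prompt above; records without it are skipped.\n",
"for entry in applications:\n",
"    letter = entry.get(\"application\", {}).get(\"cover_letter\")\n",
"    if not letter:\n",
"        continue\n",
"    safe_title = entry[\"job_title\"].replace(\"/\", \"-\").replace(\" \", \"_\")\n",
"    with open(f\"cover_letter_{safe_title}.txt\", \"w\") as f:\n",
"        f.write(letter)\n",
"    print(f\"Wrote cover_letter_{safe_title}.txt\")\n"
]
}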
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}