mirror of https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00

Delete o1_job_recommender.ipynb

This commit is contained in:
parent 5c4d436f1e
commit 20b998e66a

@@ -1,672 +0,0 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%\n",
    "import os\n",
    "import json\n",
    "import requests\n",
    "from dotenv import load_dotenv\n",
    "from firecrawl import FirecrawlApp\n",
    "\n",
    "# Load environment variables\n",
    "load_dotenv()\n",
    "\n",
    "# Retrieve the Firecrawl API key from the environment\n",
    "firecrawl_api_key = os.getenv(\"FIRECRAWL_API_KEY\")\n",
    "\n",
    "# Initialize the FirecrawlApp with your API key\n",
    "app = FirecrawlApp(api_key=firecrawl_api_key)\n",
    "\n",
    "# Set the jobs page URL\n",
    "jobs_page_url = \"https://openai.com/careers\"\n"
   ]
  },
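  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%\n",
    "# A minimal sanity check (added sketch, not part of the original notebook):\n",
    "# fail fast with a clear message if the key was not loaded, rather than\n",
    "# letting the later API calls fail with an opaque 401.\n",
    "if not firecrawl_api_key:\n",
    "    raise RuntimeError(\"FIRECRAWL_API_KEY is not set; add it to your .env file\")\n"
   ]
  },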
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total pages mapped (excluding original URL): 14\n",
      "['https://openai.com/careers/research-scientist', 'https://openai.com/careers/analytics-engineer', 'https://openai.com/careers/solutions-architect', 'https://openai.com/careers/iam-engineer', 'https://openai.com/careers/talent-partnerships', 'https://openai.com/careers/product-designer', 'https://openai.com/careers/recruiting-coordinator', 'https://openai.com/careers/av-specialist', 'https://openai.com/careers/it-support', 'https://openai.com/careers/director-edu', 'https://openai.com/careers/research-engineer', 'https://openai.com/careers/solutions-engineer', 'https://openai.com/careers/software-engineer-networking', 'https://openai.com/careers/revenue-operations-leader']\n"
     ]
    }
   ],
   "source": [
    "# %%\n",
    "# Use the Firecrawl Map API to get the sitemap\n",
    "api_url = \"https://api.firecrawl.dev/v1/map\"\n",
    "payload = {\n",
    "    \"url\": jobs_page_url,\n",
    "    \"search\": \"\",  # Empty search term to get all pages\n",
    "    \"limit\": 15\n",
    "}\n",
    "headers = {\n",
    "    \"Authorization\": f\"Bearer {firecrawl_api_key}\",\n",
    "    \"Content-Type\": \"application/json\"\n",
    "}\n",
    "response = requests.post(api_url, json=payload, headers=headers)\n",
    "\n",
    "if response.status_code == 200:\n",
    "    map_result = response.json()\n",
    "    if map_result.get('success'):\n",
    "        links = [link for link in map_result.get('links', []) if link != jobs_page_url]\n",
    "        print(f\"Total pages mapped (excluding original URL): {len(links)}\")\n",
    "        print(links)\n",
    "    else:\n",
    "        print(\"Map API request was not successful\")\n",
    "        exit(1)\n",
    "else:\n",
    "    print(f\"Error: {response.status_code}\")\n",
    "    print(response.text)\n",
    "    exit(1)\n"
   ]
  },
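  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%\n",
    "# Alternative sketch (added, not in the original notebook): newer firecrawl-py\n",
    "# releases wrap the same /v1/map endpoint in a client method. Whether map_url\n",
    "# exists, and the exact shape of its return value, depend on the installed SDK\n",
    "# version, so both are guarded here rather than assumed.\n",
    "if hasattr(app, \"map_url\"):\n",
    "    sdk_result = app.map_url(jobs_page_url, params={\"limit\": 15})\n",
    "    sdk_links = sdk_result.get(\"links\", []) if isinstance(sdk_result, dict) else sdk_result\n",
    "    print(f\"SDK-mapped pages: {len(sdk_links)}\")\n",
    "else:\n",
    "    print(\"This firecrawl-py version does not expose map_url; use the raw HTTP call above\")\n"
   ]
  },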
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Error 500 for page 0: {\"success\":false,\"error\":\"(Internal server error) - JSON parsing error(s): must be object\\n\\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support. - Could be due to LLM parsing issues\"}\n",
      "Data extracted for page 1\n",
      "Data extracted for page 2\n",
      "Data extracted for page 3\n",
      "Data extracted for page 4\n",
      "Data extracted for page 5\n",
      "Data extracted for page 6\n",
      "Data extracted for page 7\n",
      "Data extracted for page 8\n",
      "Data extracted for page 9\n",
      "Data extracted for page 10\n",
      "Data extracted for page 11\n",
      "Data extracted for page 12\n",
      "Data extracted for page 13\n"
     ]
    }
   ],
   "source": [
    "# %%\n",
    "# Define the extraction schema\n",
    "extract_schema = {\n",
    "    \"type\": \"object\",\n",
    "    \"properties\": {\n",
    "        \"job_title\": {\"type\": \"string\"},\n",
    "        \"sub_division_of_organization\": {\"type\": \"string\"},\n",
    "        \"key_skills\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}},\n",
    "        \"compensation\": {\"type\": \"string\"},\n",
    "        \"apply_link\": {\"type\": \"string\"}\n",
    "    },\n",
    "    \"required\": [\"job_title\", \"sub_division_of_organization\", \"key_skills\", \"compensation\", \"apply_link\"]\n",
    "}\n",
    "\n",
    "# Initialize a list to store the extracted data\n",
    "extracted_data = []\n",
    "\n",
    "# Process each link in the map result\n",
    "for index, link in enumerate(links):\n",
    "    try:\n",
    "        response = requests.post(\n",
    "            \"https://api.firecrawl.dev/v1/scrape\",\n",
    "            headers={\n",
    "                \"Content-Type\": \"application/json\",\n",
    "                \"Authorization\": f\"Bearer {firecrawl_api_key}\"\n",
    "            },\n",
    "            json={\n",
    "                \"url\": link,\n",
    "                \"formats\": [\"extract\"],\n",
    "                \"extract\": {\"schema\": extract_schema}\n",
    "            }\n",
    "        )\n",
    "\n",
    "        if response.status_code == 200:\n",
    "            result = response.json()\n",
    "            if result.get('success'):\n",
    "                extracted_data.append(result['data']['extract'])\n",
    "                print(f\"Data extracted for page {index}\")\n",
    "            else:\n",
    "                print(f\"No data extracted for page {index}\")\n",
    "        else:\n",
    "            print(f\"Error {response.status_code} for page {index}: {response.text}\")\n",
    "    except Exception as e:\n",
    "        print(f\"An error occurred for page {index}: {str(e)}\")\n"
   ]
  },
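  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%\n",
    "# Optional hardening (added sketch, not in the original notebook): the run\n",
    "# above lost page 0 to a transient 500 from the LLM extraction step. A simple\n",
    "# retry with exponential backoff usually recovers such failures.\n",
    "# scrape_with_retry is a hypothetical helper name; the endpoint and payload\n",
    "# mirror the loop above, e.g. extract = scrape_with_retry(links[0], extract_schema).\n",
    "import time\n",
    "\n",
    "def scrape_with_retry(link, schema, retries=3, backoff=2.0):\n",
    "    for attempt in range(retries):\n",
    "        resp = requests.post(\n",
    "            \"https://api.firecrawl.dev/v1/scrape\",\n",
    "            headers={\n",
    "                \"Content-Type\": \"application/json\",\n",
    "                \"Authorization\": f\"Bearer {firecrawl_api_key}\"\n",
    "            },\n",
    "            json={\"url\": link, \"formats\": [\"extract\"], \"extract\": {\"schema\": schema}}\n",
    "        )\n",
    "        if resp.status_code == 200 and resp.json().get('success'):\n",
    "            return resp.json()['data']['extract']\n",
    "        time.sleep(backoff * (2 ** attempt))  # Wait longer after each failure\n",
    "    return None\n"
   ]
  },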
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracted data:\n",
      "{\n",
      "  \"job_title\": \"Analytics Engineer\",\n",
      "  \"sub_division_of_organization\": \"Growth\",\n",
      "  \"key_skills\": [\n",
      "    \"SQL\",\n",
      "    \"Python\",\n",
      "    \"business intelligence tools\",\n",
      "    \"ETL workflows\",\n",
      "    \"data analysis\",\n",
      "    \"dashboards\",\n",
      "    \"data storytelling\"\n",
      "  ],\n",
      "  \"compensation\": \"$245K \\u2013 $385K + Offers Equity\",\n",
      "  \"apply_link\": \"https://jobs.ashbyhq.com/openai/340ef89c-a746-439a-888a-19580eb8c881/application\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"Solutions Architect\",\n",
      "  \"sub_division_of_organization\": \"Technical Success\",\n",
      "  \"key_skills\": [\n",
      "    \"technical consulting\",\n",
      "    \"Generative AI\",\n",
      "    \"ML solutions\",\n",
      "    \"network architecture\",\n",
      "    \"cloud architecture\",\n",
      "    \"Python\",\n",
      "    \"Javascript\"\n",
      "  ],\n",
      "  \"compensation\": \"\",\n",
      "  \"apply_link\": \"https://jobs.ashbyhq.com/openai/51721dfd-7bf5-4112-bb28-da5e4fd86e36/application\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"IAM Engineer\",\n",
      "  \"sub_division_of_organization\": \"IT\",\n",
      "  \"key_skills\": [\n",
      "    \"AzureAD\",\n",
      "    \"Python\",\n",
      "    \"PowerShell\",\n",
      "    \"identity governance\",\n",
      "    \"automation\",\n",
      "    \"Terraform\"\n",
      "  ],\n",
      "  \"compensation\": \"$245K \\u2013 $385K + Offers Equity\",\n",
      "  \"apply_link\": \"https://jobs.ashbyhq.com/openai/e798aa62-74f9-4f53-a890-716310926b70/application\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"Talent Partnerships\",\n",
      "  \"sub_division_of_organization\": \"Communications\",\n",
      "  \"key_skills\": [\n",
      "    \"relationship management\",\n",
      "    \"communication\",\n",
      "    \"adaptability\",\n",
      "    \"creativity\",\n",
      "    \"collaboration\",\n",
      "    \"transparency\"\n",
      "  ],\n",
      "  \"compensation\": \"$171K \\u2013 $240K + Offers Equity\",\n",
      "  \"apply_link\": \"https://jobs.ashbyhq.com/openai/84a4a8bb-7d5a-4989-9b5c-bd841db2698e/application\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"404 Error Page\",\n",
      "  \"sub_division_of_organization\": \"Web Development\",\n",
      "  \"key_skills\": [\n",
      "    \"Error Handling\",\n",
      "    \"Web Design\",\n",
      "    \"User Experience\"\n",
      "  ],\n",
      "  \"compensation\": \"N/A\",\n",
      "  \"apply_link\": \"N/A\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"\",\n",
      "  \"sub_division_of_organization\": \"\",\n",
      "  \"key_skills\": [],\n",
      "  \"compensation\": \"\",\n",
      "  \"apply_link\": \"\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"AV Specialist\",\n",
      "  \"sub_division_of_organization\": \"IT\",\n",
      "  \"key_skills\": [\n",
      "    \"AV support\",\n",
      "    \"Google Meet\",\n",
      "    \"Zoom\",\n",
      "    \"Cisco\",\n",
      "    \"ticket management\",\n",
      "    \"IT troubleshooting\",\n",
      "    \"problem-solving\",\n",
      "    \"interpersonal skills\"\n",
      "  ],\n",
      "  \"compensation\": \"$110K + Offers Equity\",\n",
      "  \"apply_link\": \"https://jobs.ashbyhq.com/openai/20fd0ff8-dd5e-4bec-a401-dd3f8263fe24/application\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"IT Support\",\n",
      "  \"sub_division_of_organization\": \"IT\",\n",
      "  \"key_skills\": [\n",
      "    \"Intermediate-to-expert understanding of IDP and MDM solutions\",\n",
      "    \"Familiarity with Windows or Linux\",\n",
      "    \"Understanding of Python, Bash, or Apple Script\",\n",
      "    \"Experience with collaboration software\",\n",
      "    \"Hands-on expertise implementing and managing AV and telecom systems\",\n",
      "    \"Complete Mac and macOS troubleshooting skills\",\n",
      "    \"Adept in orchestrating high-production events\"\n",
      "  ],\n",
      "  \"compensation\": \"$110K \\u2013 $140K + Offers Equity\",\n",
      "  \"apply_link\": \"https://jobs.ashbyhq.com/openai/ca263679-08d5-4492-9a56-32fbcb7318a5/application\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"404\",\n",
      "  \"sub_division_of_organization\": \"OpenAI\",\n",
      "  \"key_skills\": [],\n",
      "  \"compensation\": \"\",\n",
      "  \"apply_link\": \"\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"Research Engineer\",\n",
      "  \"sub_division_of_organization\": \"Research\",\n",
      "  \"key_skills\": [\n",
      "    \"strong programming skills\",\n",
      "    \"experience working in large distributed systems\"\n",
      "  ],\n",
      "  \"compensation\": \"$295K \\u2013 $440K + Offers Equity\",\n",
      "  \"apply_link\": \"https://jobs.ashbyhq.com/openai/240d459b-696d-43eb-8497-fab3e56ecd9b/application\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"Solutions Engineer\",\n",
      "  \"sub_division_of_organization\": \"Technical Success\",\n",
      "  \"key_skills\": [\n",
      "    \"7+ years of experience in a technical pre-sales role\",\n",
      "    \"Understanding of IT security principles\",\n",
      "    \"Experience with programming languages like Python or Javascript\",\n",
      "    \"Knowledge of network/cloud architecture\",\n",
      "    \"Effective presentation and communication skills\",\n",
      "    \"Ability to manage C-level technical and business relationships\"\n",
      "  ],\n",
      "  \"compensation\": \"\",\n",
      "  \"apply_link\": \"https://jobs.ashbyhq.com/openai/dbfef1b0-9a77-46bd-ad36-67f3d0286924/application\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"Software Engineer, Networking\",\n",
      "  \"sub_division_of_organization\": \"Platform\",\n",
      "  \"key_skills\": [\n",
      "    \"C++\",\n",
      "    \"CUDA\",\n",
      "    \"distributed algorithms\",\n",
      "    \"RDMA\",\n",
      "    \"network simulation techniques\"\n",
      "  ],\n",
      "  \"compensation\": \"$360K \\u2013 $530K\",\n",
      "  \"apply_link\": \"https://jobs.ashbyhq.com/openai/340c0c22-8d8f-4232-b17e-f642b64c25c3/application\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "{\n",
      "  \"job_title\": \"Revenue Operations Leader\",\n",
      "  \"sub_division_of_organization\": \"Revenue Operations\",\n",
      "  \"key_skills\": [\n",
      "    \"Extensive experience in revenue operations or strategy at a high-growth, technology company\",\n",
      "    \"Proficiency with GTM systems, namely SFDC, Gong\",\n",
      "    \"Experience managing a large team of 15+ operational team members\",\n",
      "    \"Highly analytical\",\n",
      "    \"Exceptional project management skills with experience leading complex, cross-functional initiatives\",\n",
      "    \"Deep experience designing & executing on a territory strategy for 100+ GTM orgs\",\n",
      "    \"Strong communication skills and executive presence\",\n",
      "    \"An understanding of the AI landscape, our applications, and the problems they solve for our customers\",\n",
      "    \"The ability to thrive in ambiguity and work autonomously\"\n",
      "  ],\n",
      "  \"compensation\": \"$325K + Offers Equity\",\n",
      "  \"apply_link\": \"https://jobs.ashbyhq.com/openai/61a484e5-4723-4031-92c1-068dfe4b069f/application\"\n",
      "}\n",
      "--------------------------------------------------\n",
      "Extracted data saved to /Users/ericciarla/Documents/GitHub/firecrawl/examples/getting_latest_openai_jobs/openai_jobs.csv\n"
     ]
    }
   ],
   "source": [
    "# %%\n",
    "# Print the extracted data\n",
    "print(\"Extracted data:\")\n",
    "for job in extracted_data:\n",
    "    print(json.dumps(job, indent=2))\n",
    "    print(\"-\" * 50)  # Separator between jobs\n",
    "\n",
    "# Save as CSV\n",
    "import csv\n",
    "\n",
    "# Create the full path for the CSV file in the current directory\n",
    "current_dir = os.getcwd()\n",
    "csv_file = os.path.join(current_dir, \"openai_jobs.csv\")\n",
    "\n",
    "try:\n",
    "    with open(csv_file, \"w\", newline=\"\") as f:\n",
    "        if extracted_data:\n",
    "            writer = csv.DictWriter(f, fieldnames=extracted_data[0].keys())\n",
    "            writer.writeheader()\n",
    "            for job in extracted_data:\n",
    "                writer.writerow(job)\n",
    "            print(f\"Extracted data saved to {csv_file}\")\n",
    "        else:\n",
    "            print(\"No data to save.\")\n",
    "except IOError as e:\n",
    "    print(f\"Error saving CSV file: {e}\")\n"
   ]
  },
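  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%\n",
    "# Optional cleanup (added sketch, not in the original notebook): the mapped\n",
    "# URLs included two 404 pages and one empty extraction, and csv.DictWriter\n",
    "# writes the key_skills list as a Python repr. This pass drops placeholder\n",
    "# rows and flattens the skills list into a semicolon-separated string.\n",
    "def is_real_job(job):\n",
    "    title = job.get(\"job_title\", \"\")\n",
    "    return bool(title) and \"404\" not in title\n",
    "\n",
    "cleaned_jobs = [\n",
    "    {**job, \"key_skills\": \"; \".join(job.get(\"key_skills\", []))}\n",
    "    for job in extracted_data if is_real_job(job)\n",
    "]\n",
    "print(f\"Kept {len(cleaned_jobs)} of {len(extracted_data)} extracted rows\")\n"
   ]
  },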
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Recommended jobs:\n",
      "[\n",
      "  {\n",
      "    \"job_title\": \"Analytics Engineer\",\n",
      "    \"compensation\": \"$245K \\u2013 $385K + Offers Equity\",\n",
      "    \"apply_link\": \"https://jobs.ashbyhq.com/openai/340ef89c-a746-439a-888a-19580eb8c881/application\"\n",
      "  },\n",
      "  {\n",
      "    \"job_title\": \"Solutions Architect\",\n",
      "    \"compensation\": \"\",\n",
      "    \"apply_link\": \"https://jobs.ashbyhq.com/openai/51721dfd-7bf5-4112-bb28-da5e4fd86e36/application\"\n",
      "  },\n",
      "  {\n",
      "    \"job_title\": \"Research Engineer\",\n",
      "    \"compensation\": \"$295K \\u2013 $440K + Offers Equity\",\n",
      "    \"apply_link\": \"https://jobs.ashbyhq.com/openai/240d459b-696d-43eb-8497-fab3e56ecd9b/application\"\n",
      "  },\n",
      "  {\n",
      "    \"job_title\": \"Solutions Engineer\",\n",
      "    \"compensation\": \"\",\n",
      "    \"apply_link\": \"https://jobs.ashbyhq.com/openai/dbfef1b0-9a77-46bd-ad36-67f3d0286924/application\"\n",
      "  }\n",
      "]\n"
     ]
    }
   ],
   "source": [
    "# %%\n",
    "from openai import OpenAI\n",
    "\n",
    "# Resume\n",
    "resume_paste = \"\"\"\n",
    "Eric Ciarla\n",
    "Co-Founder @ Firecrawl\n",
    "San Francisco, California, United States\n",
    "Summary\n",
    "Building…\n",
    "Experience\n",
    "Firecrawl\n",
    "Co-Founder\n",
    "April 2024 - Present (6 months)\n",
    "San Francisco, California, United States\n",
    "Firecrawl by Mendable. Building data extraction infrastructure for AI. Used by\n",
    "Amazon, Zapier, and Nvidia (YC S22)\n",
    "Mendable\n",
    "2 years 7 months\n",
    "Co-Founder @ Mendable.ai\n",
    "March 2022 - Present (2 years 7 months)\n",
    "San Francisco, California, United States\n",
    "- Built an AI powered search platform that served millions of queries for\n",
    "hundreds of customers (YC S22)\n",
    "- We were one of the first LLM powered apps adopted by industry leaders like\n",
    "Coinbase, Snap, DoorDash, and MongoDB\n",
    "Co-Founder @ SideGuide\n",
    "March 2022 - Present (2 years 7 months)\n",
    "San Francisco, California, United States\n",
    "- Built and scaled an online course platform with a community of over 50,000\n",
    "developers\n",
    "- Selected for Y Combinator S22 batch, 2% acceptance rate\n",
    "Fracta\n",
    "Data Engineer\n",
    "2022 - 2022 (less than a year)\n",
    "Palo Alto, California, United States\n",
    "- Demoed tool during sales calls and provided technical support during the\n",
    "entire customer lifecycle\n",
    "- Mined, wrangled, & visualized geospatial and water utility data for predictive\n",
    "analytics & ML workflows (Python, QGIS)\n",
    "Ford Motor Company\n",
    "Data Scientist\n",
    "2021 - 2021 (less than a year)\n",
    "Dearborn, Michigan, United States\n",
    "- Extracted, cleaned, and joined data from multiple sources using SQL,\n",
    "Hadoop, and Alteryx\n",
    "- Used Bayesian Network Structure Learning (BNLearn, R) to uncover the\n",
    "relationships between survey free response verbatim topics (derived from\n",
    "natural language processing models) and numerical customer experience\n",
    "scores\n",
    "MDRemindME\n",
    "Co-Founder\n",
    "2018 - 2020 (2 years)\n",
    "Durham, New Hampshire, United States\n",
    "- Founded and led a healthtech startup aimed at improving patient adherence\n",
    "to treatment plans through an innovative engagement and retention tool\n",
    "- Piloted the product with healthcare providers and patients, gathering critical\n",
    "insights to refine functionality and enhance user experience\n",
    "- Secured funding through National Science Foundation I-CORPS Grant and\n",
    "UNH Entrepreneurship Center Seed Grant\n",
    "Education\n",
    "Y Combinator\n",
    "S22\n",
    "University of New Hampshire\n",
    "Economics and Philosophy\n",
    "\"\"\"\n",
    "\n",
    "# Use o1-preview to choose which jobs should be applied to based on the resume\n",
    "client = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
    "\n",
    "prompt = f\"\"\"\n",
    "Please analyze the resume and job listings, and return a JSON list of the top 3 roles that best fit the candidate's experience and skills. Include only the job title, compensation, and apply link for each recommended role. The output should be a valid JSON array of objects in the following format, with no additional text:\n",
    "\n",
    "[\n",
    "  {{\n",
    "    \"job_title\": \"Job Title\",\n",
    "    \"compensation\": \"Compensation (if available, otherwise empty string)\",\n",
    "    \"apply_link\": \"Application URL\"\n",
    "  }},\n",
    "  ...\n",
    "]\n",
    "\n",
    "Based on the following resume:\n",
    "{resume_paste}\n",
    "\n",
    "And the following job listings:\n",
    "{json.dumps(extracted_data, indent=2)}\n",
    "\"\"\"\n",
    "\n",
    "completion = client.chat.completions.create(\n",
    "    model=\"o1-preview\",\n",
    "    messages=[\n",
    "        {\n",
    "            \"role\": \"user\",\n",
    "            \"content\": [\n",
    "                {\n",
    "                    \"type\": \"text\",\n",
    "                    \"text\": prompt\n",
    "                }\n",
    "            ]\n",
    "        }\n",
    "    ]\n",
    ")\n",
    "\n",
    "recommended_jobs = json.loads(completion.choices[0].message.content.strip())\n",
    "\n",
    "print(\"Recommended jobs:\")\n",
    "print(json.dumps(recommended_jobs, indent=2))"
   ]
  },
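  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%\n",
    "# Defensive parsing (added sketch, not in the original notebook): models\n",
    "# sometimes wrap JSON answers in ```json fences, which would make the bare\n",
    "# json.loads call above raise. Stripping fences first makes parsing sturdier,\n",
    "# e.g. recommended_jobs = parse_model_json(completion.choices[0].message.content).\n",
    "def parse_model_json(raw):\n",
    "    text = raw.strip()\n",
    "    if text.startswith(\"```\"):\n",
    "        # Drop the opening fence (with optional language tag) and the closing fence\n",
    "        text = text.split(\"\\n\", 1)[1] if \"\\n\" in text else text\n",
    "        text = text.rsplit(\"```\", 1)[0]\n",
    "    return json.loads(text)\n"
   ]
  },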
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%\n",
    "# Scrape each of the apply links with the Firecrawl /v1/scrape endpoint\n",
    "def scrape_apply_link(url):\n",
    "    api_url = \"https://api.firecrawl.dev/v1/scrape\"\n",
    "    headers = {\n",
    "        \"Authorization\": f\"Bearer {firecrawl_api_key}\",\n",
    "        \"Content-Type\": \"application/json\"\n",
    "    }\n",
    "    payload = {\n",
    "        \"url\": url\n",
    "    }\n",
    "\n",
    "    response = requests.post(api_url, json=payload, headers=headers)\n",
    "    if response.status_code == 200:\n",
    "        return response.json()\n",
    "    else:\n",
    "        print(f\"Error scraping {url}: {response.status_code}\")\n",
    "        return None\n",
    "\n",
    "scraped_job_data = []\n",
    "for job in recommended_jobs:\n",
    "    apply_link = job.get('apply_link')\n",
    "    if apply_link:\n",
    "        scraped_data = scrape_apply_link(apply_link)\n",
    "        if scraped_data:\n",
    "            scraped_job_data.append({\n",
    "                'job_title': job['job_title'],\n",
    "                'compensation': job['compensation'],\n",
    "                'apply_link': apply_link,\n",
    "                'scraped_content': scraped_data\n",
    "            })\n",
    "\n",
    "print(f\"Scraped {len(scraped_job_data)} job application pages\")"
   ]
  },
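  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%\n",
    "# Quick sanity check (added sketch, not in the original notebook): peek at\n",
    "# the start of each scraped page. The v1 scrape endpoint nests page content\n",
    "# under data.markdown, so an empty preview here signals a failed scrape\n",
    "# before any tokens are spent on generation.\n",
    "for job in scraped_job_data:\n",
    "    markdown = job['scraped_content'].get('data', {}).get('markdown', '')\n",
    "    preview = markdown[:120].replace('\\n', ' ')\n",
    "    print(f\"{job['job_title']}: {preview or '(no content scraped)'}\")\n"
   ]
  },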
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%\n",
    "# Use o1 to write the application for you and return it as JSON\n",
    "def generate_application(job_data, resume_paste):\n",
    "    # Pull the page text out of the /v1/scrape response; v1 returns the\n",
    "    # content under data.markdown rather than a top-level 'text' field\n",
    "    scraped_text = job_data['scraped_content'].get('data', {}).get('markdown', '')\n",
    "\n",
    "    # Limit to the first 1000 characters to avoid token limits\n",
    "    snippet = scraped_text[:1000]\n",
    "\n",
    "    prompt = f\"\"\"\n",
    "    Based on the following job information, scraped content from the application page, and the provided resume, write a tailored job application:\n",
    "\n",
    "    Job Title: {job_data['job_title']}\n",
    "    Compensation: {job_data['compensation']}\n",
    "    Scraped Content: {snippet}\n",
    "\n",
    "    Resume:\n",
    "    {resume_paste}\n",
    "\n",
    "    Please format the application as a JSON object with the following fields:\n",
    "    - cover_letter: A personalized cover letter addressing key points from the scraped content and highlighting relevant experience from the resume\n",
    "    - resume_highlights: Key points from the resume that align with the job requirements mentioned in the scraped content\n",
    "    - questions: Any questions you have about the position, derived from the available information\n",
    "\n",
    "    Ensure the content is specifically tailored to the information provided in the scraped content and leverages the experience detailed in the resume.\n",
    "    \"\"\"\n",
    "\n",
    "    try:\n",
    "        completion = client.chat.completions.create(\n",
    "            model=\"o1-preview\",\n",
    "            messages=[\n",
    "                {\"role\": \"user\", \"content\": prompt}\n",
    "            ]\n",
    "        )\n",
    "        return json.loads(completion.choices[0].message.content)\n",
    "    except Exception as e:\n",
    "        print(f\"Error generating application: {str(e)}\")\n",
    "        return None\n",
    "\n",
    "applications = []\n",
    "for job in scraped_job_data:\n",
    "    application = generate_application(job, resume_paste)\n",
    "    if application:\n",
    "        applications.append({\n",
    "            \"job_title\": job[\"job_title\"],\n",
    "            \"apply_link\": job[\"apply_link\"],\n",
    "            \"application\": application\n",
    "        })\n",
    "\n",
    "print(f\"Generated {len(applications)} job applications based on scraped content and resume\")\n",
    "print(json.dumps(applications, indent=2))\n",
    "\n",
    "# Save the JSON to a file\n",
    "output_file = \"generated_applications.json\"\n",
    "with open(output_file, \"w\") as f:\n",
    "    json.dump(applications, f, indent=2)\n",
    "\n",
    "print(f\"Saved generated applications to {output_file}\")"
   ]
  },
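  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%\n",
    "# Optional follow-up (added sketch, not in the original notebook): write each\n",
    "# generated cover letter to its own text file so the drafts are easy to\n",
    "# review and edit before applying. Filenames are derived from the job title.\n",
    "for app_entry in applications:\n",
    "    fields = app_entry[\"application\"]\n",
    "    if not isinstance(fields, dict):\n",
    "        continue  # Skip entries the model did not return as a JSON object\n",
    "    safe_title = app_entry[\"job_title\"].lower().replace(\" \", \"_\").replace(\",\", \"\")\n",
    "    path = f\"cover_letter_{safe_title}.txt\"\n",
    "    with open(path, \"w\") as f:\n",
    "        f.write(fields.get(\"cover_letter\", \"\"))\n",
    "    print(f\"Wrote {path}\")\n"
   ]
  }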
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}