mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Create o1_job_recommender.py
This commit is contained in:
parent
51bc2f25fe
commit
5c4d436f1e
283
examples/o1_job_recommender/o1_job_recommender.py
Normal file
283
examples/o1_job_recommender/o1_job_recommender.py
Normal file
|
@ -0,0 +1,283 @@
|
|||
# %%
|
||||
# %%
|
||||
import os
|
||||
import requests
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from openai import OpenAI
|
||||
|
||||
# ANSI color codes
class Colors:
    """Namespace of ANSI escape sequences used to colorize console output.

    Each attribute is a plain string meant to be interpolated into a
    message; follow colored text with ``RESET``.
    """

    # Foreground colors, listed in ascending SGR code order (91-96).
    RED = "\x1b[91m"
    GREEN = "\x1b[92m"
    YELLOW = "\x1b[93m"
    BLUE = "\x1b[94m"
    MAGENTA = "\x1b[95m"
    CYAN = "\x1b[96m"

    # Clears any active color/attribute.
    RESET = "\x1b[0m"
|
||||
# Careers page whose job listings are scraped below.
jobs_page_url = "https://openai.com/careers/search"

# Load variables from a local .env file (if any) into the process
# environment before the keys are read.
load_dotenv()

# Firecrawl key is sent as a Bearer token on every scrape request;
# the OpenAI client is shared by all model calls in this script.
firecrawl_api_key = os.environ.get("FIRECRAWL_API_KEY")
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
|
||||
|
||||
# Resume text (pasted from a PDF export) that the recommendation prompt
# compares against the scraped job listings.
# NOTE: the literal must open with exactly three quotes; a fourth quote
# becomes a stray '"' as the first character of the resume.
resume_paste = """
Eric Ciarla
Co-Founder @ Firecrawl
San Francisco, California, United States
Summary
Building…
Experience
Firecrawl
Co-Founder
April 2024 - Present (6 months)
San Francisco, California, United States
Firecrawl by Mendable. Building data extraction infrastructure for AI. Used by
Amazon, Zapier, and Nvidia (YC S22)
Mendable
2 years 7 months
Co-Founder @ Mendable.ai
March 2022 - Present (2 years 7 months)
San Francisco, California, United States
- Built an AI powered search platform that that served millions of queries for
hundreds of customers (YC S22)
- We were one of the first LLM powered apps adopted by industry leaders like
Coinbase, Snap, DoorDash, and MongoDB
Co-Founder @ SideGuide
March 2022 - Present (2 years 7 months)
San Francisco, California, United States
- Built and scaled an online course platform with a community of over 50,000
developers
- Selected for Y Combinator S22 batch, 2% acceptance rate
Fracta
Data Engineer
2022 - 2022 (less than a year)
Palo Alto, California, United States
- Demoed tool during sales calls and provided technical support during the
entire customer lifecycle
Page 1 of 2
- Mined, wrangled, & visualized geospatial and water utility data for predictive
analytics & ML workflows (Python, QGIS)
Ford Motor Company
Data Scientist
2021 - 2021 (less than a year)
Dearborn, Michigan, United States
- Extracted, cleaned, and joined data from multiple sources using SQL,
Hadoop, and Alteryx
- Used Bayesian Network Structure Learning (BNLearn, R) to uncover the
relationships between survey free response verbatim topics (derived from
natural language processing models) and numerical customer experience
scores
MDRemindME
Co-Founder
2018 - 2020 (2 years)
Durham, New Hampshire, United States
- Founded and led a healthtech startup aimed at improving patient adherence
to treatment plans through an innovative engagement and retention tool
- Piloted the product with healthcare providers and patients, gathering critical
insights to refine functionality and enhance user experience
- Secured funding through National Science Foundation I-CORPS Grant and
UNH Entrepreneurship Center Seed Grant
Education
Y Combinator
S22
University of New Hampshire
Economics and Philosophy
"""
|
||||
|
||||
# First, scrape the jobs page using Firecrawl.
#
# Names set here and read by later cells:
#   html_content -- markdown of the jobs page; stays "" on any failure
#   prompt       -- link-extraction prompt; only assigned on success
html_content = ""  # one failure default instead of one per error branch
try:
    response = requests.post(
        "https://api.firecrawl.dev/v1/scrape",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {firecrawl_api_key}",
        },
        json={
            "url": jobs_page_url,
            "formats": ["markdown"],
        },
        # requests waits indefinitely by default; bound the call so a
        # stalled connection cannot hang the script.
        timeout=120,
    )

    if response.status_code == 200:
        result = response.json()
        if result.get('success'):
            markdown = (result.get('data') or {}).get('markdown') or ""
            if markdown:
                # Define the prompt for extracting apply links. The text is
                # kept flush-left so no code indentation leaks into it, and
                # the page is capped at 100000 characters.
                prompt = f"""
Extract up to 30 job application links from the given markdown content.
Return the result as a JSON object with a single key 'apply_links' containing an array of strings (the links).
The output should be a valid JSON object, with no additional text.
Do not include any JSON markdown formatting or code block indicators.
Provide only the raw JSON object as the response.

Example of the expected format:
{{"apply_links": ["https://example.com/job1", "https://example.com/job2", ...]}}

Markdown content:
{markdown[:100000]}
"""
                # Publish the content only once the prompt exists, so the
                # next cell never sees content without a prompt.
                html_content = markdown
                print(f"{Colors.GREEN}Successfully scraped the jobs page{Colors.RESET}")
            else:
                # A "successful" scrape without markdown is not usable.
                print(f"{Colors.RED}Failed to scrape the jobs page: no markdown content returned{Colors.RESET}")
        else:
            print(f"{Colors.RED}Failed to scrape the jobs page: {result.get('message', 'Unknown error')}{Colors.RESET}")
    else:
        print(f"{Colors.RED}Error {response.status_code}: {response.text}{Colors.RESET}")
except requests.RequestException as e:
    # Connection errors, timeouts, and other transport-level failures.
    print(f"{Colors.RED}An error occurred while scraping: {str(e)}{Colors.RESET}")
except json.JSONDecodeError as e:
    print(f"{Colors.RED}Error decoding JSON response: {str(e)}{Colors.RESET}")
except Exception as e:
    # Top-level boundary of this cell: report and continue with no content.
    print(f"{Colors.RED}An unexpected error occurred while scraping: {str(e)}{Colors.RESET}")
|
||||
|
||||
# Extract apply links from the scraped jobs page with gpt-4o.
#
# Reads `html_content` and `prompt` from the scrape step and leaves
# `apply_links` as a list of URL strings (empty on any failure).
apply_links = []
if html_content:
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            # JSON mode: the reply is a bare JSON object, so json.loads
            # below is not tripped up by code fences or extra prose.
            response_format={"type": "json_object"},
        )

        if completion.choices:
            # `content` can be None; normalize so .strip() is always safe.
            raw_reply = completion.choices[0].message.content or ""
            print(raw_reply)
            result = json.loads(raw_reply.strip())

            candidate_links = result['apply_links']
            if isinstance(candidate_links, list):
                # Keep only non-empty strings; anything else cannot be
                # posted as a URL in the next step.
                apply_links = [
                    candidate
                    for candidate in candidate_links
                    if isinstance(candidate, str) and candidate
                ]
                print(f"{Colors.GREEN}Successfully extracted {len(apply_links)} apply links{Colors.RESET}")
            else:
                # A bare string here would otherwise be iterated
                # character by character by the later loops.
                print(f"{Colors.RED}'apply_links' in OpenAI response is not a list{Colors.RESET}")
        else:
            print(f"{Colors.RED}No apply links extracted{Colors.RESET}")
    except json.JSONDecodeError as e:
        print(f"{Colors.RED}Error decoding JSON from OpenAI response: {str(e)}{Colors.RESET}")
    except KeyError as e:
        print(f"{Colors.RED}Expected key not found in OpenAI response: {str(e)}{Colors.RESET}")
    except Exception as e:
        # Top-level boundary of this cell: report and continue with no links.
        print(f"{Colors.RED}An unexpected error occurred during extraction: {str(e)}{Colors.RESET}")
else:
    print(f"{Colors.RED}No HTML content to process{Colors.RESET}")
|
||||
|
||||
# Accumulator for the structured data extracted from each job posting
extracted_data = list()


# %%
# Show every apply link that was found, one per line.
links_heading = f"{Colors.CYAN}Apply links:{Colors.RESET}"
print(links_heading)
for apply_url in apply_links:
    colored_url = f"{Colors.YELLOW}{apply_url}{Colors.RESET}"
    print(colored_url)
|
||||
|
||||
# %%
|
||||
# Process each apply link: ask Firecrawl to scrape the posting and return
# structured fields, appending each successful result to `extracted_data`.
# A failure on one link is reported and skipped; it never stops the loop.

# Loop-invariant request pieces, built once instead of per link.
scrape_headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {firecrawl_api_key}",
}
job_schema = {
    "type": "object",
    "properties": {
        "job_title": {"type": "string"},
        "sub_division_of_organization": {"type": "string"},
        "key_skills": {"type": "array", "items": {"type": "string"}},
        "compensation": {"type": "string"},
        "location": {"type": "string"},
        "apply_link": {"type": "string"},
    },
    "required": ["job_title", "sub_division_of_organization", "key_skills", "compensation", "location", "apply_link"],
}

for index, link in enumerate(apply_links):
    try:
        response = requests.post(
            "https://api.firecrawl.dev/v1/scrape",
            headers=scrape_headers,
            json={
                "url": link,
                "formats": ["extract"],
                # Click the job overview element before extraction runs.
                "actions": [{
                    "type": "click",
                    "selector": "#job-overview",
                }],
                "extract": {
                    "schema": job_schema,
                },
            },
            # requests waits indefinitely by default; bound each call so
            # one stalled posting cannot hang the whole run.
            timeout=120,
        )

        if response.status_code != 200:
            print(f"{Colors.RED}Error {response.status_code} while scraping job {index} ({link}): {response.text}{Colors.RESET}")
            continue

        result = response.json()
        if not result.get('success'):
            print(f"{Colors.RED}Failed to extract data for job {index} ({link}): {result.get('message', 'Unknown error')}{Colors.RESET}")
            continue

        extracted_data.append(result['data']['extract'])
        print(f"{Colors.GREEN}Data extracted for job {index}{Colors.RESET}")
    except requests.RequestException as e:
        # Connection errors, timeouts, and other transport-level failures.
        print(f"{Colors.RED}Request failed for job {index} ({link}): {str(e)}{Colors.RESET}")
    except Exception as e:
        # Best-effort loop: report anything else (bad JSON, missing keys)
        # and move on to the next link.
        print(f"{Colors.RED}An unexpected error occurred while processing job {index} ({link}): {str(e)}{Colors.RESET}")
|
||||
|
||||
|
||||
# %%
|
||||
# %%
|
||||
# Dump every extracted job record as pretty-printed JSON, each followed
# by a colored 50-dash divider.
print(f"{Colors.CYAN}Extracted data:{Colors.RESET}")
divider = f"{Colors.MAGENTA}{'-' * 50}{Colors.RESET}"
for posting in extracted_data:
    pretty_posting = json.dumps(posting, indent=2)
    print(pretty_posting)
    print(divider)
|
||||
|
||||
|
||||
# %%
|
||||
|
||||
|
||||
|
||||
|
||||
# Use o1-preview to choose which jobs should be applied to based on the resume.
#
# Reads `resume_paste` and `extracted_data`; leaves `recommended_jobs` as the
# parsed JSON reply (expected: a list of up to 3 objects), or [] when there
# is nothing to rank or the call/parse fails.
recommended_jobs = []

if not extracted_data:
    # Nothing to rank: skip the model call entirely.
    print(f"{Colors.RED}No job data was extracted; skipping recommendation{Colors.RESET}")
else:
    prompt = f"""
Please analyze the resume and job listings, and return a JSON list of the top 3 roles that best fit the candidate's experience and skills. Include only the job title, compensation, and apply link for each recommended role. The output should be a valid JSON array of objects in the following format, with no additional text:

[
{{
"job_title": "Job Title",
"compensation": "Compensation (if available, otherwise empty string)",
"apply_link": "Application URL"
}},
...
]

Based on the following resume:
{resume_paste}

And the following job listings:
{json.dumps(extracted_data, indent=2)}
"""

    try:
        completion = client.chat.completions.create(
            model="o1-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt,
                        }
                    ],
                }
            ],
        )

        # `content` can be None; normalize so the string ops are safe.
        raw_reply = (completion.choices[0].message.content or "").strip()

        # The model may wrap its answer in a Markdown code fence
        # (```json ... ```) despite the instructions; unwrap it so that
        # json.loads sees only the JSON payload.
        if raw_reply.startswith("```"):
            raw_reply = raw_reply.strip("`").strip()
            if raw_reply.lower().startswith("json"):
                raw_reply = raw_reply[len("json"):]
            raw_reply = raw_reply.strip()

        recommended_jobs = json.loads(raw_reply)
    except json.JSONDecodeError as e:
        print(f"{Colors.RED}Error decoding JSON from OpenAI response: {str(e)}{Colors.RESET}")
    except Exception as e:
        # Top-level boundary of this cell: report instead of crashing.
        print(f"{Colors.RED}An unexpected error occurred during recommendation: {str(e)}{Colors.RESET}")

print(f"{Colors.CYAN}Recommended jobs:{Colors.RESET}")
print(json.dumps(recommended_jobs, indent=2))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user