diff --git a/.gitignore b/.gitignore
index 240d6937..dcfd499a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,3 +28,4 @@ apps/js-sdk/firecrawl/dist
 
 /examples/o1_web_crawler/firecrawl_env
 /examples/crm_lead_enrichment/crm_lead_enrichment_env
+/.venv
diff --git a/examples/openai_swarm_firecrawl_web_extractor/.env.example b/examples/openai_swarm_firecrawl_web_extractor/.env.example
new file mode 100644
index 00000000..9385c44f
--- /dev/null
+++ b/examples/openai_swarm_firecrawl_web_extractor/.env.example
@@ -0,0 +1,3 @@
+OPENAI_API_KEY=
+FIRECRAWL_API_KEY=
+SERP_API_KEY=
\ No newline at end of file
diff --git a/examples/openai_swarm_firecrawl_web_extractor/README.md b/examples/openai_swarm_firecrawl_web_extractor/README.md
new file mode 100644
--- /dev/null
+++ b/examples/openai_swarm_firecrawl_web_extractor/README.md
@@ -0,0 +1,39 @@
+# Swarm Firecrawl Web Extractor
+
+A multi-agent system using [OpenAI Swarm](https://github.com/openai/swarm) for AI-powered web data extraction, using [SerpAPI](https://serpapi.com) for Google search and [Firecrawl](https://firecrawl.dev) for site mapping and web scraping.
+
+## Agents
+
+1. User Interface: Manages user interactions and collects the extraction objective
+2. Google Search: Finds the relevant website via SerpAPI
+3. Map URL: Maps the website's pages via Firecrawl API and picks the most relevant one
+4. Website Scraper: Extracts clean LLM-ready content via Firecrawl API
+5. Analyst: Extracts a JSON object from the scraped content based on the objective
+
+## Requirements
+
+- [Firecrawl](https://firecrawl.dev) API key
+- [OpenAI](https://platform.openai.com/api-keys) API key
+- [SerpAPI](https://serpapi.com) API key
+
+## Setup
+
+1. Install the required packages:
+   ```
+   pip install -r requirements.txt
+   ```
+
+2. Set up your environment variables in a `.env` file:
+   ```
+   OPENAI_API_KEY=your_openai_api_key
+   FIRECRAWL_API_KEY=your_firecrawl_api_key
+   SERP_API_KEY=your_serp_api_key
+   ```
+
+## Usage
+
+Run the main script to start the interactive demo:
+
+```
+python main.py
+```
\ No newline at end of file
diff --git a/examples/openai_swarm_firecrawl_web_extractor/main.py b/examples/openai_swarm_firecrawl_web_extractor/main.py
new file mode 100644
--- /dev/null
+++ b/examples/openai_swarm_firecrawl_web_extractor/main.py
@@ -0,0 +1,162 @@
+"""Multi-agent web data extractor built on OpenAI Swarm, SerpAPI and Firecrawl.
+
+The agents form a pipeline: user interface -> Google search -> map URL ->
+website scraper -> analyst. Each agent reaches the next one by calling its
+``handoff_to_*`` function.
+"""
+
+import os
+from firecrawl import FirecrawlApp
+from swarm import Agent
+from swarm.repl import run_demo_loop
+import dotenv
+from serpapi import GoogleSearch
+from openai import OpenAI
+
+dotenv.load_dotenv()
+
+# Initialize FirecrawlApp and OpenAI
+app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+# NOTE: Swarm turns each tool function's docstring into the description the
+# model sees, so the docstrings below are kept short and unchanged; extra
+# documentation lives in comments instead.
+
+
+# Returns {"objective", "results"}, where results is the SerpAPI
+# "organic_results" list (empty when the response lacks that key).
+def search_google(query, objective):
+    """Search Google using SerpAPI."""
+    print(f"Parameters: query={query}, objective={objective}")
+    search = GoogleSearch({"q": query, "api_key": os.getenv("SERP_API_KEY")})
+    results = search.get_dict().get("organic_results", [])
+    return {"objective": objective, "results": results}
+
+
+# Generates a short search query from the objective, maps the site with it,
+# and returns only the first link (or an empty list when nothing is found).
+def map_url_pages(url, objective):
+    """Map a website's pages using Firecrawl."""
+    search_query = generate_completion(
+        "website search query generator",
+        f"Generate a 1-2 word search query for the website: {url} based on the objective",
+        "Objective: " + objective
+    )
+    print(f"Parameters: url={url}, objective={objective}, search_query={search_query}")
+    map_status = app.map_url(url, params={'search': search_query})
+    # NOTE(review): the map response shape appears to differ between
+    # firecrawl-py releases (a dict flagged 'success': True or
+    # 'status': 'success', a bare list of links, or an object with a
+    # `links` attribute) - confirm against the installed version. Checking
+    # only 'status' silently dropped the links for every other shape.
+    if isinstance(map_status, dict):
+        succeeded = (
+            map_status.get('success') is True
+            or map_status.get('status') == 'success'
+        )
+        links = map_status.get('links', []) if succeeded else []
+    elif isinstance(map_status, (list, tuple)):
+        links = map_status
+    else:
+        links = getattr(map_status, 'links', None) or []
+    top_link = links[0] if links else None
+    return {"objective": objective, "results": [top_link] if top_link else []}
+
+
+# Returns {"objective", "results"}, where results is the Firecrawl scrape
+# response requested in markdown format.
+def scrape_url(url, objective):
+    """Scrape a website using Firecrawl."""
+    print(f"Parameters: url={url}, objective={objective}")
+    scrape_status = app.scrape_url(
+        url,
+        params={'formats': ['markdown']}
+    )
+    return {"objective": objective, "results": scrape_status}
+
+
+# `content` must be text: it is sliced for logging and concatenated into the
+# prompt. Returns {"objective", "results"} with the model's answer.
+def analyze_website_content(content, objective):
+    """Analyze the scraped website content using OpenAI."""
+    print(f"Parameters: content={content[:50]}..., objective={objective}")
+    analysis = generate_completion(
+        "website data extractor",
+        "Analyze the following website content and extract a JSON object based on the objective.",
+        "Objective: " + objective + "\nContent: " + content
+    )
+    return {"objective": objective, "results": analysis}
+
+
+# `role` and `task` build the system prompt; `content` is the user message.
+# Returns the message content of the first completion choice.
+def generate_completion(role, task, content):
+    """Generate a completion using OpenAI."""
+    print(f"Parameters: role={role}, task={task[:50]}..., content={content[:50]}...")
+    response = client.chat.completions.create(
+        model="gpt-4o",
+        messages=[
+            {"role": "system", "content": f"You are a {role}. {task}"},
+            {"role": "user", "content": content}
+        ]
+    )
+    return response.choices[0].message.content
+
+
+# Handoff functions: returning an Agent from a tool call is how the current
+# agent passes control to the next one. The agents are module-level names
+# defined further down and resolved when the function is called.
+def handoff_to_search_google():
+    """Hand off the search query to the search google agent."""
+    return google_search_agent
+
+
+def handoff_to_map_url():
+    """Hand off the url to the map url agent."""
+    return map_url_agent
+
+
+def handoff_to_website_scraper():
+    """Hand off the url to the website scraper agent."""
+    return website_scraper_agent
+
+
+def handoff_to_analyst():
+    """Hand off the website content to the analyst agent."""
+    return analyst_agent
+
+
+user_interface_agent = Agent(
+    name="User Interface Agent",
+    instructions="You are a user interface agent that handles all interactions with the user. You need to always start with an web data extraction objective that the user wants to achieve by searching the web, mapping the web pages, and extracting the content from a specific page. Be concise.",
+    functions=[handoff_to_search_google],
+)
+
+google_search_agent = Agent(
+    name="Google Search Agent",
+    instructions="You are a google search agent specialized in searching the web. Only search for the website not any specific page. When you are done, you must hand off to the map agent.",
+    functions=[search_google, handoff_to_map_url],
+)
+
+map_url_agent = Agent(
+    name="Map URL Agent",
+    instructions="You are a map url agent specialized in mapping the web pages. When you are done, you must hand off the results to the website scraper agent.",
+    functions=[map_url_pages, handoff_to_website_scraper],
+)
+
+website_scraper_agent = Agent(
+    name="Website Scraper Agent",
+    instructions="You are a website scraper agent specialized in scraping website content. When you are done, you must hand off the website content to the analyst agent to extract the data based on the objective.",
+    functions=[scrape_url, handoff_to_analyst],
+)
+
+analyst_agent = Agent(
+    name="Analyst Agent",
+    instructions="You are an analyst agent that examines website content and returns a JSON object. When you are done, you must return a JSON object.",
+    functions=[analyze_website_content],
+)
+
+if __name__ == "__main__":
+    # Run the demo loop with the user interface agent
+    run_demo_loop(user_interface_agent, stream=True)
\ No newline at end of file
diff --git a/examples/openai_swarm_firecrawl_web_extractor/requirements.txt b/examples/openai_swarm_firecrawl_web_extractor/requirements.txt
new file mode 100644
--- /dev/null
+++ b/examples/openai_swarm_firecrawl_web_extractor/requirements.txt
@@ -0,0 +1,5 @@
+firecrawl-py
+openai
+google-search-results
+python-dotenv
+git+https://github.com/openai/swarm.git
\ No newline at end of file