Update app.py

2024-11-16 11:42:24 +08:00 · 2024-10-20 18:08:38 +05:30 · 2024-10-20 18:08:38 +05:30 · d113199a29
commit d113199a29
parent 8a4ee4482d
1 changed file with 58 additions and 34 deletions
--- a/examples/sales_web_crawler/app.py
+++ b/examples/sales_web_crawler/app.py
@@ -1,13 +1,13 @@
 import csv
 import json
 import os
-import uuid

 from dotenv import load_dotenv
 from firecrawl import FirecrawlApp
 from openai import OpenAI
 from serpapi import GoogleSearch
-from tqdm import tqdm
+from swarm import Agent
+from swarm.repl import run_demo_loop

 load_dotenv()

@@ -17,14 +17,14 @@ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

 def search_google(query, objective):
    """Search Google using SerpAPI."""
-    # print(f"Parameters: query={query}, objective={objective}")
+    print(f"Parameters: query={query}, objective={objective}")
    search = GoogleSearch({"q": query, "api_key": os.getenv("SERP_API_KEY")})
    results = search.get_dict().get("organic_results", [])
    return {"objective": objective, "results": results}

 def scrape_url(url, objective):
    """Scrape a website using Firecrawl."""
-    # print(f"Parameters: url={url}, objective={objective}")
+    print(f"Parameters: url={url}, objective={objective}")
    scrape_status = app.scrape_url(
        url,
        params={'formats': ['markdown']}
@@ -33,29 +33,29 @@ def scrape_url(url, objective):

 def crawl_url(url, objective):
    """Crawl a website using Firecrawl."""
-    # print(f"Parameters: url={url}, objective={objective}")
+    print(f"Parameters: url={url}, objective={objective}")
    # If using a crawled url set, pass the ID in the function call below
    # scrape_status = app.check_crawl_status("c99c9598-5a21-46d3-bced-3444a8b1942d")
    # scrape_status['results'] = scrape_status['data']
    scrape_status = app.crawl_url(
        url,
-        params={'limit': 5, 'scrapeOptions': {'formats': ['markdown']}}
+        params={'limit': 10, 'scrapeOptions': {'formats': ['markdown']}}
    )
    return {"objective": objective, "results": scrape_status}

 def analyze_website_content(content, objective):
    """Analyze the scraped website content using OpenAI."""
-    # print(f"Parameters: content={content[:50]}..., objective={objective}")
+    print(f"Parameters: content={content[:50]}..., objective={objective}")
    analysis = generate_completion(
        "website data extractor",
        f"Analyze the following website content and extract a JSON object based on the objective. Do not write the ```json and ``` to denote a JSON when returning a response",
        "Objective: " + objective + "\nContent: " + content
    )
-    return {"objective": objective, "results": analysis}
+    return {"objective": objective, "results": json.loads(analysis)}

 def generate_completion(role, task, content):
    """Generate a completion using OpenAI."""
-    # print(f"Parameters: role={role}, task={task[:50]}..., content={content[:50]}...")
+    print(f"Parameters: role={role}, task={task[:50]}..., content={content[:50]}...")
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
@@ -76,31 +76,55 @@ def read_websites_from_csv(file_path):

 def write_results_to_json(results, file_path):
    """Write results to a JSON file."""
-    with open(file_path, mode='w') as file:
-        json.dump(results, file, indent=4)
+    with open(file_path, mode='w', encoding='utf-8') as file:
+        json.dump(json.loads(results), file, ensure_ascii=False)

-def process_websites(file_path):
-    """Process websites from a CSV file and write results to a new JSON file."""
-    results = []
-    websites = read_websites_from_csv(file_path)
-    for website in websites:
-        search_results = search_google(website, "Search website")
-        if search_results['results']:
-            top_result = search_results['results'][0]
-            url = top_result['link']
-            unique_filename = f'output_{uuid.uuid4()}.json'
-            crawl_results = crawl_url(url, "Crawl website")
-            if crawl_results['results']:
-                for each_result in tqdm(crawl_results['results']['data'], desc="Analyzing crawl results"):
-                    analysis_results = analyze_website_content(each_result['markdown'], "Extract emails, names, and titles of the people and companies found.")
-                    try:
-                        result = json.loads(analysis_results['results'])
-                        if result:
-                            results.append(result)
-                            write_results_to_json(results, unique_filename)
-                    except:
-                        continue
+def handoff_to_search_google():
+    """Hand off the search query to the search google agent."""
+    return google_search_agent
+
+def handoff_to_map_url():
+    """Hand off the url to the map url agent."""
+    return crawl_website_agent
+
+def handoff_to_analyst():
+    """Hand off the website content to the analyst agent."""
+    return analyst_agent
+
+def handoff_to_writer():
+    """Hand off the results to the writer agent."""
+    return writer_agent
+
+user_interface_agent = Agent(
+    name="User Interface Agent",
+    instructions="You are a user interface agent that handles all interactions with the user. You need to always start with reading a CSV, then perform web data extraction objective that the user wants to achieve by searching the web, crawling the website URL, and extracting the content from a specific page. Be concise.",
+    functions=[read_websites_from_csv, handoff_to_search_google],
+)
+
+google_search_agent = Agent(
+    name="Google Search Agent",
+    instructions="You are a google search agent specialized in searching the web. Only search for the website not any specific page. When you are done, you must hand off to the crawl agent.",
+    functions=[search_google, handoff_to_map_url],
+)
+
+crawl_website_agent = Agent(
+    name="Crawl Website Agent",
+    instructions="You are a crawl url agent specialized in crawling the web pages. When you are done, you must hand off the results to the analyst agent.",
+    functions=[crawl_url, handoff_to_analyst],
+)
+
+analyst_agent = Agent(
+    name="Analyst Agent",
+    instructions="You are an analyst agent that examines website content and returns a JSON object. When you are done, you must hand off the results to the writer agent.",
+    functions=[analyze_website_content, handoff_to_writer],
+)
+
+writer_agent = Agent(
+    name="Writer Agent",
+    instructions="You are a writer agent that writes the final results to a JSON file.",
+    functions=[write_results_to_json],
+)

 if __name__ == "__main__":
-    # Process websites from the CSV file
-    process_websites('websites.csv')
+    # Run the demo loop with the user interface agent
+    run_demo_loop(user_interface_agent, stream=True)