"""Add internal links to a blog post using Firecrawl and OpenAI.

The script scrapes a blog post as markdown, maps the post's website to
collect its page URLs, and asks an OpenAI chat model to rewrite the post
with relevant internal links. It then prints the start of the revised post
together with the markdown link counts before and after.
"""

import json
import os
import re
from urllib.parse import urlsplit

from dotenv import load_dotenv
from firecrawl import FirecrawlApp
from openai import OpenAI

# Load environment variables
load_dotenv()

# Retrieve API keys from environment variables
firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Initialize the Firecrawl and OpenAI clients
app = FirecrawlApp(api_key=firecrawl_api_key)
client = OpenAI(api_key=openai_api_key)

# Blog post used when the user just presses Enter at the prompt.
DEFAULT_BLOG_URL = (
    "https://www.firecrawl.dev/blog/"
    "how-to-use-openai-o1-reasoning-models-in-applications"
)

# Chat model used to rewrite the post.
OPENAI_MODEL = "gpt-4o-mini"

# Matches inline markdown links of the form [text](target).
# NOTE: image syntax ![alt](src) contains the same shape and is counted too.
_MARKDOWN_LINK_RE = re.compile(r'\[.*?\]\(.*?\)')


def count_links(markdown_content: str) -> int:
    """Return the number of inline markdown links in *markdown_content*."""
    return len(_MARKDOWN_LINK_RE.findall(markdown_content))


def get_site_origin(url: str) -> str:
    """Return the ``scheme://host`` part of *url*.

    For a URL with a scheme and host this equals the first three
    ``/``-separated pieces of the URL. When the scheme is missing, the text
    before the first ``/`` is returned as a best-effort host.
    """
    parts = urlsplit(url)
    if parts.scheme and parts.netloc:
        return f"{parts.scheme}://{parts.netloc}"
    # Scheme-less input such as "example.com/blog/post": keep only the host.
    return url.split("/", 1)[0]


def build_prompt(blog_content: str, site_links: list) -> str:
    """Build the instruction prompt sent to the chat model.

    Args:
        blog_content: The original post in markdown.
        site_links: URLs of other pages on the same site; embedded in the
            prompt as indented JSON.
    """
    return f"""
You are an AI assistant helping to improve a blog post.

Here is the original blog post content:

{blog_content}

Here is a list of other pages on the website:

{json.dumps(site_links, indent=2)}

Please revise the blog post to include internal links to some of these pages where appropriate. Make sure the internal links are relevant and enhance the content.

Only return the revised blog post in markdown format.
"""


def main() -> None:
    """Run the interactive scrape -> map -> rewrite -> report workflow."""
    # Get user input; strip once so stray whitespace never reaches the APIs.
    blog_url = input("Enter the blog URL: ").strip()
    if not blog_url:
        blog_url = DEFAULT_BLOG_URL

    # Scrape the blog content
    print("Scraping the blog content...")
    blog_scrape_result = app.scrape_url(blog_url, params={'formats': ['markdown']})

    # Get the blog content in markdown format
    blog_content = blog_scrape_result.get('markdown', '')
    if not blog_content:
        # Nothing to revise: avoid mapping the site and paying for a
        # completion on an empty post.
        print("No markdown content was returned for this URL; nothing to revise.")
        return

    # Reduce the blog URL to the site origin (scheme + host)
    site_origin = get_site_origin(blog_url)

    # Map the website to get all links
    print("Mapping the website to get all links...")
    site_map = app.map_url(site_origin)

    # Get the list of URLs from the site map
    site_links = site_map.get('links', [])

    prompt = build_prompt(blog_content, site_links)

    # Use OpenAI API to get the revised blog post
    print("Generating the revised blog post with internal links...")
    completion = client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=[
            {
                "role": "user",
                "content": prompt
            }
        ],
        # The original post is supplied as the predicted output because the
        # revision is expected to repeat most of it unchanged.
        prediction={
            "type": "content",
            "content": blog_content
        }
    )

    # The message content can be None (e.g. a refusal); treat it as empty
    # text so the link count and the preview slice below cannot raise.
    revised_blog_post = completion.choices[0].message.content or ""

    # Count links in the original and revised blog post
    original_links_count = count_links(blog_content)
    revised_links_count = count_links(revised_blog_post)

    # Output a portion of the revised blog post and link counts
    print("\nRevised blog post (first 500 characters):")
    print(revised_blog_post[:500])
    print(f"\nNumber of links in the original blog post: {original_links_count}")
    print(f"Number of links in the revised blog post: {revised_links_count}")


if __name__ == "__main__":
    main()