Website Sitemap

This is the live Blue website sitemap with URL references:
Python Code

This is the Python code to generate the sitemap:
from urllib.parse import urlsplit

import requests
from bs4 import BeautifulSoup

def download_sitemap(url, timeout=30):
    """Fetch a sitemap over HTTP and return its body as text.

    Args:
        url: Address of the sitemap to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a silent host.

    Returns:
        The response body decoded to ``str``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text

def parse_sitemap(sitemap_xml, base_domain):
    """Build a nested dict of URL path segments from a sitemap document.

    Args:
        sitemap_xml: Sitemap XML text; every ``<loc>`` element is read as a URL.
        base_domain: Key used for the root of the returned structure.

    Returns:
        ``{base_domain: tree}`` where ``tree`` maps each path segment to a dict
        of its child segments (an empty dict for a page with no children).
    """
    soup = BeautifulSoup(sitemap_xml, 'xml')
    root = {}

    for loc in soup.find_all('loc'):
        url_text = loc.text.strip()
        # urlsplit only recognises the host when a scheme (or a leading "//")
        # is present; add the marker so scheme-less entries still parse.
        if '://' not in url_text:
            url_text = '//' + url_text
        try:
            path = urlsplit(url_text).path
        except ValueError:
            # Malformed entry (e.g. an unbalanced IPv6 bracket); skip it
            # rather than abort the whole sitemap.
            continue

        # Only the path identifies a page: scheme, host, query string and
        # fragment are dropped. Empty segments (trailing or doubled slashes)
        # are skipped so that "https://host/" does not create a '' page.
        node = root
        for segment in path.split('/'):
            if segment:
                node = node.setdefault(segment, {})

    return {base_domain: root}

def capitalize_first_letter(text):
    """Upper-case the first character of each '/'-separated segment.

    The rest of every segment is left untouched, so ``"aboutUs/FAQ"`` becomes
    ``"AboutUs/FAQ"``. (``str.capitalize`` would also lower-case the remaining
    characters, turning ``"FAQ"`` into ``"Faq"``.)
    """
    return '/'.join(segment[:1].upper() + segment[1:] for segment in text.split('/'))

def generate_markdown(structure, base_url, parent_path='', output_file='output.md', prefix=''):
    """Render a nested page structure as an indented Markdown link list.

    Args:
        structure: Dict mapping a page name to a dict of its subpages.
        base_url: Site domain; the entry whose key equals it is rendered as
            the site root, every other key as a path below
            ``https://{base_url}``.
        parent_path: Path of the enclosing page, used to build child URLs.
        output_file: File to write the list to; pass ``None`` to skip writing.
        prefix: Indentation placed before each list item at this level.

    Returns:
        The generated Markdown lines, whether or not a file was written.
    """
    lines = []
    for page, subpages in structure.items():
        if page == base_url:
            # Root entry: links to the bare domain, and its children start
            # from an empty path.
            page_path = ''
            page_display = base_url.capitalize()
            full_url = f"https://{base_url}"
        else:
            page_path = f"{parent_path}/{page}" if parent_path else page
            page_display = capitalize_first_letter(page)
            full_url = f"https://{base_url}/{page_path}"

        lines.append(f"{prefix}- [{page_display}]({full_url})")

        if subpages:
            # Nested levels only contribute lines (output_file=None); the
            # file is written once, by the outermost call.
            lines.extend(generate_markdown(subpages, base_url, page_path, output_file=None, prefix=prefix + "    "))

    if output_file:
        # Explicit UTF-8 so non-ASCII page names do not depend on the
        # platform's default encoding.
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines))
    return lines

# Script body: download the site's sitemap and render it as a nested
# Markdown list of links.
base_domain = 'blue.cc'
sitemap_url = f'https://{base_domain}/sitemap.xml'  # Your sitemap URL
sitemap_xml = download_sitemap(sitemap_url)
page_structure = parse_sitemap(sitemap_xml, base_domain)
# No output_file is passed, so the list goes to the default 'output.md'.
generate_markdown(page_structure, base_domain)

print("Markdown file generated.")
Previous: Website | Next: SEO
Last updated 1 year ago
Was this helpful?