Customer Logos
import pandas as pd
# Input CSV to scan and output CSV that receives the unique domains.
file_path = 'path_to_your_csv_file.csv'  # Replace with your file path
output_file = 'unique_domains.csv'

# Exact header spellings, checked first and in this priority order.
email_column_variations = ['Email', 'e-mail', 'E-mail', 'EMAIL', 'E-MAIL', 'emails', 'Emails', 'E-mails', 'E-MAILS']


def find_email_column(columns):
    """Return the name of the e-mail column among *columns*.

    The exact spellings in ``email_column_variations`` are tried first, in
    order. If none is present, the headers are compared after stripping
    whitespace, lower-casing and removing ``-``, ``_`` and spaces, so that
    headers such as ``email``, ``E-Mail`` or ``" Emails "`` are also found.

    Args:
        columns: iterable of column labels (e.g. ``DataFrame.columns``).

    Returns:
        The matching column label exactly as it appears in *columns*.

    Raises:
        ValueError: if no column looks like an e-mail column.
    """
    columns = list(columns)

    for candidate in email_column_variations:
        if candidate in columns:
            return candidate

    # Fallback: tolerate case, padding and separator differences.
    for col in columns:
        key = str(col).strip().lower()
        for sep in "-_ ":
            key = key.replace(sep, "")
        if key in ("email", "emails"):
            return col

    raise ValueError("The CSV file does not have an 'Email' column or any variation of it.")


def extract_unique_domains(emails):
    """Return the distinct domains found in a Series of e-mail addresses.

    Args:
        emails: pandas Series holding e-mail addresses; missing or
            non-string values are allowed.

    Returns:
        Array-like of unique domains in order of first appearance,
        lower-cased, with entries that contain no ``@domain`` part omitted.
    """
    # astype(str) keeps the .str accessor usable when the column was parsed
    # as a non-string dtype (for example a column that is entirely empty).
    domains = emails.astype(str).str.extract(r'@([\w\.-]+)', expand=False)
    # Domain names are case-insensitive, so normalise before de-duplicating;
    # dropna() keeps unparsable rows from producing a blank output line.
    return domains.str.lower().dropna().unique()


def main():
    """Read ``file_path`` and write its unique e-mail domains to ``output_file``."""
    data = pd.read_csv(file_path)
    email_column = find_email_column(data.columns)
    unique_domains = extract_unique_domains(data[email_column])
    unique_domains_df = pd.DataFrame({'Unique Domains': unique_domains})
    unique_domains_df.to_csv(output_file, index=False)


if __name__ == "__main__":
    main()
print(f"Unique domains have been saved to {output_file}")
Last updated
Was this helpful?

