# 2024-11-18 10:59:21 +00:00
import pandas as pd
# 2024-11-18 13:03:25 +00:00
# Constants
# Excel workbook that main() scans; a relative path with no directory part.
FILE_PATH = 'ps (30).xlsx'
# Worksheet inside that workbook to load.
SHEET_NAME = 'ps (30)'
# Header of the column whose repeated values are reported.
COLUMN_NAME = 'Student-ID'
# 2024-11-18 10:59:21 +00:00
# 2024-11-18 13:03:25 +00:00
def find_duplicates(file_path, sheet_name, column_name):
    """Return each value that occurs more than once in one worksheet column.

    Args:
        file_path: Location of the Excel workbook, handed to ``pd.read_excel``.
        sheet_name: Worksheet to load from the workbook.
        column_name: Header of the column to check for repeated values.

    Returns:
        A ``pandas.Series`` containing every repeated value exactly once, in
        order of first appearance and labelled with the row index of that
        first appearance. The Series is empty when no value repeats.

    Raises:
        KeyError: If ``column_name`` is not a column of the loaded sheet.
    """
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    # Fail with a readable message instead of pandas' bare index error.
    if column_name not in df.columns:
        raise KeyError(
            f"Column {column_name!r} not found in sheet {sheet_name!r}"
        )

    values = df[column_name]
    # keep=False marks every member of a repeated group (not just the later
    # occurrences), so the first occurrence survives the de-duplication below.
    # NOTE(review): pandas appears to treat missing cells as equal to each
    # other here, so several blank cells would be reported as one duplicated
    # NaN - confirm that this is the desired behaviour.
    repeated = values[values.duplicated(keep=False)]
    return repeated.drop_duplicates()
# 2024-11-18 10:59:21 +00:00
# 2024-11-18 13:03:25 +00:00
def main():
    """Print the repeated ``COLUMN_NAME`` values found in the configured sheet.

    Loads ``SHEET_NAME`` from ``FILE_PATH`` via ``find_duplicates`` and prints
    either the repeated values together with their count, or a notice that
    no duplicates were found.
    """
    repeated_ids = find_duplicates(FILE_PATH, SHEET_NAME, COLUMN_NAME)

    if repeated_ids.empty:
        print("No duplicates found.")
        return

    # The count is the number of distinct repeated values, not the number of
    # rows involved, because find_duplicates returns each value only once.
    print(f"Duplicated {COLUMN_NAME} values (count: {len(repeated_ids)}):")
    print(repeated_ids)
# 2024-11-18 10:59:21 +00:00
# 2024-11-18 13:03:25 +00:00
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()