1
0
intelektikos-pagrindai/cleanup_data.py
2024-02-25 23:05:52 +02:00

28 lines
909 B
Python

import pandas as pd
def to_categorical(column, subdivisions):
    """Bin a numeric column into evenly spaced, whole-number levels.

    The column is min-max scaled onto the range ``[0, subdivisions]`` and
    then rounded to zero decimals, so the result takes at most
    ``subdivisions + 1`` distinct values. The levels are returned as floats.

    Args:
        column: Numeric pandas Series (or another array-like that provides
            ``.min()``, ``.max()``, element-wise arithmetic and ``.round()``).
        subdivisions: Number of equal-width steps between the smallest and
            the largest value of the column.

    Returns:
        The level of every value in ``column``. A column whose values are
        all equal maps to level 0 everywhere.
    """
    min_value = column.min()
    value_range = column.max() - min_value
    if value_range == 0:
        # Constant column: dividing by the zero range would give 0/0 (NaN)
        # for every row, so divide by 1 and keep all values on level 0.
        value_range = 1
    return ((column - min_value) / value_range * subdivisions).round(0)
def normalize_column(column):
    """Min-max scale a numeric column onto the range ``[0, 1]``.

    The bounds are taken with the column's own ``.min()`` / ``.max()``
    methods rather than the ``min`` / ``max`` builtins: for a pandas Series
    these skip missing values, whereas the builtins compare against NaN and
    give an order-dependent (often NaN) result.

    Args:
        column: Numeric pandas Series (or another array-like that provides
            ``.min()``, ``.max()`` and element-wise arithmetic).

    Returns:
        The scaled column. Missing values in a Series stay missing, and a
        column whose values are all equal maps to 0 everywhere.
    """
    min_value = column.min()
    value_range = column.max() - min_value
    if value_range == 0:
        # Constant column: dividing by the zero range would give 0/0 (NaN)
        # for every row, so divide by 1 and return zeros instead.
        value_range = 1
    return (column - min_value) / value_range
# Load the raw dataset and discard the identifier column.
apples = pd.read_csv("apple_quality.csv")
apples.drop(columns=['A_id'], inplace=True)

# Encode the quality label as an integer code ('good' -> 1, 'bad' -> 0).
quality_codes = {'good':1, 'bad':0}
apples['Quality'] = apples['Quality'].map(quality_codes)

# Add binned (10-step) variants of selected features as extra columns.
binned_features = {
    'Juiciness_categorical': 'Juiciness',
    'Ripeness_categorical': 'Ripeness',
}
for binned_name, source_name in binned_features.items():
    apples[binned_name] = to_categorical(apples[source_name], 10)

# Normalize every column except the label, then re-attach the label last.
feature_names = apples.drop(columns=["Quality"]).columns
normalized_apples = pd.DataFrame(
    {name: normalize_column(apples[name]) for name in feature_names}
)
normalized_apples["Quality"] = apples["Quality"]

# Write the cleaned table without the index column.
normalized_apples.to_csv("apple_quality_clean.csv", index=False)