Files
FH/WS24_25/PyCharm/pythonProject/P4/data.py
2024-10-29 14:11:37 +01:00

126 lines
3.9 KiB
Python

import csv
from collections import Counter

import matplotlib.pyplot as plt
def frequency(input_file: str):
    """Count and print how often each value of the discrete variables occurs.

    The file is read as a semicolon-delimited CSV whose first row holds the
    column names. For every column named in ``discrete_variables`` the
    distinct values are tallied and the tallies are printed to the console.

    Args:
        input_file: Path to the semicolon-delimited CSV file.

    Returns:
        A dict mapping each discrete variable name to a ``Counter`` of the
        values found in that column. Values are the strings read from the
        file; a row with fewer fields than the header contributes ``None``
        for its missing columns.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If there is at least one data row and one of the discrete
            variables is not a column of the file.
    """
    discrete_variables = ['Street', 'Neighborhood', 'Bldg Type', 'House Style',
                          'Overall Qual', 'Overall Cond', 'Mo Sold', 'Yr Sold',
                          'Sale Type', 'Sale Condition']

    # Step 1: Read the CSV file. csv.DictReader is used instead of a manual
    # split(';') so that quoted fields containing ';' stay intact and blank
    # lines (e.g. a trailing empty line) are skipped rather than crashing.
    # newline='' leaves line-ending handling to the csv module.
    with open(input_file, 'r', newline='') as file:
        data = list(csv.DictReader(file, delimiter=';'))

    # Step 2: Compute frequencies for each discrete variable
    frequencies = {
        var: Counter(row[var] for row in data)
        for var in discrete_variables
    }

    # Step 3: Print the frequency counts to the console
    for var, freq_dict in frequencies.items():
        print(f'Frequencies for {var}:')
        for value, count in freq_dict.items():
            print(f'{value}: {count}')
        print()  # Blank line for readability

    return frequencies
def plot_frequency(input_file: str):
    """Show a bar chart and a pie chart for each discrete variable.

    The file is read as a semicolon-delimited CSV whose first row holds the
    column names. For every column named in ``discrete_variables`` the
    distinct values are tallied; then, per variable, a bar chart and a pie
    chart of those tallies are created and displayed with ``plt.show()``.

    Args:
        input_file: Path to the semicolon-delimited CSV file.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If there is at least one data row and one of the discrete
            variables is not a column of the file. All tallies are computed
            before plotting starts, so this is raised before any figure is
            shown.
    """
    discrete_variables = ['Street', 'Neighborhood', 'Bldg Type', 'House Style',
                          'Overall Qual', 'Overall Cond', 'Mo Sold', 'Yr Sold',
                          'Sale Type', 'Sale Condition']

    # Step 1: Read the CSV file. csv.DictReader is used instead of a manual
    # split(';') so that quoted fields containing ';' stay intact and blank
    # lines (e.g. a trailing empty line) are skipped rather than crashing.
    # newline='' leaves line-ending handling to the csv module.
    with open(input_file, 'r', newline='') as file:
        data = list(csv.DictReader(file, delimiter=';'))

    # Step 2: Compute frequencies for each discrete variable
    frequencies = {
        var: Counter(row[var] for row in data)
        for var in discrete_variables
    }

    # Step 3: Plot the frequencies using bar charts and pie charts
    for var, freq_dict in frequencies.items():
        labels = list(freq_dict.keys())
        counts = list(freq_dict.values())

        # Bar chart
        plt.figure(figsize=(10, 6))
        plt.bar(labels, counts, color='skyblue')
        plt.title(f'Bar Chart for {var}')
        plt.xlabel(var)
        plt.ylabel('Frequency')
        # Rotated, right-aligned tick labels keep long category names legible
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

        # Pie chart
        plt.figure(figsize=(8, 8))
        plt.pie(counts, labels=labels, autopct='%1.1f%%',
                colors=plt.cm.Paired.colors)
        plt.title(f'Pie Chart for {var}')
        plt.show()
def plot_histogram(input_file: str):
    """Show a 20-bin histogram of the ``SalePrice`` column.

    The file is read as a semicolon-delimited CSV whose first row holds the
    column names. Every ``SalePrice`` value is converted with ``int`` and the
    resulting numbers are plotted and displayed with ``plt.show()``.

    Args:
        input_file: Path to the semicolon-delimited CSV file.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If there is at least one data row and the file has no
            ``SalePrice`` column.
        ValueError: If a ``SalePrice`` value is not an integer literal.
    """
    # Step 1: Read the CSV file and extract "SalePrice" as integers.
    # csv.DictReader is used instead of a manual split(';') so that quoted
    # fields containing ';' stay intact and blank lines (e.g. a trailing
    # empty line) are skipped rather than crashing. newline='' leaves
    # line-ending handling to the csv module.
    with open(input_file, 'r', newline='') as file:
        sale_prices = [
            int(row['SalePrice'])
            for row in csv.DictReader(file, delimiter=';')
        ]

    # Step 2: Plot the histogram for the "SalePrice" variable
    plt.figure(figsize=(10, 6))
    plt.hist(sale_prices, bins=20, color='skyblue', edgecolor='black')
    plt.title('Histogram of SalePrice')
    plt.xlabel('SalePrice')
    plt.ylabel('Frequency')
    plt.grid(axis='y', alpha=0.75)
    plt.tight_layout()
    plt.show()
# Script section: these statements sit at module top level, so the active call
# below runs whenever this file is executed or imported. 'AmesHousing.csv' is
# a relative path and is therefore resolved against the current working
# directory.
# NOTE(review): consider moving the call under `if __name__ == "__main__":`
# so that importing this module does not trigger the plot — confirm no caller
# relies on the import-time behaviour first.
# Example usage 1 (disabled): print the frequency tables to the console
#frequency('AmesHousing.csv')
# Example usage 2 (disabled): show bar and pie charts per discrete variable
#plot_frequency('AmesHousing.csv')
# Example usage 3 (active): show the SalePrice histogram
plot_histogram('AmesHousing.csv')