"""Frequency tables and plots for a semicolon-separated housing CSV file.

The file is parsed by hand (no ``csv``/``pandas``): the first line holds the
column names and every following line holds one record, with fields
separated by ``;``.
"""

from collections import Counter
from typing import Dict, List

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Only the plotting functions need matplotlib; frequency() is pure
    # stdlib, so keep the module importable and fail at plot time instead.
    plt = None

# Columns treated as discrete (categorical) variables by frequency() and
# plot_frequency(). Order here is the order of the printed/plotted output.
DISCRETE_VARIABLES = (
    'Street',
    'Neighborhood',
    'Bldg Type',
    'House Style',
    'Overall Qual',
    'Overall Cond',
    'Mo Sold',
    'Yr Sold',
    'Sale Type',
    'Sale Condition',
)


def _require_pyplot() -> None:
    """Raise a descriptive ImportError when matplotlib could not be imported."""
    if plt is None:
        raise ImportError(
            "matplotlib is required for plotting but is not installed"
        )


def _read_rows(input_file: str) -> List[Dict[str, str]]:
    """Parse a ';'-separated file into one ``{header: value}`` dict per row.

    Each line is stripped of surrounding whitespace and split on ``;``.
    Fields beyond the number of headers are ignored. Blank lines are
    skipped. All values are kept as strings.

    Raises:
        ValueError: if the file has no header line, or a non-blank row has
            fewer fields than there are headers.
        OSError: if the file cannot be opened.
    """
    rows = []
    with open(input_file, 'r') as file:
        header_line = file.readline().strip()
        if not header_line:
            raise ValueError(f"{input_file}: missing header line")
        headers = header_line.split(';')

        # Data starts on physical line 2; the counter is only used for
        # error messages, so blank lines are counted too.
        for line_number, line in enumerate(file, start=2):
            values = line.strip().split(';')
            if len(values) < len(headers):
                if values == ['']:
                    # Blank line (e.g. extra newline at end of file).
                    continue
                raise ValueError(
                    f"{input_file}: line {line_number}: expected at least "
                    f"{len(headers)} fields, found {len(values)}"
                )
            # zip stops at the shorter input, so surplus fields are dropped.
            rows.append(dict(zip(headers, values)))
    return rows


def _count_discrete(rows: List[Dict[str, str]]) -> Dict[str, Counter]:
    """Count value occurrences for every column in DISCRETE_VARIABLES.

    Raises:
        KeyError: if a row lacks one of the expected columns.
    """
    return {
        var: Counter(row[var] for row in rows)
        for var in DISCRETE_VARIABLES
    }


def frequency(input_file: str):
    """Print the frequency of each value of every discrete variable.

    For each column in DISCRETE_VARIABLES a ``Frequencies for <column>:``
    heading is printed, followed by one ``<value>: <count>`` line per
    distinct value (in order of first appearance) and a blank line.

    Args:
        input_file: path of the ';'-separated file to read.

    Raises:
        ValueError: if the file is empty or a row is too short.
        KeyError: if an expected column is missing from the header.
    """
    frequencies = _count_discrete(_read_rows(input_file))

    for var, freq_dict in frequencies.items():
        print(f'Frequencies for {var}:')
        for value, count in freq_dict.items():
            print(f'{value}: {count}')
        print()  # Blank line for readability


def plot_frequency(input_file: str):
    """Show a bar chart and a pie chart for every discrete variable.

    Two figures are shown per column in DISCRETE_VARIABLES, one after the
    other, each via ``plt.show()``.

    Args:
        input_file: path of the ';'-separated file to read.

    Raises:
        ImportError: if matplotlib is not installed.
        ValueError: if the file is empty or a row is too short.
        KeyError: if an expected column is missing from the header.
    """
    _require_pyplot()  # fail before doing any file work
    frequencies = _count_discrete(_read_rows(input_file))

    for var, freq_dict in frequencies.items():
        labels = list(freq_dict.keys())
        counts = list(freq_dict.values())

        # Bar chart
        plt.figure(figsize=(10, 6))
        plt.bar(labels, counts, color='skyblue')
        plt.title(f'Bar Chart for {var}')
        plt.xlabel(var)
        plt.ylabel('Frequency')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

        # Pie chart
        plt.figure(figsize=(8, 8))
        plt.pie(counts, labels=labels, autopct='%1.1f%%',
                colors=plt.cm.Paired.colors)
        plt.title(f'Pie Chart for {var}')
        plt.show()


def plot_histogram(input_file: str):
    """Show a 20-bin histogram of the ``SalePrice`` column.

    Every ``SalePrice`` value is converted with ``int()`` before plotting.

    Args:
        input_file: path of the ';'-separated file to read.

    Raises:
        ImportError: if matplotlib is not installed.
        ValueError: if the file is empty, a row is too short, or a
            ``SalePrice`` value is not an integer literal.
        KeyError: if the ``SalePrice`` column is missing from the header.
    """
    _require_pyplot()  # fail before doing any file work
    sale_prices = [int(row['SalePrice']) for row in _read_rows(input_file)]

    plt.figure(figsize=(10, 6))
    plt.hist(sale_prices, bins=20, color='skyblue', edgecolor='black')
    plt.title('Histogram of SalePrice')
    plt.xlabel('SalePrice')
    plt.ylabel('Frequency')
    plt.grid(axis='y', alpha=0.75)
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    # Guarded so that importing this module does not open a plot window.
    # Other entry points that can be run the same way:
    #   frequency('AmesHousing.csv')
    #   plot_frequency('AmesHousing.csv')
    plot_histogram('AmesHousing.csv')