a
This commit is contained in:
@@ -0,0 +1,125 @@
|
||||
import matplotlib.pyplot as plt
|
||||
from collections import Counter
|
||||
|
||||
def frequency(input_file: str):
    """Print and return value counts for the discrete housing variables.

    The file is read as semicolon-delimited text whose first line holds the
    column names.  For each variable in the fixed list below, the number of
    occurrences of every distinct value is counted and printed to the
    console, one ``value: count`` line per value.

    Args:
        input_file: Path to the semicolon-delimited CSV file.

    Returns:
        A dict mapping each variable name to a ``Counter`` of its values.
        Values are kept as the strings read from the file; a row that is too
        short to contain a column contributes ``None`` for that column.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the header lacks one of the expected columns and the
            file contains at least one data row.
    """
    import csv  # local import keeps this function self-contained

    discrete_variables = [
        'Street', 'Neighborhood', 'Bldg Type', 'House Style',
        'Overall Qual', 'Overall Cond', 'Mo Sold', 'Yr Sold',
        'Sale Type', 'Sale Condition',
    ]

    # DictReader takes the first line as the header and skips blank lines,
    # so a trailing newline or an empty file no longer raises IndexError.
    with open(input_file, 'r', newline='') as file:
        rows = list(csv.DictReader(file, delimiter=';'))

    frequencies = {
        var: Counter(row[var] for row in rows) for var in discrete_variables
    }

    for var, freq_dict in frequencies.items():
        print(f'Frequencies for {var}:')
        for value, count in freq_dict.items():
            print(f'{value}: {count}')
        print()  # Blank line for readability

    return frequencies
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def plot_frequency(input_file: str):
    """Show a bar chart and a pie chart for each discrete housing variable.

    The file is read as semicolon-delimited text whose first line holds the
    column names.  For each variable in the fixed list below, the occurrences
    of every distinct value are counted and displayed first as a bar chart
    and then as a pie chart.

    Args:
        input_file: Path to the semicolon-delimited CSV file.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the header lacks one of the expected columns and the
            file contains at least one data row.
    """
    import csv  # local import keeps this function self-contained

    discrete_variables = [
        'Street', 'Neighborhood', 'Bldg Type', 'House Style',
        'Overall Qual', 'Overall Cond', 'Mo Sold', 'Yr Sold',
        'Sale Type', 'Sale Condition',
    ]

    # DictReader takes the first line as the header and skips blank lines,
    # so a trailing newline or an empty file no longer raises IndexError.
    with open(input_file, 'r', newline='') as file:
        rows = list(csv.DictReader(file, delimiter=';'))

    frequencies = {
        var: Counter(row[var] for row in rows) for var in discrete_variables
    }

    for var, freq_dict in frequencies.items():
        if not freq_dict:
            # Nothing to draw for this variable (file had no data rows).
            continue

        # str() so that a missing value (None) can still be used as a label.
        labels = [str(value) for value in freq_dict]
        counts = list(freq_dict.values())

        # Bar chart
        plt.figure(figsize=(10, 6))
        plt.bar(labels, counts, color='skyblue')
        plt.title(f'Bar Chart for {var}')
        plt.xlabel(var)
        plt.ylabel('Frequency')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

        # Pie chart
        plt.figure(figsize=(8, 8))
        plt.pie(counts, labels=labels, autopct='%1.1f%%',
                colors=plt.cm.Paired.colors)
        plt.title(f'Pie Chart for {var}')
        plt.show()
|
||||
|
||||
def plot_histogram(input_file: str):
    """Show a 20-bin histogram of the ``SalePrice`` column.

    The file is read as semicolon-delimited text whose first line holds the
    column names.  Every ``SalePrice`` value is converted to ``int`` before
    plotting.

    Args:
        input_file: Path to the semicolon-delimited CSV file.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the header has no ``SalePrice`` column and the file
            contains at least one data row.
        ValueError: If a ``SalePrice`` value is not an integer literal.
        TypeError: If a row is too short to contain a ``SalePrice`` value.
    """
    import csv  # local import keeps this function self-contained

    # DictReader takes the first line as the header and skips blank lines,
    # so a trailing newline or an empty file no longer raises IndexError.
    with open(input_file, 'r', newline='') as file:
        sale_prices = [
            int(row['SalePrice'])
            for row in csv.DictReader(file, delimiter=';')
        ]

    plt.figure(figsize=(10, 6))
    plt.hist(sale_prices, bins=20, color='skyblue', edgecolor='black')
    plt.title('Histogram of SalePrice')
    plt.xlabel('SalePrice')
    plt.ylabel('Frequency')
    plt.grid(axis='y', alpha=0.75)
    plt.tight_layout()
    plt.show()
|
||||
|
||||
|
||||
# Example usage: enable the call for the analysis you want to run.
# The guard keeps the plots from popping up when this module is imported.
if __name__ == '__main__':
    # frequency('AmesHousing.csv')
    # plot_frequency('AmesHousing.csv')
    plot_histogram('AmesHousing.csv')
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
|
||||
def int_list(input_file: str, vars: list):
    """Return a column of a semicolon-delimited CSV file as a list of ints.

    The file's first line is taken as the header.  Every column named in
    ``vars`` is converted to integers in turn, but only the values of the
    LAST name in ``vars`` are returned.

    NOTE(review): returning only the last column is kept for backward
    compatibility; callers that need several columns should call this
    function once per column.

    Args:
        input_file: Path to the semicolon-delimited CSV file.
        vars: Column names to convert; an empty list yields ``[]``.

    Returns:
        The integer values of the last column named in ``vars``, in file
        order, or an empty list when ``vars`` is empty.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If a requested column is not in the header and the file
            contains at least one data row.
        ValueError: If a value in a requested column is not an integer
            literal.
        TypeError: If a row is too short to contain a requested column.
    """
    import csv  # local import keeps this function self-contained

    # DictReader takes the first line as the header and skips blank lines,
    # so a trailing newline or an empty file no longer raises IndexError.
    with open(input_file, 'r', newline='') as file:
        rows = list(csv.DictReader(file, delimiter=';'))

    # Pre-initialised so that an empty ``vars`` returns [] instead of
    # hitting an unbound local at the return statement.
    int_values = []
    for var in vars:
        int_values = [int(row[var]) for row in rows]
    return int_values
|
||||
|
||||
def do_stuff(input_file: str):
    """Print location and spread statistics for the ``SalePrice`` column.

    The prices are loaded through ``int_list`` and wrapped in a pandas
    Series.  Printed, in order: median, mean, the 25%/75% quantiles, the
    10%/90% quantiles, a blank line, the range (max - min), the
    interquartile difference and the standard deviation.

    Args:
        input_file: Path passed through to ``int_list``.
    """
    prices = pd.Series(int_list(input_file, ['SalePrice']))

    # Measures of location
    median = prices.median()
    print(f'Median: {median}')
    mean = prices.mean()
    print(f'Mean: {mean}')

    lower_quartile = prices.quantile(0.25)
    upper_quartile = prices.quantile(0.75)
    print(f'Quartile: {lower_quartile} {upper_quartile}')

    lower_decile = prices.quantile(0.1)
    upper_decile = prices.quantile(0.9)
    print(f'Decile: {lower_decile} {upper_decile}')

    print()

    # Measures of spread
    value_range = prices.max() - prices.min()
    print(f'Range: {value_range}')
    print(f'Qartile diff: {upper_quartile - lower_quartile}')
    print(f'STD: {prices.std()}')
|
||||
|
||||
def do_stuff2(input_file: str):
    """Show side-by-side box plots of 'Year Built' and 'Year Remod/Add'.

    A blank line is printed first; then both columns are loaded through
    ``int_list`` and drawn as two labelled boxes in one figure.

    Args:
        input_file: Path passed through to ``int_list``.
    """
    print()

    columns = ['Year Built', 'Year Remod/Add']
    data = [int_list(input_file, [column]) for column in columns]

    fig = plt.figure(figsize=(11, 7))

    # add_subplot keeps the default margins.  An axes rectangle of
    # [0, 0, 1, 1] would fill the whole figure and leave no room for the
    # tick labels, which then fall outside the visible canvas.
    ax = fig.add_subplot(111)

    ax.boxplot(data)
    # Name the boxes after the columns instead of the default 1, 2.
    ax.set_xticklabels(columns)

    plt.show()
|
||||
|
||||
|
||||
def test():
    """Print the integer values of a few numeric columns as a smoke test.

    Each column is requested on its own: a single comma-joined string such
    as 'SalePrice, Year Built' is not a column name and would raise KeyError.
    """
    for column in ('SalePrice', 'Year Built', 'Year Remod/Add'):
        print(int_list("AmesHousing.csv", [column]))
|
||||
|
||||
# Run the box-plot demo only when executed as a script, so that importing
# this module does not open a plot window.
if __name__ == "__main__":
    do_stuff2("AmesHousing.csv")
    # test()
|
||||
|
||||
Reference in New Issue
Block a user