56 lines
1.6 KiB
Python
56 lines
1.6 KiB
Python
import matplotlib.pyplot as plt
|
|
import pandas as pd
|
|
|
|
def int_list(input_file: str, vars: list) -> list:
    """Read an integer column from a semicolon-separated text file.

    The first line of ``input_file`` is used as the header row. Every
    following non-blank line is one record whose fields are separated by
    ``;``.

    Args:
        input_file: Path of the file to read.
        vars: Column names to extract. Every listed column is converted to
            ``int`` (so a bad value in any of them raises), but only the
            values of the LAST name are returned. The name shadows the
            ``vars`` builtin; it is kept so keyword callers keep working.

    Returns:
        The values of the last column named in ``vars`` as a list of ints,
        in file order. An empty list when ``vars`` is empty or the file
        contains no data rows.

    Raises:
        KeyError: If a requested column is missing from a row.
        ValueError: If a value in a requested column is not an integer.
    """
    rows = []
    with open(input_file, 'r') as file:
        # readline() yields '' for an empty file, which simply produces a
        # header list with one empty name and no rows.
        headers = file.readline().strip().split(';')
        for line in file:
            record = line.strip()
            if not record:
                # Blank (e.g. trailing) lines carry no data; skip them
                # instead of failing on their missing fields.
                continue
            rows.append(dict(zip(headers, record.split(';'))))

    # Start from an empty result so an empty ``vars`` returns [] instead of
    # hitting an unbound local at the return statement.
    int_values = []
    for var in vars:
        int_values = [int(row[var]) for row in rows]
    return int_values
|
|
|
|
def do_stuff(input_file: str):
    """Print summary statistics for the ``SalePrice`` column.

    The column is read with ``int_list`` and loaded into a pandas Series.
    The report printed to stdout contains the median, mean, lower/upper
    quartiles, first/ninth deciles, a blank line, then the range, the
    interquartile difference and the standard deviation (as computed by
    ``Series.std``).

    Args:
        input_file: Path of the semicolon-separated file to analyse.
    """
    prices = pd.Series(int_list(input_file, ['SalePrice']))

    # The quartiles feed two report lines; compute them once.
    lower_quartile = prices.quantile(0.25)
    upper_quartile = prices.quantile(0.75)

    print(f'Median: {prices.median()}')
    print(f'Mean: {prices.mean()}')
    print(f'Quartile: {lower_quartile} {upper_quartile}')
    print(f'Decile: {prices.quantile(0.1)} {prices.quantile(0.9)}')
    print()
    print(f'Range: {prices.max() - prices.min()}')
    print(f'Quartile diff: {upper_quartile - lower_quartile}')
    print(f'STD: {prices.std()}')
|
|
|
|
def do_stuff2(input_file: str):
    """Show box plots of the 'Year Built' and 'Year Remod/Add' columns.

    Prints an empty line, reads each column with ``int_list`` and draws
    the two box plots side by side in one 11x7 inch figure, then blocks in
    ``plt.show()`` until the window is closed.

    Args:
        input_file: Path of the semicolon-separated file to plot.
    """
    print()

    columns = ['Year Built', 'Year Remod/Add']
    # int_list returns a single column per call, so read them one by one.
    data = [int_list(input_file, [column]) for column in columns]

    # Use a regular subplot rather than axes spanning the whole figure
    # ([0, 0, 1, 1]); with full-figure axes the tick labels fall outside
    # the canvas and are not visible.
    fig, ax = plt.subplots(figsize=(11, 7))
    ax.boxplot(data)
    # Name each box after the column it represents.
    ax.set_xticklabels(columns)

    plt.show()
|
|
|
|
|
|
def test():
    """Print the integer values of three columns of AmesHousing.csv.

    Manual smoke check (not an automated test): prints one list per
    column for 'SalePrice', 'Year Built' and 'Year Remod/Add'.
    """
    # Each column name must be its own string: the single string
    # 'SalePrice, Year Built, Year Remod/Add' would be looked up as one
    # header name. int_list returns one column per call, so the columns
    # are queried one at a time.
    for column in ('SalePrice', 'Year Built', 'Year Remod/Add'):
        print(int_list("AmesHousing.csv", [column]))
|
|
|
|
# Module-level call: draws the box plots for AmesHousing.csv via do_stuff2.
# NOTE(review): this runs whenever the module is loaded, not only when it is
# executed as a script.
do_stuff2("AmesHousing.csv")
|
|
#test()
|