a

2024-10-29 14:11:37 +01:00
parent 24cffc3c6c
commit 3954d3b1bf
10 changed files with 364 additions and 0 deletions
--- a/WS24_25/PyCharm/pythonProject/P4/data.py
+++ b/WS24_25/PyCharm/pythonProject/P4/data.py
@@ -0,0 +1,125 @@
+import matplotlib.pyplot as plt
+from collections import Counter
+
+def frequency(input_file: str):
+    """Print and return the value frequencies of the discrete columns.
+
+    The file is read as a semicolon-separated table whose first line holds
+    the column names. For every column listed in ``discrete_variables`` the
+    distinct values and their number of occurrences are printed.
+
+    Args:
+        input_file: Path to the semicolon-separated CSV file.
+
+    Returns:
+        A dict mapping each discrete column name to a ``Counter`` of the
+        values found in that column.
+
+    Raises:
+        KeyError: If a data row does not contain one of the discrete columns.
+    """
+    # Step 1: Read the CSV file manually
+    with open(input_file, 'r') as file:
+        lines = file.readlines()
+
+    # Step 2: Parse the first line to get the headers (an empty file has none)
+    headers = lines[0].strip().split(';') if lines else []
+
+    # Step 3: Parse the subsequent lines to get the data
+    data = []
+    for line in lines[1:]:
+        stripped = line.strip()
+        # Blank lines carry no record; indexing into them used to crash.
+        if not stripped:
+            continue
+        # zip() tolerates rows that are shorter or longer than the header.
+        data.append(dict(zip(headers, stripped.split(';'))))
+
+    # Step 4: Compute frequencies for each discrete variable
+    discrete_variables = ['Street', 'Neighborhood', 'Bldg Type', 'House Style',
+                          'Overall Qual', 'Overall Cond', 'Mo Sold', 'Yr Sold',
+                          'Sale Type', 'Sale Condition']
+
+    frequencies = {var: Counter(row[var] for row in data)
+                   for var in discrete_variables}
+
+    # Step 5: Print the frequency counts to the console
+    for var, freq_dict in frequencies.items():
+        print(f'Frequencies for {var}:')
+        for value, count in freq_dict.items():
+            print(f'{value}: {count}')
+        print()  # Blank line for readability
+
+    return frequencies
+
+
+
+
+
+def plot_frequency(input_file: str):
+    """Show a bar chart and a pie chart for every discrete column.
+
+    The file is read as a semicolon-separated table whose first line holds
+    the column names. For each column listed in ``discrete_variables`` the
+    value frequencies are counted and displayed in two separate figures.
+
+    Args:
+        input_file: Path to the semicolon-separated CSV file.
+
+    Raises:
+        KeyError: If a data row does not contain one of the discrete columns.
+    """
+    # Step 1: Read the CSV file manually
+    with open(input_file, 'r') as file:
+        lines = file.readlines()
+
+    # Step 2: Parse the first line to get the headers (an empty file has none)
+    headers = lines[0].strip().split(';') if lines else []
+
+    # Step 3: Parse the subsequent lines to get the data
+    data = []
+    for line in lines[1:]:
+        stripped = line.strip()
+        # Blank lines carry no record; indexing into them used to crash.
+        if not stripped:
+            continue
+        # zip() tolerates rows that are shorter or longer than the header.
+        data.append(dict(zip(headers, stripped.split(';'))))
+
+    # Step 4: Compute frequencies for each discrete variable
+    discrete_variables = ['Street', 'Neighborhood', 'Bldg Type', 'House Style',
+                          'Overall Qual', 'Overall Cond', 'Mo Sold', 'Yr Sold',
+                          'Sale Type', 'Sale Condition']
+
+    frequencies = {var: Counter(row[var] for row in data)
+                   for var in discrete_variables}
+
+    # Step 5: Plot the frequencies using bar charts and pie charts
+    for var, freq_dict in frequencies.items():
+        labels = list(freq_dict.keys())
+        counts = list(freq_dict.values())
+
+        # Bar chart
+        plt.figure(figsize=(10, 6))
+        plt.bar(labels, counts, color='skyblue')
+        plt.title(f'Bar Chart for {var}')
+        plt.xlabel(var)
+        plt.ylabel('Frequency')
+        plt.xticks(rotation=45, ha='right')
+        plt.tight_layout()
+        plt.show()
+
+        # Pie chart
+        plt.figure(figsize=(8, 8))
+        plt.pie(counts, labels=labels, autopct='%1.1f%%', colors=plt.cm.Paired.colors)
+        plt.title(f'Pie Chart for {var}')
+        plt.show()
+
+def plot_histogram(input_file: str):
+    """Show a 20-bin histogram of the ``SalePrice`` column.
+
+    The file is read as a semicolon-separated table whose first line holds
+    the column names.
+
+    Args:
+        input_file: Path to the semicolon-separated CSV file.
+
+    Raises:
+        KeyError: If a data row has no ``SalePrice`` field.
+        ValueError: If a ``SalePrice`` value is not an integer literal.
+    """
+    # Step 1: Read the CSV file manually
+    with open(input_file, 'r') as file:
+        lines = file.readlines()
+
+    # Step 2: Parse the first line to get the headers (an empty file has none)
+    headers = lines[0].strip().split(';') if lines else []
+
+    # Step 3: Parse the subsequent lines to get the data
+    data = []
+    for line in lines[1:]:
+        stripped = line.strip()
+        # Blank lines carry no record; indexing into them used to crash.
+        if not stripped:
+            continue
+        # zip() tolerates rows that are shorter or longer than the header.
+        data.append(dict(zip(headers, stripped.split(';'))))
+
+    # Step 4: Extract the "SalePrice" variable and convert it to numeric values
+    sale_prices = [int(row['SalePrice']) for row in data]
+
+    # Step 5: Plot the histogram for the "SalePrice" variable
+    plt.figure(figsize=(10, 6))
+    plt.hist(sale_prices, bins=20, color='skyblue', edgecolor='black')
+    plt.title('Histogram of SalePrice')
+    plt.xlabel('SalePrice')
+    plt.ylabel('Frequency')
+    plt.grid(axis='y', alpha=0.75)
+    plt.tight_layout()
+    plt.show()
+
+
+# Example usage: exactly one of the calls below is active at a time;
+# uncomment another one to run that analysis instead.
+# NOTE(review): the active call executes whenever this module is run or
+# imported - consider an `if __name__ == "__main__":` guard.
+# frequency('AmesHousing.csv')
+
+# Example usage 2
+# plot_frequency('AmesHousing.csv')
+
+# Example usage 3
+plot_histogram('AmesHousing.csv')
--- a/WS24_25/PyCharm/pythonProject/P5/compute.py
+++ b/WS24_25/PyCharm/pythonProject/P5/compute.py
@@ -0,0 +1,55 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+
+def int_list(input_file: str, vars: list):
+    """Return one column of a semicolon-separated file as a list of ints.
+
+    The first line of the file holds the column names. Only the LAST name
+    in ``vars`` determines the returned column; this matches the historical
+    behaviour, where earlier names were read and then discarded.
+
+    Args:
+        input_file: Path to the semicolon-separated CSV file.
+        vars: Non-empty list of column names; the last one is extracted.
+
+    Returns:
+        The values of the selected column, converted with ``int``, in file
+        order. Blank lines are skipped.
+
+    Raises:
+        ValueError: If ``vars`` is empty or a value is not an integer literal.
+        KeyError: If a data row does not contain the selected column.
+    """
+    # Fail early with a clear message instead of an UnboundLocalError later.
+    if not vars:
+        raise ValueError('vars must contain at least one column name')
+
+    with open(input_file, 'r') as file:
+        lines = file.readlines()
+
+    # An empty file has no header line and therefore no columns.
+    headers = lines[0].strip().split(';') if lines else []
+
+    data = []
+    for line in lines[1:]:
+        stripped = line.strip()
+        # Blank lines carry no record; indexing into them used to crash.
+        if not stripped:
+            continue
+        # zip() tolerates rows that are shorter or longer than the header.
+        data.append(dict(zip(headers, stripped.split(';'))))
+
+    column = vars[-1]
+    return [int(row[column]) for row in data]
+
+def do_stuff(input_file: str):
+    """Print location and dispersion statistics of the ``SalePrice`` column.
+
+    Printed values: median, mean, first/third quartile, first/ninth decile,
+    range (max - min), interquartile difference and the standard deviation
+    as computed by ``pandas.Series.std``.
+
+    Args:
+        input_file: Path to the semicolon-separated CSV file, passed on to
+            ``int_list``.
+    """
+    s = pd.Series(int_list(input_file, ['SalePrice']))
+
+    # Both quartiles are printed and then subtracted, so compute them once.
+    lower_quartile = s.quantile(0.25)
+    upper_quartile = s.quantile(0.75)
+
+    print(f'Median: {s.median()}')
+    print(f'Mean: {s.mean()}')
+    print(f'Quartile: {lower_quartile} {upper_quartile}')
+    print(f'Decile:  {s.quantile(0.1)} {s.quantile(0.9)}')
+    print()
+    print(f'Range: {s.max() - s.min()}')
+    # Label previously misspelled as "Qartile diff".
+    print(f'Quartile diff: {upper_quartile - lower_quartile}')
+    print(f'STD: {s.std()}')
+
+def do_stuff2(input_file: str):
+    """Show box plots of ``Year Built`` and ``Year Remod/Add`` side by side.
+
+    Args:
+        input_file: Path to the semicolon-separated CSV file, passed on to
+            ``int_list`` once per column.
+    """
+    print()
+    columns = ['Year Built', 'Year Remod/Add']
+    data = [int_list(input_file, [column]) for column in columns]
+
+    # plt.subplots() leaves margins for ticks and labels; the former
+    # fig.add_axes([0, 0, 1, 1]) stretched the axes over the whole figure,
+    # which pushed the tick labels outside the visible canvas.
+    fig, ax = plt.subplots(figsize=(11, 7))
+
+    # Creating plot, one box per column in the order given above
+    ax.boxplot(data)
+    ax.set_xticklabels(columns)
+
+    # show plot
+    plt.show()
+
+
+def test():
+    """Print the integer values of three columns of ``AmesHousing.csv``.
+
+    Each column is requested on its own. The earlier version passed the
+    three names as ONE comma-joined string, which is not a column name and
+    made the lookup fail.
+    """
+    for column in ('SalePrice', 'Year Built', 'Year Remod/Add'):
+        print(int_list("AmesHousing.csv", [column]))
+
+# Script entry: draw the box plots for the data file named below.
+# NOTE(review): this call executes whenever the module is run or imported -
+# consider an `if __name__ == "__main__":` guard.
+do_stuff2("AmesHousing.csv")
+# test()