Compare commits

4 Commits: 4cae4e9e28...master
| Author | SHA1 | Date |
|---|---|---|
| | 8f50cee48b | |
| | 42b3b2891c | |
| | c3a717ccbe | |
| | 0cd4403cc2 | |
.idea/libraries/openjfx_javafx_base.xml (generated, 2 changes)
```diff
@@ -3,7 +3,7 @@
     <properties maven-id="org.openjfx:javafx-base:20-ea+2" />
     <CLASSES>
       <root url="jar://$MAVEN_REPOSITORY$/org/openjfx/javafx-base/20-ea+2/javafx-base-20-ea+2.jar!/" />
-      <root url="jar://$MAVEN_REPOSITORY$/org/openjfx/javafx-base/20-ea+2/javafx-base-20-ea+2-linux.jar!/" />
+      <root url="jar://$MAVEN_REPOSITORY$/org/openjfx/javafx-base/20-ea+2/javafx-base-20-ea+2-win-x86-monocle.jar!/" />
     </CLASSES>
     <JAVADOC />
     <SOURCES />
```
WS24_25/PyCharm/pythonProject/.idea/misc.xml (generated, 2 changes)
```diff
@@ -3,5 +3,5 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.12 (pythonProject)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (pythonProject)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13" project-jdk-type="Python SDK" />
 </project>
```
```diff
@@ -5,7 +5,7 @@
       <excludeFolder url="file://$MODULE_DIR$/.venv" />
       <excludeFolder url="file://$MODULE_DIR$/venv" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.11 (pythonProject)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.13" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>
```
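The two hunks above move the project interpreter from Python 3.11 (pythonProject) to Python 3.13 in the IDE configuration. A minimal runtime check like the following (not part of the commit) can confirm which interpreter a script actually executes under:

```python
import sys

# Report the interpreter in use; after the config change above this is
# expected to show 3.13 rather than 3.11 (an expectation, not verified here).
print(sys.version)
print(sys.executable)
```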
WS24_25/PyCharm/pythonProject/P11/AmesHousing.csv (normal file, 2931 lines)

File diff suppressed because it is too large.
WS24_25/PyCharm/pythonProject/P11/excercise.py (normal file, 70 lines)
@@ -0,0 +1,70 @@
```python
import pandas as pd

# Load the dataset: Order;Lot Area;Street;Neighborhood;Bldg Type;House Style;Overall Qual;Overall Cond;Year Built;Year Remod/Add;1st Flr over Lot Area;1st Flr SF;Mo Sold;Yr Sold;Sale Type;Sale Condition;SalePrice
data = pd.read_csv("AmesHousing.csv", sep=';')

# View dataset structure
print(data.head())
print(data.info())

# Compute "years since built"
data['Years Since Built'] = data['Yr Sold'] - data['Year Built']

# Compute "years since remod/add"
data['Years Since Remod/Add'] = data['Yr Sold'] - data['Year Remod/Add']

# View the updated dataset
print(data[['Years Since Built', 'Years Since Remod/Add']].head())

# Categorize SalePrice into "cheap" and "expensive"
data['Price Category'] = data['SalePrice'].apply(lambda x: 'cheap' if x <= 160000 else 'expensive')

# View the updated dataset
print(data[['SalePrice', 'Price Category']].head())

# Define a threshold for low-frequency values
threshold = 5

# Iterate through each column
for column in data.columns:
    # Only process categorical columns (non-numeric, or treat numeric as categorical if needed)
    if data[column].dtype == 'object' or data[column].nunique() < 20:  # Customize this condition for your use case
        # Count frequency of each value
        frequencies = data[column].value_counts()

        # Identify categories with few occurrences
        low_frequency_values = frequencies[frequencies < threshold].index

        # Replace infrequent values with "Other"
        data[column] = data[column].apply(lambda x: 'Other' if x in low_frequency_values else x)

# View the dataframe after reclassification
print(data.head())

# Threshold for imbalance percentage (e.g., any class with >99% of the data)
imbalance_threshold = 0.99

# Identify columns to drop
columns_to_drop = []

# Loop through each column in the DataFrame
for column in data.columns:
    # Only analyze categorical variables
    if data[column].dtype == 'object' or data[column].nunique() < 20:
        # Compute class distribution
        class_distribution = data[column].value_counts(normalize=True)

        # Check if any single class exceeds the imbalance threshold
        if class_distribution.max() > imbalance_threshold:
            print(f"Extreme imbalance found in '{column}' (Dropping column)")
            columns_to_drop.append(column)

# You might want to drop other irrelevant variables explicitly
# Add them to columns_to_drop if not needed
# Example: columns_to_drop.append('Unnamed_column')

# Drop the identified columns
data = data.drop(columns=columns_to_drop)

# Output the cleaned dataset
print(f"Columns dropped: {columns_to_drop}")
print(data.head())
```
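The rare-category step in excercise.py replaces values element-wise with `apply`; a vectorized equivalent using `Series.isin` and `Series.where` is sketched below. The helper name `bucket_rare_values` and the toy data are illustrative assumptions, not part of the commit.

```python
import pandas as pd

def bucket_rare_values(s: pd.Series, min_count: int = 5) -> pd.Series:
    """Replace values that occur fewer than min_count times with 'Other'."""
    counts = s.value_counts()
    rare = counts[counts < min_count].index
    # Keep values that are not rare; replace the rest with 'Other'.
    return s.where(~s.isin(rare), other='Other')

# Toy frame, invented for illustration only.
toy = pd.DataFrame({'Neighborhood': ['NAmes'] * 6 + ['Gilbert'] * 2})
print(bucket_rare_values(toy['Neighborhood'], min_count=5))
```

Applied column by column, this gives the same result as the loop in the script while avoiding a Python-level lambda call per row.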
```diff
@@ -1,4 +1,4 @@
-Order;Lot Area;Street;Neighborhood;Bldg Type;House Style;Overall Qual;Overall Cond;Year Built;Year Remod/Add;1st Flr over Lot Area;1st Flr SF;Mo Sold;Yr Sold;Sale Type;Sale Condition;SalePrice
+Order;Lot Area;Street;Neighborhood;Bldg Type;House Style;Overall Qual;Overall Cond;Year Built;Year Remod/Add;1st Flr over Lot Area;1st Flr SF;Mo Sold;Yr_Sold;Sale Type;Sale Condition;SalePrice
 1;31770;Pave;NAmes;1Fam;1Story;6;5;1960;1960;0,05;1656;5;2010;WD ;Normal;215000
 2;11622;Pave;NAmes;1Fam;1Story;5;6;1961;1961;0,08;896;6;2010;WD ;Normal;105000
 3;14267;Pave;NAmes;1Fam;1Story;6;6;1958;1958;0,09;1329;6;2010;WD ;Normal;172000
```
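The header hunk above renames the `Yr Sold` column to `Yr_Sold`. Code such as excercise.py indexes `data['Yr Sold']`, so reading a file with the new header would raise a KeyError. One defensive option, sketched under the assumption that this CSV is consumed by that script (the file name below is a placeholder), is to normalize the header after loading:

```python
import pandas as pd

# Placeholder path for the semicolon-separated CSV carrying the renamed header.
data = pd.read_csv("housing_subset.csv", sep=';')

# Map the renamed column back to the name excercise.py expects; this is a
# no-op when the old spelling 'Yr Sold' is already present.
data = data.rename(columns={'Yr_Sold': 'Yr Sold'})
data['Years Since Built'] = data['Yr Sold'] - data['Year Built']
```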
Submodule WS24_25/SWTD/BuchRedesign deleted from 5781acef2b
Submodule WS24_25/SWTD/buchv1 deleted from 3353f56cb0