Update app.py
Browse files
app.py
CHANGED
@@ -10,10 +10,19 @@ def plot_real_estate_correlation(state):
|
|
10 |
# Filter for the given state
|
11 |
df = df[df['State'] == state.upper()]
|
12 |
|
13 |
-
# Extract the list of ZIP codes and
|
14 |
zip_codes = df['RegionName'].unique()
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
# Initialize a list to collect one row of prices per ZIP code (converted to a DataFrame below)
|
18 |
price_matrix = []
|
19 |
|
@@ -21,15 +30,15 @@ def plot_real_estate_correlation(state):
|
|
21 |
for zip_code in zip_codes:
|
22 |
df_zip = df[df['RegionName'] == zip_code]
|
23 |
|
24 |
-
# Extract only the columns with date data (price values)
|
25 |
-
prices = df_zip.
|
26 |
|
27 |
# Append prices to the matrix unless every value is missing (all NaN)
|
28 |
if not np.isnan(prices).all():
|
29 |
price_matrix.append(prices)
|
30 |
|
31 |
# Convert to DataFrame for easier manipulation
|
32 |
-
price_matrix_df = pd.DataFrame(price_matrix, index=zip_codes)
|
33 |
|
34 |
# Transpose to align for correlation calculation (each column = ZIP code)
|
35 |
price_matrix_df = price_matrix_df.T.dropna()
|
|
|
10 |
# Filter for the given state
|
11 |
df = df[df['State'] == state.upper()]
|
12 |
|
13 |
+
# Extract the list of ZIP codes and filter only columns that are date strings
|
14 |
zip_codes = df['RegionName'].unique()
|
15 |
+
|
16 |
+
# Extract columns that are valid date strings only
|
17 |
+
date_columns = []
|
18 |
+
for col in df.columns[7:]:
|
19 |
+
try:
|
20 |
+
# Try to parse column names as dates
|
21 |
+
pd.to_datetime(col)
|
22 |
+
date_columns.append(col)
|
23 |
+
except:
|
24 |
+
continue
|
25 |
+
|
26 |
# Initialize a list to collect one row of prices per ZIP code (converted to a DataFrame below)
|
27 |
price_matrix = []
|
28 |
|
|
|
30 |
for zip_code in zip_codes:
|
31 |
df_zip = df[df['RegionName'] == zip_code]
|
32 |
|
33 |
+
# Extract only the columns with valid date data (price values)
|
34 |
+
prices = df_zip.loc[:, date_columns].values.flatten()
|
35 |
|
36 |
# Append prices to the matrix unless every value is missing (all NaN)
|
37 |
if not np.isnan(prices).all():
|
38 |
price_matrix.append(prices)
|
39 |
|
40 |
# Convert to DataFrame for easier manipulation
|
41 |
+
price_matrix_df = pd.DataFrame(price_matrix, index=zip_codes, columns=date_columns)
|
42 |
|
43 |
# Transpose to align for correlation calculation (each column = ZIP code)
|
44 |
price_matrix_df = price_matrix_df.T.dropna()
|