dibend committed on
Commit
615dbda
1 Parent(s): 7ed2a2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -10,10 +10,19 @@ def plot_real_estate_correlation(state):
10
  # Filter for the given state
11
  df = df[df['State'] == state.upper()]
12
 
13
- # Extract the list of ZIP codes and dates
14
  zip_codes = df['RegionName'].unique()
15
- dates = pd.to_datetime(df.columns[7:]) # Assuming price data starts from the 8th column
16
-
 
 
 
 
 
 
 
 
 
17
  # Initialize a DataFrame to hold price data for correlation calculation
18
  price_matrix = []
19
 
@@ -21,15 +30,15 @@ def plot_real_estate_correlation(state):
21
  for zip_code in zip_codes:
22
  df_zip = df[df['RegionName'] == zip_code]
23
 
24
- # Extract only the columns with date data (price values)
25
- prices = df_zip.iloc[0, 7:].values # Extract price values starting from the 8th column
26
 
27
  # Append prices to the matrix if there are no missing values
28
  if not np.isnan(prices).all():
29
  price_matrix.append(prices)
30
 
31
  # Convert to DataFrame for easier manipulation
32
- price_matrix_df = pd.DataFrame(price_matrix, index=zip_codes)
33
 
34
  # Transpose to align for correlation calculation (each column = ZIP code)
35
  price_matrix_df = price_matrix_df.T.dropna()
 
10
  # Filter for the given state
11
  df = df[df['State'] == state.upper()]
12
 
13
+ # Extract the list of ZIP codes and filter only columns that are date strings
14
  zip_codes = df['RegionName'].unique()
15
+
16
+ # Extract columns that are valid date strings only
17
+ date_columns = []
18
+ for col in df.columns[7:]:
19
+ try:
20
+ # Try to parse column names as dates
21
+ pd.to_datetime(col)
22
+ date_columns.append(col)
23
+ except:
24
+ continue
25
+
26
  # Initialize a DataFrame to hold price data for correlation calculation
27
  price_matrix = []
28
 
 
30
  for zip_code in zip_codes:
31
  df_zip = df[df['RegionName'] == zip_code]
32
 
33
+ # Extract only the columns with valid date data (price values)
34
+ prices = df_zip.loc[:, date_columns].values.flatten()
35
 
36
  # Append prices to the matrix if there are no missing values
37
  if not np.isnan(prices).all():
38
  price_matrix.append(prices)
39
 
40
  # Convert to DataFrame for easier manipulation
41
+ price_matrix_df = pd.DataFrame(price_matrix, index=zip_codes, columns=date_columns)
42
 
43
  # Transpose to align for correlation calculation (each column = ZIP code)
44
  price_matrix_df = price_matrix_df.T.dropna()