gera-richarte committed on
Commit
67fe5dd
1 Parent(s): d31681b

feat(earthview): Sentinel_2 support

Browse files
Files changed (2) hide show
  1. earthview.py +12 -4
  2. utils.py +12 -8
earthview.py CHANGED
@@ -46,7 +46,10 @@ def load_dataset(subset, dataset="satellogic/EarthView", split="train", shards =
46
  if shards is None:
47
  data_files = None
48
  else:
49
- data_files = [f"{path}/{split}-{shard:05d}-of-{nshards:05d}.parquet" for shard in shards]
 
 
 
50
  data_files = {split: data_files}
51
 
52
  ds = _load_dataset(
@@ -127,12 +130,17 @@ def item_to_images(subset, item):
127
  ]
128
  count = len(item["10m"])
129
  elif subset == "sentinel_2":
130
- for channel in ['10m', '20m', '40m', 'rgb']: #, 'scl', 'metadata']:
131
  data = item[channel]
132
  count = len(data)
133
- data = np.asarray(data).astype("uint8")
134
- images = [Image.fromarray(data[i,:,:,:].transpose(1,2,0)) for i in range(count)]
 
 
 
135
  item[channel] = images
 
 
136
  elif subset == "neon":
137
  item["rgb"] = [
138
  Image.fromarray(image.transpose(1,2,0))
 
46
  if shards is None:
47
  data_files = None
48
  else:
49
+ if subset == "sentinel_2":
50
+ data_files = [f"{path}/sentinel_2-{shard//10}/{split}-{shard % 10:05d}-of-00010.parquet" for shard in shards]
51
+ else:
52
+ data_files = [f"{path}/{split}-{shard:05d}-of-{nshards:05d}.parquet" for shard in shards]
53
  data_files = {split: data_files}
54
 
55
  ds = _load_dataset(
 
130
  ]
131
  count = len(item["10m"])
132
  elif subset == "sentinel_2":
133
+ for channel in ['10m', '20m', 'rgb', 'scl']: #, '40m']:
134
  data = item[channel]
135
  count = len(data)
136
+ data = np.asarray(data).astype("uint8").transpose(0,2,3,1)
137
+ if channel == "20m":
138
+ data = data[:,:,:,[0,2,4]]
139
+ mode = "L" if channel in ["10m", "scl"] else "RGB"
140
+ images = [Image.fromarray(data[i].squeeze(), mode=mode) for i in range(count)]
141
  item[channel] = images
142
+ for field in ["solarAngles", "tileGeometry", "viewIncidenceAngles"]:
143
+ item["metadata"][field] = [json.loads(s) for s in item["metadata"][field]]
144
  elif subset == "neon":
145
  item["rgb"] = [
146
  Image.fromarray(image.transpose(1,2,0))
utils.py CHANGED
@@ -16,23 +16,27 @@ def lat_lon_mid_pixel(item, subset: str):
16
  if subset == "satellogic":
17
  crs = metadata["crs"][0]
18
  bounds_crs = metadata["bounds"]
 
 
 
19
  elif subset == "sentinel_1":
20
- crs = metadata["crs"]
21
- bounds_crs = metadata["coordinates"][0]
 
 
 
 
 
 
 
22
  assert len(bounds_crs) == 5
23
- # bounds are a polygon with same first & last vertex
24
  bounds_crs = (
25
  bounds_crs[0][0],
26
  bounds_crs[0][1],
27
  bounds_crs[2][0],
28
  bounds_crs[2][1],
29
  )
30
- elif subset == "neon":
31
- crs = metadata["epsg"]
32
- bounds_crs = metadata["bounds"]
33
 
34
- else:
35
- raise ValueError("subset not known")
36
 
37
  bounds = pyproj.Transformer.from_crs(crs, "EPSG:4326").transform_bounds(*bounds_crs)
38
  # dumb average for now
 
16
  if subset == "satellogic":
17
  crs = metadata["crs"][0]
18
  bounds_crs = metadata["bounds"]
19
+ elif subset == "neon":
20
+ crs = metadata["epsg"]
21
+ bounds_crs = metadata["bounds"]
22
  elif subset == "sentinel_1":
23
+ geometry = metadata
24
+ elif subset == "sentinel_2":
25
+ geometry = metadata["tileGeometry"][0]
26
+ else:
27
+ raise ValueError("subset not known")
28
+
29
+ if subset.startswith("sentinel_"):
30
+ crs = geometry["crs"]
31
+ bounds_crs = geometry["coordinates"][0]
32
  assert len(bounds_crs) == 5
 
33
  bounds_crs = (
34
  bounds_crs[0][0],
35
  bounds_crs[0][1],
36
  bounds_crs[2][0],
37
  bounds_crs[2][1],
38
  )
 
 
 
39
 
 
 
40
 
41
  bounds = pyproj.Transformer.from_crs(crs, "EPSG:4326").transform_bounds(*bounds_crs)
42
  # dumb average for now