Skip to content

Commit ad3660b

Browse files
committed
Use limit value to minimize batches returned in to_arrow read path
1 parent e891bcd commit ad3660b

File tree

1 file changed

+19
-12
lines changed

1 file changed

+19
-12
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 19 additions & 12 deletions
Original file line number · Diff line number · Diff line change
@@ -1265,21 +1265,27 @@ def _task_to_table(
12651265
case_sensitive: bool,
12661266
name_mapping: Optional[NameMapping] = None,
12671267
use_large_types: bool = True,
1268+
limit: Optional[int] = None,
12681269
) -> Optional[pa.Table]:
1269-
batches = list(
1270-
_task_to_record_batches(
1271-
fs,
1272-
task,
1273-
bound_row_filter,
1274-
projected_schema,
1275-
projected_field_ids,
1276-
positional_deletes,
1277-
case_sensitive,
1278-
name_mapping,
1279-
use_large_types,
1280-
)
1270+
batches_iterator = _task_to_record_batches(
1271+
fs,
1272+
task,
1273+
bound_row_filter,
1274+
projected_schema,
1275+
projected_field_ids,
1276+
positional_deletes,
1277+
case_sensitive,
1278+
name_mapping,
1279+
use_large_types,
12811280
)
12821281

1282+
total_row_count = 0
1283+
batches = []
1284+
for batch in batches_iterator:
1285+
total_row_count += len(batch)
1286+
batches.append(batch)
1287+
if limit is not None and total_row_count >= limit:
1288+
break
12831289
if len(batches) > 0:
12841290
return pa.Table.from_batches(batches)
12851291
else:
@@ -1366,6 +1372,7 @@ def project_table(
13661372
case_sensitive,
13671373
table_metadata.name_mapping(),
13681374
use_large_types,
1375+
limit,
13691376
)
13701377
for task in tasks
13711378
]

0 commit comments

Comments (0)