# S3 Select Object Content
# This example shows how to get data from S3 using a SQL query and the
# select_object_content operation. For more information, see
# https://docs.aws.amazon.com/AmazonS3/latest/API/API_SelectObjectContent.html.
library(paws)
# Create an S3 client; no arguments are passed, so paws' defaults apply.
client <- s3()

# Bucket and object key to query, and the SQL expression to run against
# the object's contents.
bkt <- "paws-dummy-bucket"
key <- "dummy.csv"
exp <- "SELECT * FROM s3object s"

# Send the query. Both the stored object (input) and the returned records
# (output) are declared as CSV with newline-delimited records and
# comma-separated fields.
resp <- client$select_object_content(
  Bucket = bkt,
  Key = key,
  Expression = exp,
  ExpressionType = "SQL",
  InputSerialization = list(
    CSV = list(
      RecordDelimiter = "\n",
      FieldDelimiter = ","
    )
  ),
  OutputSerialization = list(
    CSV = list(
      RecordDelimiter = "\n",
      FieldDelimiter = ","
    )
  )
)
# The response is a stream of records. We need to read the stream.
# Please check out the documentation for more information:
# https://www.paws-r-sdk.com/docs/s3_select_object_content/
# Read the stream, extracting the Records payload from each chunk.
result <- resp$Payload(function(chunk) chunk$Records$Payload)
# Discard empty elements and flatten what remains into a single vector.
buffer <- unlist(Filter(function(part) length(part) > 0, result))
# Number of elements held in the buffer
length(buffer)
#> [1] 305205
# Methods to parse the buffer
# Each named expression parses the same buffer with a different package.
# base::read.csv() is given text, so the raw buffer is converted with
# rawToChar() first; the other readers are handed the raw buffer directly.
# check = FALSE turns off bench's comparison of results across expressions
# (presumably because the parsers return different data-frame classes).
bench::mark(
  base = read.csv(text = rawToChar(buffer)),
  iotools = iotools::read.csv.raw(buffer),
  readr = readr::read_csv(buffer),
  arrow = arrow::read_csv_arrow(buffer),
  check = FALSE
)
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 base 4.52ms 5.01ms 200. 1.14MB 4.12
#> 2 iotools 971.08µs 1.06ms 898. 310.19KB 4.07
#> 3 readr 12.97ms 14.41ms 68.5 8.32MB 11.8
#> 4 arrow 1.82ms 2.02ms 492. 20.59MB 8.56