-
-
Save xrl/5b715cd5b8c114314d7e6b52ecf36d8b to your computer and use it in GitHub Desktop.
fn kinesis_pipeline_threadpool( | |
client: DefaultKinesisClient, | |
stream_name: String, | |
puts_threads: usize, | |
puts_size: usize, | |
) { | |
let (tx, rx) = std::sync::mpsc::sync_channel(1); | |
let rx = std::sync::Arc::new(std::sync::Mutex::new(rx)); | |
let workers : Vec<std::thread::JoinHandle<()>> = (0..puts_threads).map(|_|{ | |
let rx = rx.clone(); | |
let stream_name = stream_name.clone(); | |
std::thread::spawn(move ||{ | |
info!("spawning worker thread"); | |
let client = Arc::new(KinesisClient::simple(Region::UsWest2)); | |
loop { | |
let recv_res = { | |
rx.lock().unwrap().recv() | |
}; | |
match recv_res { | |
Ok(batch) => { | |
let put_res = client.put_records(&PutRecordsInput { | |
records: batch, | |
stream_name: stream_name.clone(), | |
}).sync(); | |
info!("put_res is ok {:?}", put_res.is_ok()); | |
}, | |
Err(e) => { | |
error!("error receving: {:?}", e); | |
} | |
} | |
} | |
}) | |
}).collect(); | |
let data = FauxData::new(); | |
let mut batch = Vec::with_capacity(500); | |
for datum in data { | |
batch.push(datum); | |
if batch.len() == puts_size { | |
tx.send(batch); | |
batch = Vec::with_capacity(puts_size); | |
} | |
} | |
} |
Ah, I saw just now that it seems like you want to submit more than one batch to Kinesis concurrently.
You should be able to do this by changing the publisher code to use the `Stream::buffer_unordered`
method, like so:
// NOTE(review): illustrative snippet from the discussion — it relies on
// `rx`, `stream_name`, and the futures `Stream` trait being in scope, so
// it is not compilable on its own.
static CONCURRENCY: usize = 4;
std::thread::spawn(move || {
let client = KinesisClient::simple(Region::UsWest2);
// Group incoming records into batches of 500 and map each batch to a
// PutRecords future; buffer_unordered drives up to CONCURRENCY of those
// futures at once, yielding results in completion order.
let puts = rx.chunks(500).map(|batch| {
client.put_records(&PutRecordsInput {
records: batch,
stream_name: stream_name.clone(),
}).then(|put_res| put_res)
}).buffer_unordered(CONCURRENCY);
// Block this thread driving the stream, logging each put's outcome.
for put_res in puts.wait() {
info!("put_res is ok {:?}", put_res.is_ok());
}
});
Reading over this again, here's a slight correction for the sending code:
// NOTE(review): corrected sending code — with futures 0.1 `Sink::send`
// consumes the sender and returns a future that yields it back on
// completion, so `tx` must be rebound after every send.
let mut tx = ...
let data = FauxData::new();
for datum in data {
// `.wait()` blocks until the channel accepts the record, providing
// backpressure against the consumer.
tx = tx.send(datum).wait()
.expect("failed to send record to publisher");
}
@srijs thanks! This did indeed get me further, the code compiles and it does fire off requests but the problem is that there only appears to be one open socket to the AWS service. Here is the code: https://github.com/tureus/kinesis-hyper/blob/a5d26485d37becc39806001f5438f0d424eb8976/src/main.rs#L208-L219, and when I run it, netstat shows:
root@doit-1800981089-sz6zt:~# netstat
Active Internet connections (w/o servers)
Proto Recv-Q Send-Q Local Address Foreign Address State
tcp 0 0 doit-1800981089-s:46532 52.94.210.214:https TIME_WAIT
tcp 0 546498 doit-1800981089-s:46534 52.94.210.214:https ESTABLISHED
Active UNIX domain sockets (w/o servers)
Proto RefCnt Flags Type State I-Node Path
The TIME_WAIT socket is the one that went and got the kinesis topic name, the second one is shown going up and down in SEND-Q, waiting for AWS to pull down the data. I expected to see 500 open connections (or at least more than 1). Is there a connection or threadpool setting I haven't found?
Okay, so here's roughly what I had in mind. The overall approach is based on futures primitives, so you'll need the `Future`, `Stream`, and `Sink` traits in scope. To facilitate communication between producers and consumer, let's set up a futures channel:
With that in place, we can spawn a single consumer thread that reads off the stream in batches of 500 records, and then uses the Kinesis client to persist these batches:
Now we should be able to put records onto the stream:
What do you think?