dm-writeboost-work-2.2.0
diff --git a/src/dm-writeboost-target.c b/src/dm-writeboost-target.c
index 33c24d1..7b2d04f 100644
--- a/src/dm-writeboost-target.c
+++ b/src/dm-writeboost-target.c
@@ -373,30 +373,18 @@ void dec_nr_dirty_caches(struct wb_device *wb)
 	wake_up_interruptible(&wb->wait_drop_caches);
 }
-static bool taint_mb(struct wb_device *wb, struct metablock *mb, struct bio *bio)
+static bool taint_mb(struct wb_device *wb, struct metablock *mb, u8 data_bits)
 {
 	unsigned long flags;
 	bool flip = false;
+	BUG_ON(data_bits == 0);
 	spin_lock_irqsave(&wb->mb_lock, flags);
 	if (!mb->dirtiness.is_dirty) {
 		mb->dirtiness.is_dirty = true;
 		flip = true;
 	}
-
-	if (likely(io_fullsize(bio))) {
-		mb->dirtiness.data_bits = 255;
-	} else {
-		u8 i;
-		u8 acc_bits = 0;
-		for (i = io_offset(bio); i < (io_offset(bio) + bio_sectors(bio)); i++)
-			acc_bits += (1 << i);
-
-		mb->dirtiness.data_bits |= acc_bits;
-	}
-
-	BUG_ON(!bio_sectors(bio));
-	BUG_ON(!mb->dirtiness.data_bits);
+	mb->dirtiness.data_bits |= data_bits;
 	spin_unlock_irqrestore(&wb->mb_lock, flags);
 	return flip;
@@ -435,141 +423,9 @@ struct dirtiness read_mb_dirtiness(struct wb_device *wb, struct segment_header *
 /*----------------------------------------------------------------------------*/
-struct writeback_mb_context {
-	struct wb_device *wb;
-	atomic_t count;
-	int err;
-};
-
-static void writeback_mb_complete(int read_err, unsigned long write_err, void *__context)
-{
-	struct writeback_mb_context *context = __context;
-
-	if (read_err || write_err)
-		context->err = 1;
-
-	if (atomic_dec_and_test(&context->count))
-		wake_up_active_wq(&context->wb->writeback_mb_wait_queue);
-}
-
-/*
- * Write back a cache from cache device to the backing device.
- * We don't need to make the data written back persistent because this segment
- * will be reused only after writeback daemon wrote this segment back.
- */
-static int writeback_mb(struct wb_device *wb, struct segment_header *seg,
-			struct metablock *mb, u8 data_bits, bool thread)
-{
-	struct writeback_mb_context context;
-	context.wb = wb;
-	context.err = 0;
-
-	BUG_ON(!data_bits);
-
-	if (data_bits == 255) {
-		struct dm_io_region src, dest;
-
-		atomic_set(&context.count, 1);
-
-		src = (struct dm_io_region) {
-			.bdev = wb->cache_dev->bdev,
-			.sector = calc_mb_start_sector(wb, seg, mb->idx),
-			.count = (1 << 3),
-		};
-		dest = (struct dm_io_region) {
-			.bdev = wb->backing_dev->bdev,
-			.sector = mb->sector,
-			.count = (1 << 3),
-		};
-		if (dm_kcopyd_copy(wb->copier, &src, 1, &dest, 0, writeback_mb_complete, &context))
-			/*
-			 * dm_kcopyd_copy fails before actual processing
-			 * Granting this case as a write error is too much so be as a read error.
-			 */
-			writeback_mb_complete(1, 0, &context);
-	} else {
-		u8 i;
-
-		u8 count = 0;
-		for (i = 0; i < 8; i++)
-			if (data_bits & (1 << i))
-				count++;
-
-		atomic_set(&context.count, count);
-
-		for (i = 0; i < 8; i++) {
-			struct dm_io_region src, dest;
-
-			if (!(data_bits & (1 << i)))
-				continue;
-
-			src = (struct dm_io_region) {
-				.bdev = wb->cache_dev->bdev,
-				.sector = calc_mb_start_sector(wb, seg, mb->idx) + i,
-				.count = 1,
-			};
-			dest = (struct dm_io_region) {
-				.bdev = wb->backing_dev->bdev,
-				.sector = mb->sector + i,
-				.count = 1,
-			};
-			if (dm_kcopyd_copy(wb->copier, &src, 1, &dest, 0, writeback_mb_complete, &context))
-				writeback_mb_complete(1, 0, &context);
-		}
-	}
-
-	wait_event(wb->writeback_mb_wait_queue, !atomic_read(&context.count));
-	return context.err;
-}
-
-/*
- * Write back a cache on the RAM buffer to backing device.
- * Calling this function is really rare so the code needs not to be optimal.
- * There is no need to write them back with FUA flag because the cache isn't
- * flushed yet and thus isn't persistent.
- */
-static int writeback_buffered_mb(struct wb_device *wb, struct metablock *mb, u8 data_bits)
-{
-	int r = 0;
-
-	sector_t offset = ((mb_idx_inseg(wb, mb->idx) + 1) << 3);
-	void *buf = mempool_alloc(wb->buf_1_pool, GFP_NOIO);
-
-	u8 i;
-	for (i = 0; i < 8; i++) {
-		struct dm_io_request io_req;
-		struct dm_io_region region;
-
-		void *src;
-		sector_t dest;
-
-		if (!(data_bits & (1 << i)))
-			continue;
-
-		src = wb->current_rambuf->data + ((offset + i) << SECTOR_SHIFT);
-		dest = mb->sector + i;
-
-		memcpy(buf, src, 1 << SECTOR_SHIFT);
-		io_req = (struct dm_io_request) {
-			.client = wb->io_client,
-			.bi_rw = WRITE,
-			.notify.fn = NULL,
-			.mem.type = DM_IO_KMEM,
-			.mem.ptr.addr = buf,
-		};
-		region = (struct dm_io_region) {
-			.bdev = wb->backing_dev->bdev,
-			.sector = dest,
-			.count = 1,
-		};
-		r |= wb_io(&io_req, 1, &region, NULL, true);
-	}
-	mempool_free(buf, wb->buf_1_pool);
-	return r;
-}
-
-void prepare_overwrite(struct wb_device *wb, struct segment_header *seg, struct metablock *old_mb, bool overwrite_fullsize)
+void prepare_overwrite(struct wb_device *wb, struct segment_header *seg, struct metablock *old_mb, struct write_io *wio)
 {
+	bool overwrite_fullsize = wio->data_bits == 255;
 	struct dirtiness dirtiness = read_mb_dirtiness(wb, seg, old_mb);
 	/*
@@ -590,7 +446,17 @@ void prepare_overwrite(struct wb_device *wb, struct segment_header *seg, struct
 	if (unlikely(needs_writeback_prev_cache)) {
 		wait_for_flushing(wb, seg->id);
 		BUG_ON(!dirtiness.is_dirty);
-		while (writeback_mb(wb, seg, old_mb, dirtiness.data_bits, true)) {}
+
+		bool retry = true;
+		do {
+			void *buf = read_mb(wb, seg, old_mb, dirtiness.data_bits);
+			if (!buf)
+				continue;
+			memcpy_masked(wio->data, buf, dirtiness.data_bits & ~wio->data_bits);
+			wio->data_bits |= dirtiness.data_bits;
+			mempool_free(buf, wb->buf_8_pool);
+			retry = false;
+		} while (retry);
 	}
 	if (mark_clean_mb(wb, old_mb))
@@ -631,11 +497,148 @@ static void copy_bio_payload(void *buf, struct bio *bio)
 	}
 }
-static void write_on_rambuffer(struct wb_device *wb, struct metablock *write_pos, struct bio *bio)
+// Copy a single 512B sector from buf into the bio payload.
+// i is the sector index relative to the bio's start (as if io_offset(bio) were 0).
+static void __copy_to_bio_payload(struct bio *bio, void *buf, u8 i)
+{
+	size_t head = 0;
+	size_t tail = 0;
+
+	bv_vec vec;
+	bv_it it;
+	bio_for_each_segment(vec, bio, it) {
+		size_t l = bv_len(vec);
+		tail += l;
+		if ((i << 9) < tail) {
+			size_t offset = (i << 9) - head;
+			BUG_ON((l - offset) < (1 << 9));
+			void *p = page_address(bv_page(vec)) + bv_offset(vec) + offset;
+			memcpy(p, buf, 1 << 9);
+			return;
+		}
+		head += l;
+	}
+	BUG();
+}
+
+// Copy sectors from the 4KB buf into the bio payload, one per set bit in copy_bits.
+static void copy_to_bio_payload(struct bio *bio, void *buf, u8 copy_bits)
+{
+	u8 offset = io_offset(bio);
+	u8 i;
+	for (i = 0; i < bio_sectors(bio); i++) {
+		u8 i_offset = i + offset;
+		if (copy_bits & (1 << i_offset))
+			__copy_to_bio_payload(bio, buf + (i_offset << 9), i);
+	}
+}
+
+static u8 to_mask(u8 offset, u8 count)
+{
+	u8 i;
+	u8 result = 0;
+	if (count == 8) {
+		result = 255;
+	} else {
+		for (i = 0; i < count; i++)
+			result |= (1 << (offset + i));
+	}
+	return result;
+}
+
+static int fill_payload_by_backing(struct wb_device *wb, struct bio *bio)
 {
-	sector_t start_sector = ((mb_idx_inseg(wb, write_pos->idx) + 1) << 3) + io_offset(bio);
-	size_t start_byte = start_sector << SECTOR_SHIFT;
-	copy_bio_payload(wb->current_rambuf->data + start_byte, bio);
+	sector_t start = bi_sector(bio);
+	u8 offset = do_io_offset(start);
+	u8 len = bio_sectors(bio);
+	u8 copy_bits = to_mask(offset, len);
+
+	void *buf = mempool_alloc(wb->buf_8_pool, GFP_NOIO);
+	if (!buf)
+		return -ENOMEM;
+
+	struct dm_io_request io_req = {
+		.client = wb->io_client,
+		.bi_rw = READ,
+		.notify.fn = NULL,
+		.mem.type = DM_IO_KMEM,
+		.mem.ptr.addr = buf + (offset << 9),
+	};
+	struct dm_io_region region = {
+		.bdev = wb->backing_dev->bdev,
+		.sector = start,
+		.count = len,
+	};
+	int r = wb_io(&io_req, 1, &region, NULL, true);
+	if (r)
+		goto out;
+
+	copy_to_bio_payload(bio, buf, copy_bits);
+out:
+	mempool_free(buf, wb->buf_8_pool);
+	return r;
+}
+
+// Returns a pointer into the current RAM buffer; the caller must not free it.
+static void *ref_buffered_mb(struct wb_device *wb, struct metablock *mb)
+{
+	sector_t offset = ((mb_idx_inseg(wb, mb->idx) + 1) << 3);
+	return wb->current_rambuf->data + (offset << 9);
+}
+
+static void *read_mb(struct wb_device *wb, struct segment_header *seg,
+		     struct metablock *mb, u8 data_bits)
+{
+	void *result = mempool_alloc(wb->buf_8_pool, GFP_NOIO);
+	if (!result)
+		return NULL;
+
+	u8 i;
+	for (i = 0; i < 8; i++) {
+		if (!(data_bits & (1 << i)))
+			continue;
+
+		struct dm_io_request io_req = {
+			.client = wb->io_client,
+			.bi_rw = READ,
+			.notify.fn = NULL,
+			.mem.type = DM_IO_KMEM,
+			.mem.ptr.addr = result + (i << 9),
+		};
+
+		struct dm_io_region region = {
+			.bdev = wb->cache_dev->bdev,
+			.sector = calc_mb_start_sector(wb, seg, mb->idx) + i,
+			.count = 1,
+		};
+
+		int r = wb_io(&io_req, 1, &region, NULL, true);
+		if (r) {
+			mempool_free(result, wb->buf_8_pool);
+			return NULL;
+		}
+	}
+	return result;
+}
+
+static void memcpy_masked(void *to, void *from, u8 mask)
+{
+	u8 i;
+	for (i = 0; i < 8; i++)
+		if (mask & (1 << i)) {
+			size_t offset = (i << 9);
+			memcpy(to + offset, from + offset, 1 << 9);
+		}
+}
+
+static void write_on_rambuffer(struct wb_device *wb, struct metablock *write_pos, struct write_io *wio)
+{
+	size_t mb_offset = (mb_idx_inseg(wb, write_pos->idx) + 1) << 12;
+	void *mb_data = wb->current_rambuf->data + mb_offset;
+	if (wio->data_bits == 255)
+		memcpy(mb_data, wio->data, 1 << 12);
+	else
+		memcpy_masked(mb_data, wio->data, wio->data_bits);
 }
 /*
@@ -748,26 +751,42 @@ static void dec_inflight_ios(struct wb_device *wb, struct segment_header *seg)
 	wake_up_active_wq(&wb->inflight_ios_wq);
 }
+struct write_io {
+	void *data; // 4KB
+	u8 data_bits;
+};
+
+static void initialize_write_io(struct write_io *wio, struct bio *bio)
+{
+	u8 offset = io_offset(bio);
+	sector_t count = bio_sectors(bio);
+	copy_bio_payload(wio->data + (offset << 9), bio);
+	wio->data_bits = to_mask(offset, count);
+}
+
 static void might_cancel_read_cache_cell(struct wb_device *, struct bio *);
-static struct metablock *prepare_write_pos(struct wb_device *wb, struct bio *bio)
+static int do_process_write(struct wb_device *wb, struct bio *bio)
 {
-	struct metablock *ret;
+	struct metablock *write_pos = NULL;
 	struct lookup_result res;
+	struct write_io wio;
+	wio.data = mempool_alloc(wb->buf_8_pool, GFP_NOIO);
+	if (!wio.data)
+		return -ENOMEM;
+	initialize_write_io(&wio, bio);
+
 	mutex_lock(&wb->io_lock);
 	cache_lookup(wb, bio, &res);
+
 	if (res.found) {
 		if (unlikely(res.on_buffer)) {
-			/* Overwrite on the ram buffer */
-			mutex_unlock(&wb->io_lock);
-			return res.found_mb;
+			write_pos = res.found_mb;
+			goto do_write;
 		} else {
-			/*
-			 * Invalidate the old cache on the cache device because
-			 * we can't overwrite cache block on the cache device.
-			 */
-			prepare_overwrite(wb, res.found_seg, res.found_mb, io_fullsize(bio));
+			prepare_overwrite(wb, res.found_seg, res.found_mb, &wio);
 			dec_inflight_ios(wb, res.found_seg);
 		}
 	} else
@@ -777,25 +796,24 @@ static struct metablock *prepare_write_pos(struct wb_device *wb, struct bio *bio
 	might_queue_current_buffer(wb);
-	ret = prepare_new_write_pos(wb);
+	write_pos = prepare_new_write_pos(wb);
+
+do_write:
+	BUG_ON(write_pos == NULL);
+	write_on_rambuffer(wb, write_pos, &wio);
-	ht_register(wb, res.head, ret, &res.key);
+	if (taint_mb(wb, write_pos, wio.data_bits))
+		inc_nr_dirty_caches(wb);
+
+	ht_register(wb, res.head, write_pos, &res.key);
 	mutex_unlock(&wb->io_lock);
-	return ret;
+	mempool_free(wio.data, wb->buf_8_pool);
+	return 0;
 }
-/*
- * Write bio data to RAM buffer.
- */
-static int do_process_write(struct wb_device *wb, struct metablock *write_pos, struct bio *bio)
+static int complete_process_write(struct wb_device *wb, struct bio *bio)
 {
-	if (taint_mb(wb, write_pos, bio))
-		inc_nr_dirty_caches(wb);
-
-	write_on_rambuffer(wb, write_pos, bio);
-
 	dec_inflight_ios(wb, wb->current_seg);
 	/*
@@ -833,19 +851,21 @@ static int do_process_write(struct wb_device *wb, struct metablock *write_pos, s
  * 2) Wait for decrement outside the lock
  *
  * process_write:
- *   prepare_write_pos:
+ *   do_process_write:
  *     mutex_lock (to serialize write)
 *     inc in_flight_ios # refcount on the dst segment
 *     mutex_unlock
 *
- *   do_process_write:
+ *   complete_process_write:
 *     dec in_flight_ios
 *     bio_endio(bio)
 */
 static int process_write_wb(struct wb_device *wb, struct bio *bio)
 {
-	struct metablock *write_pos = prepare_write_pos(wb, bio);
-	return do_process_write(wb, write_pos, bio);
+	int err = do_process_write(wb, bio);
+	if (err)
+		return err;
+	return complete_process_write(wb, bio);
 }
 static int process_write_wt(struct wb_device *wb, struct bio *bio)
@@ -889,8 +909,6 @@ struct per_bio_data {
 static void reserve_read_cache_cell(struct wb_device *, struct bio *);
 static int process_read(struct wb_device *wb, struct bio *bio)
 {
-	int io_res = 0;
-
 	struct lookup_result res;
 	struct dirtiness dirtiness;
@@ -907,18 +925,20 @@ static int process_read(struct wb_device *wb, struct bio *bio)
 	dirtiness = read_mb_dirtiness(wb, res.found_seg, res.found_mb);
 	if (unlikely(res.on_buffer)) {
+		int err = fill_payload_by_backing(wb, bio);
+		if (err)
+			goto read_buffered_mb_exit;
+
 		if (dirtiness.is_dirty)
-			io_res = writeback_buffered_mb(wb, res.found_mb, dirtiness.data_bits);
+			copy_to_bio_payload(bio, ref_buffered_mb(wb, res.found_mb), dirtiness.data_bits);
+read_buffered_mb_exit:
 		dec_inflight_ios(wb, res.found_seg);
-		bio_remap(bio, wb->backing_dev, bi_sector(bio));
-		if (unlikely(io_res)) {
+		if (unlikely(err))
 			bio_io_error(bio);
-			return DM_MAPIO_SUBMITTED;
-		}
-		return DM_MAPIO_REMAPPED;
+		return DM_MAPIO_SUBMITTED;
 	}
 	/*
@@ -927,31 +947,36 @@ static int process_read(struct wb_device *wb, struct bio *bio)
 	 */
 	wait_for_flushing(wb, res.found_seg->id);
-	BUG_ON(io_res);
-
-	if (likely(dirtiness.data_bits == 255)) {
-		struct per_bio_data *pbd = per_bio_data(wb, bio);
-		pbd->type = PBD_READ_SEG;
-		pbd->seg = res.found_seg;
+	if (unlikely(dirtiness.data_bits != 255)) {
+		int err = fill_payload_by_backing(wb, bio);
+		if (err)
+			goto read_mb_exit;
+
+		if (dirtiness.is_dirty) {
+			void *buf = read_mb(wb, res.found_seg, res.found_mb, dirtiness.data_bits);
+			if (!buf) {
+				err = -EIO;
+				goto read_mb_exit;
+			}
+			copy_to_bio_payload(bio, buf, dirtiness.data_bits);
+			mempool_free(buf, wb->buf_8_pool);
+		}
-		bio_remap(bio, wb->cache_dev,
-			  calc_mb_start_sector(wb, res.found_seg, res.found_mb->idx) +
-			  io_offset(bio));
-	} else {
-		if (dirtiness.is_dirty)
-			io_res = writeback_mb(wb, res.found_seg, res.found_mb, dirtiness.data_bits, true);
-		if (!io_res)
-			if (mark_clean_mb(wb, res.found_mb))
-				dec_nr_dirty_caches(wb);
+read_mb_exit:
 		dec_inflight_ios(wb, res.found_seg);
-		bio_remap(bio, wb->backing_dev, bi_sector(bio));
-	}
-	if (unlikely(io_res)) {
-		bio_io_error(bio);
+		if (unlikely(err))
+			bio_io_error(bio);
+
 		return DM_MAPIO_SUBMITTED;
 	}
+	struct per_bio_data *pbd = per_bio_data(wb, bio);
+	pbd->type = PBD_READ_SEG;
+	pbd->seg = res.found_seg;
+
+	bio_remap(bio, wb->cache_dev,
+		  calc_mb_start_sector(wb, res.found_seg, res.found_mb->idx) +
+		  io_offset(bio));
+
 	return DM_MAPIO_REMAPPED;
 }
@@ -1480,7 +1505,7 @@ static int init_core_struct(struct dm_target *ti)
 	ti->private = wb;
 	wb->ti = ti;
-	init_waitqueue_head(&wb->writeback_mb_wait_queue);
+	// init_waitqueue_head(&wb->writeback_mb_wait_queue);
 	wb->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(wb->copier)) {
 		r = PTR_ERR(wb->copier);
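
Note on the sector-bitmask helpers: to_mask() and memcpy_masked() in the patch are self-contained, so their behavior can be checked in userspace. Below is a minimal sketch (not part of the patch) with kernel types swapped for stdint equivalents; it assumes 512-byte sectors and an 8-sector (4KB) metablock, matching the (1 << 9) and (1 << 12) shifts in the patch.

/* Userspace sketch of the patch's sector-bitmask helpers; not kernel code. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SECTOR_SIZE (1 << 9)	/* 512B, as in the patch's (1 << 9) shifts */
#define MB_SECTORS  8		/* one 4KB metablock = 8 sectors */

/* Same logic as the patch's to_mask(): one bit per sector covered by an
 * I/O starting at `offset` sectors and spanning `count` sectors. */
static uint8_t to_mask(uint8_t offset, uint8_t count)
{
	uint8_t i, result = 0;
	if (count == 8)
		return 255;
	for (i = 0; i < count; i++)
		result |= (uint8_t)(1 << (offset + i));
	return result;
}

/* Same logic as the patch's memcpy_masked(): copy only the sectors
 * whose bit is set in `mask`. */
static void memcpy_masked(void *to, const void *from, uint8_t mask)
{
	uint8_t i;
	for (i = 0; i < MB_SECTORS; i++)
		if (mask & (1 << i)) {
			size_t off = (size_t)i << 9;
			memcpy((char *)to + off, (const char *)from + off, SECTOR_SIZE);
		}
}

int main(void)
{
	char mb[MB_SECTORS * SECTOR_SIZE];	/* simulated RAM-buffer metablock */
	char io[MB_SECTORS * SECTOR_SIZE];	/* simulated 4KB write_io buffer */

	memset(mb, 'O', sizeof(mb));		/* old data */
	memset(io, 'N', sizeof(io));		/* new data */

	/* A 2-sector write at sector offset 3 sets bits 3 and 4. */
	uint8_t bits = to_mask(3, 2);
	assert(bits == 0x18);

	memcpy_masked(mb, io, bits);
	assert(mb[3 * SECTOR_SIZE] == 'N');	/* covered sector updated */
	assert(mb[2 * SECTOR_SIZE] == 'O');	/* uncovered sector untouched */

	printf("data_bits = 0x%02x\n", bits);
	return 0;
}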
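
A second subtlety worth illustrating is the read-modify-write merge in prepare_overwrite(): the old block's dirty sectors are merged into the write_io buffer only where the incoming write did not already place new data (the & ~wio->data_bits mask), and the dirty bits are then OR-ed together. The sketch below isolates just that merge under the same 8-sector assumptions; the variable names are hypothetical stand-ins for wio->data/wio->data_bits and the buffer returned by read_mb().

/* Userspace sketch of the prepare_overwrite() merge; names hypothetical. */
#include <assert.h>
#include <stdint.h>
#include <string.h>

#define SECTOR_SIZE (1 << 9)

static void memcpy_masked(void *to, const void *from, uint8_t mask)
{
	uint8_t i;
	for (i = 0; i < 8; i++)
		if (mask & (1 << i)) {
			size_t off = (size_t)i << 9;
			memcpy((char *)to + off, (const char *)from + off, SECTOR_SIZE);
		}
}

int main(void)
{
	char wio_data[8 * SECTOR_SIZE];	/* write_io buffer: new write covers sectors 0-1 */
	char old_mb[8 * SECTOR_SIZE];	/* old cache block: sectors 1-2 dirty */
	uint8_t wio_bits = 0x03;	/* bits 0,1 */
	uint8_t old_bits = 0x06;	/* bits 1,2 */

	memset(wio_data, 'N', sizeof(wio_data));
	memset(old_mb, 'O', sizeof(old_mb));

	/* Merge only the old dirty sectors the new write does NOT cover. */
	memcpy_masked(wio_data, old_mb, old_bits & (uint8_t)~wio_bits);
	wio_bits |= old_bits;

	assert(wio_data[1 * SECTOR_SIZE] == 'N');	/* overlap: new data wins */
	assert(wio_data[2 * SECTOR_SIZE] == 'O');	/* old-only sector carried over */
	assert(wio_bits == 0x07);
	return 0;
}

If the merge used dirtiness.data_bits alone as the mask, the overlapping sector 1 would be reverted to stale data, which is why the complement of the new write's bits matters there.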