Created
August 4, 2013 19:18
-
-
Save sleinen/6151553 to your computer and use it in GitHub Desktop.
We recently lost four OSDs (8,9,10,11) on the same server of our 64-OSD/10-server cluster. After reformatting the file systems, two objects remain unfound. Unfortunately it is not possible to declare them as "lost", because osd.9 remains in "querying" state (see line 123). Any idea on how to get this unstuck? I have already tried restarting the …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
: root@ineri[leinen]; ceph health detail | |
HEALTH_WARN 1 pgs degraded; 1 pgs recovering; 1 pgs stuck unclean; recovery 2158/19171654 degraded (0.011%); 2/9585827 unfound (0.000%) | |
pg 0.cfa is stuck unclean for 249687.042135, current state active+recovering+degraded, last acting [23,50] | |
pg 0.cfa is active+recovering+degraded, acting [23,50], 2 unfound | |
recovery 2158/19171654 degraded (0.011%); 2/9585827 unfound (0.000%) | |
: root@ineri[leinen]; ceph pg dump_stuck unclean | |
ok | |
pg_stat objects mip degr unf bytes log disklog state state_stamp v reported up acting last_scrub scrub_stamp last_deep_scrub deep_scrub_stamp | |
0.cfa 2178 2 2158 2 143697053 0 0 active+recovering+degraded 2013-08-02 14:26:53.965345 28074'7610 28074'41570 [23,50] [23,50] 20585'6801 2013-07-28 15:40:53.298786 20585'6801 2013-07-28 15:40:53.298786 | |
: root@ineri[leinen]; ceph pg 0.cfa list_missing | |
{ "offset": { "oid": "", | |
"key": "", | |
"snapid": 0, | |
"hash": 0, | |
"max": 0}, | |
"num_missing": 2, | |
"num_unfound": 2, | |
"objects": [ | |
{ "oid": { "oid": "100007e0341.00000000", | |
"key": "", | |
"snapid": -2, | |
"hash": 1032543482, | |
"max": 0}, | |
"need": "25004'7085", | |
"have": "0'0", | |
"locations": []}, | |
{ "oid": { "oid": "100007e031c.00000000", | |
"key": "", | |
"snapid": -2, | |
"hash": 137927930, | |
"max": 0}, | |
"need": "25004'7084", | |
"have": "0'0", | |
"locations": []}], | |
"more": 0} | |
: root@ineri[leinen]; ceph pg 0.cfa query | |
{ "state": "active+recovering+degraded", | |
"epoch": 28074, | |
"up": [ | |
23, | |
50], | |
"acting": [ | |
23, | |
50], | |
"info": { "pgid": "0.cfa", | |
"last_update": "28074'7610", | |
"last_complete": "23686'7083", | |
"log_tail": "14360'4061", | |
"last_backfill": "MAX", | |
"purged_snaps": "[]", | |
"history": { "epoch_created": 1, | |
"last_epoch_started": 26037, | |
"last_epoch_clean": 24810, | |
"last_epoch_split": 0, | |
"same_up_since": 26036, | |
"same_interval_since": 26036, | |
"same_primary_since": 26036, | |
"last_scrub": "20585'6801", | |
"last_scrub_stamp": "2013-07-28 15:40:53.298786", | |
"last_deep_scrub": "20585'6801", | |
"last_deep_scrub_stamp": "2013-07-28 15:40:53.298786", | |
"last_clean_scrub_stamp": "2013-07-28 15:40:53.298786"}, | |
"stats": { "version": "28074'7610", | |
"reported": "28074'41570", | |
"state": "active+recovering+degraded", | |
"last_fresh": "2013-08-04 21:09:57.138534", | |
"last_change": "2013-08-02 14:26:53.965345", | |
"last_active": "2013-08-04 21:09:57.138534", | |
"last_clean": "2013-08-01 23:50:18.414082", | |
"last_became_active": "2013-05-29 13:10:51.366237", | |
"last_unstale": "2013-08-04 21:09:57.138534", | |
"mapping_epoch": 26011, | |
"log_start": "14360'4061", | |
"ondisk_log_start": "14360'4061", | |
"created": 1, | |
"last_epoch_clean": 24810, | |
"parent": "0.0", | |
"parent_split_bits": 0, | |
"last_scrub": "20585'6801", | |
"last_scrub_stamp": "2013-07-28 15:40:53.298786", | |
"last_deep_scrub": "20585'6801", | |
"last_deep_scrub_stamp": "2013-07-28 15:40:53.298786", | |
"last_clean_scrub_stamp": "2013-07-28 15:40:53.298786", | |
"log_size": 0, | |
"ondisk_log_size": 0, | |
"stats_invalid": "0", | |
"stat_sum": { "num_bytes": 143697053, | |
"num_objects": 2178, | |
"num_object_clones": 0, | |
"num_object_copies": 0, | |
"num_objects_missing_on_primary": 0, | |
"num_objects_degraded": 0, | |
"num_objects_unfound": 0, | |
"num_read": 736, | |
"num_read_kb": 405270, | |
"num_write": 7610, | |
"num_write_kb": 1138964, | |
"num_scrub_errors": 0, | |
"num_shallow_scrub_errors": 0, | |
"num_deep_scrub_errors": 0, | |
"num_objects_recovered": 3998, | |
"num_bytes_recovered": 278803622, | |
"num_keys_recovered": 0}, | |
"stat_cat_sum": {}, | |
"up": [ | |
23, | |
50], | |
"acting": [ | |
23, | |
50]}, | |
"empty": 0, | |
"dne": 0, | |
"incomplete": 0, | |
"last_epoch_started": 26037}, | |
"recovery_state": [ | |
{ "name": "Started\/Primary\/Active", | |
"enter_time": "2013-08-02 14:26:51.676625", | |
"might_have_unfound": [ | |
{ "osd": 9, | |
"status": "querying"}, | |
{ "osd": 50, | |
"status": "already probed"}], | |
"recovery_progress": { "backfill_target": 50, | |
"waiting_on_backfill": 0, | |
"backfill_pos": "96220cfa\/10000799e82.00000000\/head\/\/0", | |
"backfill_info": { "begin": "0\/\/0\/\/-1", | |
"end": "0\/\/0\/\/-1", | |
"objects": []}, | |
"peer_backfill_info": { "begin": "0\/\/0\/\/-1", | |
"end": "0\/\/0\/\/-1", | |
"objects": []}, | |
"backfills_in_flight": [], | |
"pull_from_peer": [], | |
"pushing": []}, | |
"scrub": { "scrubber.epoch_start": "0", | |
"scrubber.active": 0, | |
"scrubber.block_writes": 0, | |
"scrubber.finalizing": 0, | |
"scrubber.waiting_on": 0, | |
"scrubber.waiting_on_whom": []}}, | |
{ "name": "Started", | |
"enter_time": "2013-08-02 14:26:49.990166"}]} | |
: root@ineri[leinen]; ceph pg 0.cfa mark_unfound_lost revert | |
pg has 2 objects but we haven't probed all sources, not marking lost | |
: 22root@ineri[leinen]; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment