Skip to content

Instantly share code, notes, and snippets.

@sleinen
Created August 4, 2013 19:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sleinen/6151553 to your computer and use it in GitHub Desktop.
Save sleinen/6151553 to your computer and use it in GitHub Desktop.
We recently lost four OSDs (8,9,10,11) in the same server of our 64-OSD/10-server cluster. After reformatting the file systems, two objects remain unfound. Unfortunately it is not possible to declare them as "lost", because osd.9 remains in "querying" state (see line 123, i.e. the "might_have_unfound" entry in the `ceph pg 0.cfa query` output below). Any idea on how to get this unstuck? I have already tried restarting the …
: root@ineri[leinen]; ceph health detail
HEALTH_WARN 1 pgs degraded; 1 pgs recovering; 1 pgs stuck unclean; recovery 2158/19171654 degraded (0.011%); 2/9585827 unfound (0.000%)
pg 0.cfa is stuck unclean for 249687.042135, current state active+recovering+degraded, last acting [23,50]
pg 0.cfa is active+recovering+degraded, acting [23,50], 2 unfound
recovery 2158/19171654 degraded (0.011%); 2/9585827 unfound (0.000%)
: root@ineri[leinen]; ceph pg dump_stuck unclean
ok
pg_stat objects mip degr unf bytes log disklog state state_stamp v reported up acting last_scrub scrub_stamp last_deep_scrub deep_scrub_stamp
0.cfa 2178 2 2158 2 143697053 0 0 active+recovering+degraded 2013-08-02 14:26:53.965345 28074'7610 28074'41570 [23,50] [23,50] 20585'6801 2013-07-28 15:40:53.298786 20585'6801 2013-07-28 15:40:53.298786
: root@ineri[leinen]; ceph pg 0.cfa list_missing
{ "offset": { "oid": "",
"key": "",
"snapid": 0,
"hash": 0,
"max": 0},
"num_missing": 2,
"num_unfound": 2,
"objects": [
{ "oid": { "oid": "100007e0341.00000000",
"key": "",
"snapid": -2,
"hash": 1032543482,
"max": 0},
"need": "25004'7085",
"have": "0'0",
"locations": []},
{ "oid": { "oid": "100007e031c.00000000",
"key": "",
"snapid": -2,
"hash": 137927930,
"max": 0},
"need": "25004'7084",
"have": "0'0",
"locations": []}],
"more": 0}
: root@ineri[leinen]; ceph pg 0.cfa query
{ "state": "active+recovering+degraded",
"epoch": 28074,
"up": [
23,
50],
"acting": [
23,
50],
"info": { "pgid": "0.cfa",
"last_update": "28074'7610",
"last_complete": "23686'7083",
"log_tail": "14360'4061",
"last_backfill": "MAX",
"purged_snaps": "[]",
"history": { "epoch_created": 1,
"last_epoch_started": 26037,
"last_epoch_clean": 24810,
"last_epoch_split": 0,
"same_up_since": 26036,
"same_interval_since": 26036,
"same_primary_since": 26036,
"last_scrub": "20585'6801",
"last_scrub_stamp": "2013-07-28 15:40:53.298786",
"last_deep_scrub": "20585'6801",
"last_deep_scrub_stamp": "2013-07-28 15:40:53.298786",
"last_clean_scrub_stamp": "2013-07-28 15:40:53.298786"},
"stats": { "version": "28074'7610",
"reported": "28074'41570",
"state": "active+recovering+degraded",
"last_fresh": "2013-08-04 21:09:57.138534",
"last_change": "2013-08-02 14:26:53.965345",
"last_active": "2013-08-04 21:09:57.138534",
"last_clean": "2013-08-01 23:50:18.414082",
"last_became_active": "2013-05-29 13:10:51.366237",
"last_unstale": "2013-08-04 21:09:57.138534",
"mapping_epoch": 26011,
"log_start": "14360'4061",
"ondisk_log_start": "14360'4061",
"created": 1,
"last_epoch_clean": 24810,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "20585'6801",
"last_scrub_stamp": "2013-07-28 15:40:53.298786",
"last_deep_scrub": "20585'6801",
"last_deep_scrub_stamp": "2013-07-28 15:40:53.298786",
"last_clean_scrub_stamp": "2013-07-28 15:40:53.298786",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": "0",
"stat_sum": { "num_bytes": 143697053,
"num_objects": 2178,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_degraded": 0,
"num_objects_unfound": 0,
"num_read": 736,
"num_read_kb": 405270,
"num_write": 7610,
"num_write_kb": 1138964,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 3998,
"num_bytes_recovered": 278803622,
"num_keys_recovered": 0},
"stat_cat_sum": {},
"up": [
23,
50],
"acting": [
23,
50]},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 26037},
"recovery_state": [
{ "name": "Started\/Primary\/Active",
"enter_time": "2013-08-02 14:26:51.676625",
"might_have_unfound": [
{ "osd": 9,
"status": "querying"},
{ "osd": 50,
"status": "already probed"}],
"recovery_progress": { "backfill_target": 50,
"waiting_on_backfill": 0,
"backfill_pos": "96220cfa\/10000799e82.00000000\/head\/\/0",
"backfill_info": { "begin": "0\/\/0\/\/-1",
"end": "0\/\/0\/\/-1",
"objects": []},
"peer_backfill_info": { "begin": "0\/\/0\/\/-1",
"end": "0\/\/0\/\/-1",
"objects": []},
"backfills_in_flight": [],
"pull_from_peer": [],
"pushing": []},
"scrub": { "scrubber.epoch_start": "0",
"scrubber.active": 0,
"scrubber.block_writes": 0,
"scrubber.finalizing": 0,
"scrubber.waiting_on": 0,
"scrubber.waiting_on_whom": []}},
{ "name": "Started",
"enter_time": "2013-08-02 14:26:49.990166"}]}
: root@ineri[leinen]; ceph pg 0.cfa mark_unfound_lost revert
pg has 2 objects but we haven't probed all sources, not marking lost
: root@ineri[leinen];
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment