Skip to content

Instantly share code, notes, and snippets.

@laino
Last active July 23, 2016 23:26
Show Gist options
  • Save laino/fa00b77604404458dc634e959e325072 to your computer and use it in GitHub Desktop.
Save laino/fa00b77604404458dc634e959e325072 to your computer and use it in GitHub Desktop.
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
index 7731a82..301e395 100644
--- a/dlls/wined3d/cs.c
+++ b/dlls/wined3d/cs.c
@@ -662,7 +662,10 @@ static void wined3d_cs_exec_clear(struct wined3d_cs *cs, const void *data)
* In Counter-Strike: Source a frame difference of 3 causes noticable
* input delay that makes the game unplayable. */
while (pending > 1)
+ {
+ wined3d_cs_mt_yield();
pending = InterlockedCompareExchange(&cs->pending_presents, 0, 0);
+ }
}
static UINT wined3d_cs_exec_clear(struct wined3d_cs *cs, const void *data)
@@ -3285,7 +3288,8 @@ static void wined3d_cs_mt_finish(struct wined3d_cs *cs)
/* A busy wait should be fine, we're not supposed to have to wait very
* long. */
- while (!InterlockedCompareExchange(&fence, TRUE, TRUE));
+ while (!InterlockedCompareExchange(&fence, TRUE, TRUE))
+ wined3d_cs_mt_yield();
}
static void wined3d_cs_mt_finish_prio(struct wined3d_cs *cs)
@@ -3307,7 +3311,8 @@ static void wined3d_cs_mt_finish_prio(struct wined3d_cs *cs)
/* A busy wait should be fine, we're not supposed to have to wait very
* long. */
- while (!InterlockedCompareExchange(&fence, TRUE, TRUE));
+ while (!InterlockedCompareExchange(&fence, TRUE, TRUE))
+ wined3d_cs_mt_yield();
}
static const struct wined3d_cs_ops wined3d_cs_mt_ops =
@@ -3425,7 +3430,9 @@ static DWORD WINAPI wined3d_cs_run(void *thread_param)
LONG tail;
char poll = 0;
struct wined3d_cs_queue *queue;
- unsigned int spin_count = 0;
+
+ LARGE_INTEGER spin_time, now, diff;
+ spin_time.QuadPart = 0;
TRACE("Started.\n");
@@ -3448,19 +3455,37 @@ static DWORD WINAPI wined3d_cs_run(void *thread_param)
else if (!queue_is_empty(&cs->queue))
{
queue = &cs->queue;
- if (!queue_is_empty(&cs->prio_queue))
- queue = &cs->prio_queue;
+ }
+ else if (poll != 0)
+ {
+ poll = 10;
+ continue;
}
else
{
- spin_count++;
- if (spin_count >= WINED3D_CS_SPIN_COUNT && list_empty(&cs->query_poll_list))
+ NtQuerySystemTime(&now);
+
+ if (spin_time.QuadPart == 0)
+ {
+ spin_time.QuadPart = now.QuadPart;
+ }
+
+ diff.QuadPart = now.QuadPart - spin_time.QuadPart;
+
+ if (diff.QuadPart < WINED3D_CS_SPIN_TIME ||
+ !list_empty(&cs->query_poll_list))
+ {
+ wined3d_cs_mt_yield();
+ }
+ else
+ {
wined3d_cs_wait_event(cs);
+ }
continue;
}
- spin_count = 0;
+ spin_time.QuadPart = 0;
tail = queue->tail;
opcode = *(const enum wined3d_cs_op *)&queue->data[tail];
diff --git a/dlls/wined3d/wined3d_main.c b/dlls/wined3d/wined3d_main.c
index ac6aa6e..f262fca 100644
--- a/dlls/wined3d/wined3d_main.c
+++ b/dlls/wined3d/wined3d_main.c
@@ -93,6 +93,7 @@ struct wined3d_settings wined3d_settings =
FALSE, /* 3D support enabled by default. */
#if defined(STAGING_CSMT)
TRUE, /* Multithreaded CS by default. */
+ 2, /* 0 = Busy wait, 1 = usleep, 2 = SwitchToThread */
#endif /* STAGING_CSMT */
};
@@ -346,6 +347,22 @@ static BOOL wined3d_dll_init(HINSTANCE hInstDLL)
TRACE("Disabling multithreaded command stream.\n");
wined3d_settings.cs_multithreaded = FALSE;
}
+ if (!get_config_key(hkey, appkey, "CSMTYield", buffer, size))
+ {
+ if (!strcmp(buffer, "nothing"))
+ {
+ TRACE("CSMT will use busy-wait loops.\n");
+ wined3d_settings.cs_multithreaded_yield = 0;
+ } else if (!strcmp(buffer, "usleep"))
+ {
+ TRACE("CSMT will use usleep in wait loops.\n");
+ wined3d_settings.cs_multithreaded_yield = 1;
+ } else if (!strcmp(buffer, "SwitchToThread"))
+ {
+ TRACE("CSMT will use SwitchToThread in wait loops.\n");
+ wined3d_settings.cs_multithreaded_yield = 2;
+ }
+ }
}
FIXME_(winediag)("Experimental wined3d CSMT feature is currently %s.\n",
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 199731d..dee3a78 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -315,6 +315,7 @@ struct wined3d_settings
BOOL no_3d;
#if defined(STAGING_CSMT)
BOOL cs_multithreaded;
+ int cs_multithreaded_yield;
#endif /* STAGING_CSMT */
};
@@ -2667,9 +2668,26 @@ static inline void wined3d_resource_release(struct wined3d_resource *resource)
InterlockedDecrement(&resource->access_count);
}
+#if defined(STAGING_CSMT)
+static inline void wined3d_cs_mt_yield(void)
+{
+ int yield_strategy = wined3d_settings.cs_multithreaded_yield;
+
+ if (yield_strategy == 1)
+ usleep(0);
+ else if (yield_strategy == 2)
+ SwitchToThread();
+}
+#endif /* STAGING_CSMT */
+
static inline void wined3d_resource_wait_idle(struct wined3d_resource *resource)
{
- while (InterlockedCompareExchange(&resource->access_count, 0, 0));
+ while (InterlockedCompareExchange(&resource->access_count, 0, 0))
+ {
+#if defined(STAGING_CSMT)
+ wined3d_cs_mt_yield();
+#endif /* STAGING_CSMT */
+ }
}
void resource_cleanup(struct wined3d_resource *resource) DECLSPEC_HIDDEN;
@@ -3167,7 +3185,7 @@ struct wined3d_cs_list
};
#define WINED3D_CS_QUEUE_SIZE 0x100000
-#define WINED3D_CS_SPIN_COUNT 10000000
+#define WINED3D_CS_SPIN_TIME 100000
struct wined3d_cs_queue
{
@laino
Copy link
Author

laino commented Jul 23, 2016

I also added a yield to the main loop, while making sure that "poll_queries" is called once before we yield.

The default is now "SwitchToThread", which is basically sched_yield and the same thing the nvidia driver does. On laptops and in other situations where there are limiting factors like TDP, "usleep" might yield better overall performance.

HKCU/Software/Wine/Direct3D/CSMTYield can be set to one of the following values to control behavior:
nothing - this is the old behavior, use busy-waits
usleep - use usleep(0), recommended for machines that hit their TDP
SwitchToThread - the default; this is basically sched_yield() on Linux

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment