summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-05-15 09:59:49 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-05-15 09:59:49 -0700
commite7cea7905815ac938e6e90b0cb6b91bcd22f6a15 (patch)
treec39f7337f6c3de37ab3db11805137942f354175d /drivers
parent1ae7efb388540adc1653a51a3bc3b2c9cef5ec1a (diff)
parent1d2a1eb13610a9c8ec95f6f1e02cef55000f28e3 (diff)
downloadlinux-sh-e7cea7905815ac938e6e90b0cb6b91bcd22f6a15.tar.gz
Merge tag 'drm-fixes-2020-05-15' of git://anongit.freedesktop.org/drm/drm
Pull drm fixes from Dave Airlie: "As mentioned last week an i915 PR came in late, but I left it, so the i915 bits of this cover 2 weeks, which is why it's likely a bit larger than usual. Otherwise it's mostly amdgpu fixes, one tegra fix, one meson fix. i915: - Handle idling during i915_gem_evict_something busy loops (Chris) - Mark current submissions with a weak-dependency (Chris) - Propagate error from completed fences (Chris) - Fixes on execlist to avoid GPU hang situation (Chris) - Fixes couple deadlocks (Chris) - Timeslice preemption fixes (Chris) - Fix Display Port interrupt handling on Tiger Lake (Imre) - Reduce debug noise around Frame Buffer Compression (Peter) - Fix logic around IPC W/a for Coffee Lake and Kaby Lake (Sultan) - Avoid dereferencing a dead context (Chris) tegra: - tegra120/4 smmu fixes amdgpu: - Clockgating fixes - Fix fbdev with scatter/gather display - S4 fix for navi - Soft recovery for gfx10 - Freesync fixes - Atomic check cursor fix - Add a gfxoff quirk - MST fix amdkfd: - Fix GEM reference counting meson: - error code propogation fix" * tag 'drm-fixes-2020-05-15' of git://anongit.freedesktop.org/drm/drm: (29 commits) drm/i915: Handle idling during i915_gem_evict_something busy loops drm/meson: pm resume add return errno branch drm/amd/amdgpu: Update update_config() logic drm/amd/amdgpu: add raven1 part to the gfxoff quirk list drm/i915: Mark concurrent submissions with a weak-dependency drm/i915: Propagate error from completed fences drm/i915/gvt: Fix kernel oops for 3-level ppgtt guest drm/i915/gvt: Init DPLL/DDI vreg for virtual display instead of inheritance. drm/amd/display: add basic atomic check for cursor plane drm/amd/display: Fix vblank and pageflip event handling for FreeSync drm/amdgpu: implement soft_recovery for gfx10 drm/amdgpu: enable hibernate support on Navi1X drm/amdgpu: Use GEM obj reference for KFD BOs drm/amdgpu: force fbdev into vram drm/amd/powerplay: perform PG ungate prior to CG ungate drm/amdgpu: drop unnecessary cancel_delayed_work_sync on PG ungate drm/amdgpu: disable MGCG/MGLS also on gfx CG ungate drm/i915/execlists: Track inflight CCID drm/i915/execlists: Avoid reusing the same logical CCID drm/i915/gem: Remove object_is_locked assertion from unpin_from_display_plane ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c14
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c163
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c10
-rw-r--r--drivers/gpu/drm/amd/powerplay/amd_powerplay.c6
-rw-r--r--drivers/gpu/drm/amd/powerplay/amdgpu_smu.c8
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h35
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c120
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c34
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/display.c49
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c10
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c26
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c12
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c16
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c6
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h1
-rw-r--r--drivers/gpu/drm/i915/i915_request.c12
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.c6
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.h3
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler_types.h1
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c25
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_vma.c2
-rw-r--r--drivers/gpu/drm/meson/meson_drv.c4
-rw-r--r--drivers/gpu/drm/tegra/drm.c3
-rw-r--r--drivers/gpu/host1x/dev.c59
37 files changed, 436 insertions, 274 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2992a49ad4a5..8ac1581a6b53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -945,6 +945,7 @@ struct amdgpu_device {
/* s3/s4 mask */
bool in_suspend;
+ bool in_hibernate;
/* record last mm index being written through WREG32*/
unsigned long last_mm_index;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9dff792c9290..6a5b91d23fd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1343,7 +1343,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
/* Free the BO*/
- amdgpu_bo_unref(&mem->bo);
+ drm_gem_object_put_unlocked(&mem->bo->tbo.base);
mutex_destroy(&mem->lock);
kfree(mem);
@@ -1688,7 +1688,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
- (*mem)->bo = amdgpu_bo_ref(bo);
+ drm_gem_object_get(&bo->tbo.base);
+ (*mem)->bo = bo;
(*mem)->va = va;
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 466bfe541e45..a735d79a717b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1181,7 +1181,9 @@ static int amdgpu_pmops_freeze(struct device *dev)
struct amdgpu_device *adev = drm_dev->dev_private;
int r;
+ adev->in_hibernate = true;
r = amdgpu_device_suspend(drm_dev, true);
+ adev->in_hibernate = false;
if (r)
return r;
return amdgpu_asic_reset(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 9ae7b61f696a..25ddb482466a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -133,8 +133,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
u32 cpp;
u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_VRAM_CLEARED |
- AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ AMDGPU_GEM_CREATE_VRAM_CLEARED;
info = drm_get_format_info(adev->ddev, mode_cmd);
cpp = info->cpp[0];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index f92c158d89a1..0e0daf0021b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4273,7 +4273,7 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === CGCG /CGLS for GFX 3D Only === */
gfx_v10_0_update_3d_clock_gating(adev, enable);
/* === MGCG + MGLS === */
- /* gfx_v10_0_update_medium_grain_clock_gating(adev, enable); */
+ gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
}
if (adev->cg_flags &
@@ -4353,11 +4353,7 @@ static int gfx_v10_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
- if (!enable) {
- amdgpu_gfx_off_ctrl(adev, false);
- cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
- } else
- amdgpu_gfx_off_ctrl(adev, true);
+ amdgpu_gfx_off_ctrl(adev, enable);
break;
default:
break;
@@ -4918,6 +4914,19 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask);
}
+static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
+ unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ WREG32_SOC15(GC, 0, mmSQ_CMD, value);
+}
+
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -5309,6 +5318,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v10_0_ring_soft_recovery,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0c390485bc10..d2d9dce68c2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1236,6 +1236,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
+ /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
+ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
{ 0, 0, 0, 0, 0 },
};
@@ -5025,10 +5027,9 @@ static int gfx_v9_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_RAVEN:
case CHIP_RENOIR:
- if (!enable) {
+ if (!enable)
amdgpu_gfx_off_ctrl(adev, false);
- cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
- }
+
if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
@@ -5052,12 +5053,7 @@ static int gfx_v9_0_set_powergating_state(void *handle,
amdgpu_gfx_off_ctrl(adev, true);
break;
case CHIP_VEGA12:
- if (!enable) {
- amdgpu_gfx_off_ctrl(adev, false);
- cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
- } else {
- amdgpu_gfx_off_ctrl(adev, true);
- }
+ amdgpu_gfx_off_ctrl(adev, enable);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 9c83c1303f08..28e651b173ab 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -441,7 +441,7 @@ static void dm_vupdate_high_irq(void *interrupt_params)
/**
* dm_crtc_high_irq() - Handles CRTC interrupt
- * @interrupt_params: ignored
+ * @interrupt_params: used for determining the CRTC instance
*
* Handles the CRTC/VSYNC interrupt by notfying DRM's VBLANK
* event handler.
@@ -455,70 +455,6 @@ static void dm_crtc_high_irq(void *interrupt_params)
unsigned long flags;
acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
-
- if (acrtc) {
- acrtc_state = to_dm_crtc_state(acrtc->base.state);
-
- DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d\n",
- acrtc->crtc_id,
- amdgpu_dm_vrr_active(acrtc_state));
-
- /* Core vblank handling at start of front-porch is only possible
- * in non-vrr mode, as only there vblank timestamping will give
- * valid results while done in front-porch. Otherwise defer it
- * to dm_vupdate_high_irq after end of front-porch.
- */
- if (!amdgpu_dm_vrr_active(acrtc_state))
- drm_crtc_handle_vblank(&acrtc->base);
-
- /* Following stuff must happen at start of vblank, for crc
- * computation and below-the-range btr support in vrr mode.
- */
- amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
-
- if (acrtc_state->stream && adev->family >= AMDGPU_FAMILY_AI &&
- acrtc_state->vrr_params.supported &&
- acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
- spin_lock_irqsave(&adev->ddev->event_lock, flags);
- mod_freesync_handle_v_update(
- adev->dm.freesync_module,
- acrtc_state->stream,
- &acrtc_state->vrr_params);
-
- dc_stream_adjust_vmin_vmax(
- adev->dm.dc,
- acrtc_state->stream,
- &acrtc_state->vrr_params.adjust);
- spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
- }
- }
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-/**
- * dm_dcn_crtc_high_irq() - Handles VStartup interrupt for DCN generation ASICs
- * @interrupt params - interrupt parameters
- *
- * Notify DRM's vblank event handler at VSTARTUP
- *
- * Unlike DCE hardware, we trigger the handler at VSTARTUP. at which:
- * * We are close enough to VUPDATE - the point of no return for hw
- * * We are in the fixed portion of variable front porch when vrr is enabled
- * * We are before VUPDATE, where double-buffered vrr registers are swapped
- *
- * It is therefore the correct place to signal vblank, send user flip events,
- * and update VRR.
- */
-static void dm_dcn_crtc_high_irq(void *interrupt_params)
-{
- struct common_irq_params *irq_params = interrupt_params;
- struct amdgpu_device *adev = irq_params->adev;
- struct amdgpu_crtc *acrtc;
- struct dm_crtc_state *acrtc_state;
- unsigned long flags;
-
- acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
-
if (!acrtc)
return;
@@ -528,22 +464,35 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
amdgpu_dm_vrr_active(acrtc_state),
acrtc_state->active_planes);
+ /**
+ * Core vblank handling at start of front-porch is only possible
+ * in non-vrr mode, as only there vblank timestamping will give
+ * valid results while done in front-porch. Otherwise defer it
+ * to dm_vupdate_high_irq after end of front-porch.
+ */
+ if (!amdgpu_dm_vrr_active(acrtc_state))
+ drm_crtc_handle_vblank(&acrtc->base);
+
+ /**
+ * Following stuff must happen at start of vblank, for crc
+ * computation and below-the-range btr support in vrr mode.
+ */
amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
- drm_crtc_handle_vblank(&acrtc->base);
+
+ /* BTR updates need to happen before VUPDATE on Vega and above. */
+ if (adev->family < AMDGPU_FAMILY_AI)
+ return;
spin_lock_irqsave(&adev->ddev->event_lock, flags);
- if (acrtc_state->vrr_params.supported &&
+ if (acrtc_state->stream && acrtc_state->vrr_params.supported &&
acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
- mod_freesync_handle_v_update(
- adev->dm.freesync_module,
- acrtc_state->stream,
- &acrtc_state->vrr_params);
+ mod_freesync_handle_v_update(adev->dm.freesync_module,
+ acrtc_state->stream,
+ &acrtc_state->vrr_params);
- dc_stream_adjust_vmin_vmax(
- adev->dm.dc,
- acrtc_state->stream,
- &acrtc_state->vrr_params.adjust);
+ dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc_state->stream,
+ &acrtc_state->vrr_params.adjust);
}
/*
@@ -556,7 +505,8 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
* avoid race conditions between flip programming and completion,
* which could cause too early flip completion events.
*/
- if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
+ if (adev->family >= AMDGPU_FAMILY_RV &&
+ acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
acrtc_state->active_planes == 0) {
if (acrtc->event) {
drm_crtc_send_vblank_event(&acrtc->base, acrtc->event);
@@ -568,7 +518,6 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
}
-#endif
static int dm_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
@@ -2445,8 +2394,36 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
+ amdgpu_dm_irq_register_interrupt(
+ adev, &int_params, dm_crtc_high_irq, c_irq_params);
+ }
+
+ /* Use VUPDATE_NO_LOCK interrupt on DCN, which seems to correspond to
+ * the regular VUPDATE interrupt on DCE. We want DC_IRQ_SOURCE_VUPDATEx
+ * to trigger at end of each vblank, regardless of state of the lock,
+ * matching DCE behaviour.
+ */
+ for (i = DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT;
+ i <= DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT + adev->mode_info.num_crtc - 1;
+ i++) {
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq);
+
+ if (r) {
+ DRM_ERROR("Failed to add vupdate irq id!\n");
+ return r;
+ }
+
+ int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
+ int_params.irq_source =
+ dc_interrupt_to_irq_source(dc, i, 0);
+
+ c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1];
+
+ c_irq_params->adev = adev;
+ c_irq_params->irq_src = int_params.irq_source;
+
amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_dcn_crtc_high_irq, c_irq_params);
+ dm_vupdate_high_irq, c_irq_params);
}
/* Use GRPH_PFLIP interrupt */
@@ -4453,10 +4430,6 @@ static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
struct amdgpu_device *adev = crtc->dev->dev_private;
int rc;
- /* Do not set vupdate for DCN hardware */
- if (adev->family > AMDGPU_FAMILY_AI)
- return 0;
-
irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst;
rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
@@ -7882,6 +7855,7 @@ static int dm_update_plane_state(struct dc *dc,
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state;
struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state;
+ struct amdgpu_crtc *new_acrtc;
bool needs_reset;
int ret = 0;
@@ -7891,9 +7865,30 @@ static int dm_update_plane_state(struct dc *dc,
dm_new_plane_state = to_dm_plane_state(new_plane_state);
dm_old_plane_state = to_dm_plane_state(old_plane_state);
- /*TODO Implement atomic check for cursor plane */
- if (plane->type == DRM_PLANE_TYPE_CURSOR)
+ /*TODO Implement better atomic check for cursor plane */
+ if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+ if (!enable || !new_plane_crtc ||
+ drm_atomic_plane_disabling(plane->state, new_plane_state))
+ return 0;
+
+ new_acrtc = to_amdgpu_crtc(new_plane_crtc);
+
+ if ((new_plane_state->crtc_w > new_acrtc->max_cursor_width) ||
+ (new_plane_state->crtc_h > new_acrtc->max_cursor_height)) {
+ DRM_DEBUG_ATOMIC("Bad cursor size %d x %d\n",
+ new_plane_state->crtc_w, new_plane_state->crtc_h);
+ return -EINVAL;
+ }
+
+ if (new_plane_state->crtc_x <= -new_acrtc->max_cursor_width ||
+ new_plane_state->crtc_y <= -new_acrtc->max_cursor_height) {
+ DRM_DEBUG_ATOMIC("Bad cursor position %d, %d\n",
+ new_plane_state->crtc_x, new_plane_state->crtc_y);
+ return -EINVAL;
+ }
+
return 0;
+ }
needs_reset = should_reset_plane(state, plane, old_plane_state,
new_plane_state);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 78e1c11d4ae5..dcf84a61de37 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -398,15 +398,15 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
struct mod_hdcp_display *display = &hdcp_work[link_index].display;
struct mod_hdcp_link *link = &hdcp_work[link_index].link;
- memset(display, 0, sizeof(*display));
- memset(link, 0, sizeof(*link));
-
- display->index = aconnector->base.index;
-
if (config->dpms_off) {
hdcp_remove_display(hdcp_work, link_index, aconnector);
return;
}
+
+ memset(display, 0, sizeof(*display));
+ memset(link, 0, sizeof(*link));
+
+ display->index = aconnector->base.index;
display->state = MOD_HDCP_DISPLAY_ACTIVE;
if (aconnector->dc_sink != NULL)
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index e4e5a53b2b4e..8e2acb4df860 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -319,12 +319,12 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr *hwmgr,
if (*level & profile_mode_mask) {
hwmgr->saved_dpm_level = hwmgr->dpm_level;
hwmgr->en_umd_pstate = true;
- amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_CG_STATE_UNGATE);
amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_UNGATE);
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
+ AMD_IP_BLOCK_TYPE_GFX,
+ AMD_CG_STATE_UNGATE);
}
} else {
/* exit umd pstate, restore level, enable gfx cg*/
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index e8b27fab6aa1..e77046931e4c 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1476,7 +1476,7 @@ static int smu_disable_dpm(struct smu_context *smu)
bool use_baco = !smu->is_apu &&
((adev->in_gpu_reset &&
(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
- (adev->in_runpm && amdgpu_asic_supports_baco(adev)));
+ ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev)));
ret = smu_get_smc_version(smu, NULL, &smu_version);
if (ret) {
@@ -1744,12 +1744,12 @@ static int smu_enable_umd_pstate(void *handle,
if (*level & profile_mode_mask) {
smu_dpm_ctx->saved_dpm_level = smu_dpm_ctx->dpm_level;
smu_dpm_ctx->enable_umd_pstate = true;
- amdgpu_device_ip_set_clockgating_state(smu->adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_CG_STATE_UNGATE);
amdgpu_device_ip_set_powergating_state(smu->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_UNGATE);
+ amdgpu_device_ip_set_clockgating_state(smu->adev,
+ AMD_IP_BLOCK_TYPE_GFX,
+ AMD_CG_STATE_UNGATE);
}
} else {
/* exit umd pstate, restore level, enable gfx cg*/
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 2e5d835a9eaa..c125ca9ab9b3 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -485,8 +485,7 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv,
if (!ret)
goto err_llb;
else if (ret > 1) {
- DRM_INFO("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
-
+ DRM_INFO_ONCE("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
}
fbc->threshold = ret;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 0cc40e77bbd2..4f96c8788a2e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -368,7 +368,6 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_vma *vma;
- GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
if (!atomic_read(&obj->bind_count))
return;
@@ -400,12 +399,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
- struct drm_i915_gem_object *obj = vma->obj;
-
- assert_object_held(obj);
-
/* Bump the LRU to try and avoid premature eviction whilst flipping */
- i915_gem_object_bump_inactive_ggtt(obj);
+ i915_gem_object_bump_inactive_ggtt(vma->obj);
i915_vma_unpin(vma);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 07cb83a0d017..ca0d4f4f3615 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -69,7 +69,13 @@ struct intel_context {
#define CONTEXT_NOPREEMPT 7
u32 *lrc_reg_state;
- u64 lrc_desc;
+ union {
+ struct {
+ u32 lrca;
+ u32 ccid;
+ };
+ u64 desc;
+ } lrc;
u32 tag; /* cookie passed to HW to track this context on submission */
/* Time on GPU as tracked by the hw. */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index b469de0dd9b6..a1aa0d3e8be1 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -333,13 +333,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
return intel_engine_has_preemption(engine);
}
-static inline bool
-intel_engine_has_timeslices(const struct intel_engine_cs *engine)
-{
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
- return false;
-
- return intel_engine_has_semaphores(engine);
-}
-
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 3aa8a652c16d..883a9b7fe88d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1295,6 +1295,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+ if (HAS_EXECLISTS(dev_priv)) {
+ drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+ drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+ }
drm_printf(m, "\tRING_START: 0x%08x\n",
ENGINE_READ(engine, RING_START));
drm_printf(m, "\tRING_HEAD: 0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 80cdde712842..0be674ae1cf6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -157,6 +157,20 @@ struct intel_engine_execlists {
struct i915_priolist default_priolist;
/**
+ * @ccid: identifier for contexts submitted to this engine
+ */
+ u32 ccid;
+
+ /**
+ * @yield: CCID at the time of the last semaphore-wait interrupt.
+ *
+ * Instead of leaving a semaphore busy-spinning on an engine, we would
+ * like to switch to another ready context, i.e. yielding the semaphore
+ * timeslice.
+ */
+ u32 yield;
+
+ /**
* @error_interrupt: CS Master EIR
*
* The CS generates an interrupt when it detects an error. We capture
@@ -295,8 +309,7 @@ struct intel_engine_cs {
u32 context_size;
u32 mmio_base;
- unsigned int context_tag;
-#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
+ unsigned long context_tag;
struct rb_node uabi_node;
@@ -483,10 +496,11 @@ struct intel_engine_cs {
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
-#define I915_ENGINE_IS_VIRTUAL BIT(5)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_HAS_TIMESLICES BIT(4)
+#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
+#define I915_ENGINE_IS_VIRTUAL BIT(6)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
unsigned int flags;
/*
@@ -585,6 +599,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
}
static inline bool
+intel_engine_has_timeslices(const struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return false;
+
+ return engine->flags & I915_ENGINE_HAS_TIMESLICES;
+}
+
+static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f0e7fd95165a..0cc7dd54f4f9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -39,6 +39,15 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
}
}
+ if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+ WRITE_ONCE(engine->execlists.yield,
+ ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+ ENGINE_TRACE(engine, "semaphore yield: %08x\n",
+ engine->execlists.yield);
+ if (del_timer(&engine->execlists.timer))
+ tasklet = true;
+ }
+
if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
tasklet = true;
@@ -228,7 +237,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
- GT_CONTEXT_SWITCH_INTERRUPT;
+ GT_CONTEXT_SWITCH_INTERRUPT |
+ GT_WAIT_SEMAPHORE_INTERRUPT;
struct intel_uncore *uncore = gt->uncore;
const u32 dmask = irqs << 16 | irqs;
const u32 smask = irqs << 16;
@@ -366,7 +376,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
- GT_CONTEXT_SWITCH_INTERRUPT;
+ GT_CONTEXT_SWITCH_INTERRUPT |
+ GT_WAIT_SEMAPHORE_INTERRUPT;
const u32 gt_interrupts[] = {
irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 683014e7bc51..2dfaddb8811e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -456,10 +456,10 @@ assert_priority_queue(const struct i915_request *prev,
* engine info, SW context ID and SW counter need to form a unique number
* (Context ID) per lrc.
*/
-static u64
+static u32
lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
{
- u64 desc;
+ u32 desc;
desc = INTEL_LEGACY_32B_CONTEXT;
if (i915_vm_is_4lvl(ce->vm))
@@ -470,21 +470,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
if (IS_GEN(engine->i915, 8))
desc |= GEN8_CTX_L3LLC_COHERENT;
- desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
- /*
- * The following 32bits are copied into the OA reports (dword 2).
- * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
- * anything below.
- */
- if (INTEL_GEN(engine->i915) >= 11) {
- desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
- /* bits 48-53 */
-
- desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
- /* bits 61-63 */
- }
-
- return desc;
+ return i915_ggtt_offset(ce->state) | desc;
}
static inline unsigned int dword_in_page(void *addr)
@@ -1192,7 +1178,7 @@ static void reset_active(struct i915_request *rq,
__execlists_update_reg_state(ce, engine, head);
/* We've switched away, so this should be a no-op, but intent matters */
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
static u32 intel_context_get_runtime(const struct intel_context *ce)
@@ -1251,18 +1237,23 @@ __execlists_schedule_in(struct i915_request *rq)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
execlists_check_context(ce, engine);
- ce->lrc_desc &= ~GENMASK_ULL(47, 37);
if (ce->tag) {
/* Use a fixed tag for OA and friends */
- ce->lrc_desc |= (u64)ce->tag << 32;
+ GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
+ ce->lrc.ccid = ce->tag;
} else {
/* We don't need a strict matching tag, just different values */
- ce->lrc_desc |=
- (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
- GEN11_SW_CTX_ID_SHIFT;
- BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+ unsigned int tag = ffs(engine->context_tag);
+
+ GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
+ clear_bit(tag - 1, &engine->context_tag);
+ ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
+
+ BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
}
+ ce->lrc.ccid |= engine->execlists.ccid;
+
__intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@@ -1302,7 +1293,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
static inline void
__execlists_schedule_out(struct i915_request *rq,
- struct intel_engine_cs * const engine)
+ struct intel_engine_cs * const engine,
+ unsigned int ccid)
{
struct intel_context * const ce = rq->context;
@@ -1320,6 +1312,14 @@ __execlists_schedule_out(struct i915_request *rq,
i915_request_completed(rq))
intel_engine_add_retire(engine, ce->timeline);
+ ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
+ ccid &= GEN12_MAX_CONTEXT_HW_ID;
+ if (ccid < BITS_PER_LONG) {
+ GEM_BUG_ON(ccid == 0);
+ GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
+ set_bit(ccid - 1, &engine->context_tag);
+ }
+
intel_context_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
@@ -1345,15 +1345,17 @@ execlists_schedule_out(struct i915_request *rq)
{
struct intel_context * const ce = rq->context;
struct intel_engine_cs *cur, *old;
+ u32 ccid;
trace_i915_request_out(rq);
+ ccid = rq->context->lrc.ccid;
old = READ_ONCE(ce->inflight);
do
cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
while (!try_cmpxchg(&ce->inflight, &old, cur));
if (!cur)
- __execlists_schedule_out(rq, old);
+ __execlists_schedule_out(rq, old, ccid);
i915_request_put(rq);
}
@@ -1361,7 +1363,7 @@ execlists_schedule_out(struct i915_request *rq)
static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
- u64 desc = ce->lrc_desc;
+ u64 desc = ce->lrc.desc;
u32 tail, prev;
/*
@@ -1400,7 +1402,7 @@ static u64 execlists_update_context(struct i915_request *rq)
*/
wmb();
- ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+ ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
return desc;
}
@@ -1719,6 +1721,9 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ if (p->flags & I915_DEPENDENCY_WEAK)
+ continue;
+
/* Leave semaphores spinning on the other engines */
if (w->engine != rq->engine)
continue;
@@ -1754,7 +1759,8 @@ static void defer_active(struct intel_engine_cs *engine)
}
static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
int hint;
@@ -1768,6 +1774,32 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
return hint >= effective_prio(rq);
}
+static bool
+timeslice_yield(const struct intel_engine_execlists *el,
+ const struct i915_request *rq)
+{
+ /*
+ * Once bitten, forever smitten!
+ *
+ * If the active context ever busy-waited on a semaphore,
+ * it will be treated as a hog until the end of its timeslice (i.e.
+ * until it is scheduled out and replaced by a new submission,
+ * possibly even its own lite-restore). The HW only sends an interrupt
+ * on the first miss, and we do know if that semaphore has been
+ * signaled, or even if it is now stuck on another semaphore. Play
+ * safe, yield if it might be stuck -- it will be given a fresh
+ * timeslice in the near future.
+ */
+ return rq->context->lrc.ccid == READ_ONCE(el->yield);
+}
+
+static bool
+timeslice_expired(const struct intel_engine_execlists *el,
+ const struct i915_request *rq)
+{
+ return timer_expired(&el->timer) || timeslice_yield(el, rq);
+}
+
static int
switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
{
@@ -1783,8 +1815,7 @@ timeslice(const struct intel_engine_cs *engine)
return READ_ONCE(engine->props.timeslice_duration_ms);
}
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
{
const struct intel_engine_execlists *execlists = &engine->execlists;
const struct i915_request *rq = *execlists->active;
@@ -1946,13 +1977,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last = NULL;
} else if (need_timeslice(engine, last) &&
- timer_expired(&engine->execlists.timer)) {
+ timeslice_expired(execlists, last)) {
ENGINE_TRACE(engine,
- "expired last=%llx:%lld, prio=%d, hint=%d\n",
+ "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
last->fence.context,
last->fence.seqno,
last->sched.attr.priority,
- execlists->queue_priority_hint);
+ execlists->queue_priority_hint,
+ yesno(timeslice_yield(execlists, last)));
ring_set_paused(engine, 1);
defer_active(engine);
@@ -2213,6 +2245,7 @@ done:
}
clear_ports(port + 1, last_port - port);
+ WRITE_ONCE(execlists->yield, -1);
execlists_submit_ports(engine);
set_preempt_timeout(engine, *active);
} else {
@@ -3043,7 +3076,7 @@ __execlists_context_pin(struct intel_context *ce,
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
- ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
+ ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
@@ -3072,7 +3105,7 @@ static void execlists_context_reset(struct intel_context *ce)
ce, ce->engine, ce->ring, true);
__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
static const struct intel_context_ops execlists_context_ops = {
@@ -3541,7 +3574,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
enable_error_interrupt(engine);
- engine->context_tag = 0;
+ engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
}
static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -3753,7 +3786,7 @@ out_replay:
head, ce->ring->tail);
__execlists_reset_reg_state(ce, engine);
__execlists_update_reg_state(ce, engine, head);
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
unwind:
/* Push back any incomplete requests for replay after the reset. */
@@ -4369,8 +4402,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
if (!intel_vgpu_active(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
- if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+ if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+ if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+ }
}
if (INTEL_GEN(engine->i915) >= 12)
@@ -4449,6 +4485,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
+ engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -4516,6 +4553,11 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
else
execlists->csb_size = GEN11_CSB_ENTRIES;
+ if (INTEL_GEN(engine->i915) >= 11) {
+ execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
+ execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
+ }
+
reset_csb_pointers(engine);
/* Finally, take ownership and responsibility for cleanup! */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 6f06ba750a0a..f95ae15ce865 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -929,7 +929,7 @@ create_rewinder(struct intel_context *ce,
goto err;
}
- cs = intel_ring_begin(rq, 10);
+ cs = intel_ring_begin(rq, 14);
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
goto err;
@@ -941,8 +941,8 @@ create_rewinder(struct intel_context *ce,
*cs++ = MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_POLL |
- MI_SEMAPHORE_SAD_NEQ_SDD;
- *cs++ = 0;
+ MI_SEMAPHORE_SAD_GTE_SDD;
+ *cs++ = idx;
*cs++ = offset;
*cs++ = 0;
@@ -951,6 +951,11 @@ create_rewinder(struct intel_context *ce,
*cs++ = offset + idx * sizeof(u32);
*cs++ = 0;
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = offset;
+ *cs++ = 0;
+ *cs++ = idx + 1;
+
intel_ring_advance(rq, cs);
rq->sched.attr.priority = I915_PRIORITY_MASK;
@@ -984,7 +989,7 @@ static int live_timeslice_rewind(void *arg)
for_each_engine(engine, gt, id) {
enum { A1, A2, B1 };
- enum { X = 1, Y, Z };
+ enum { X = 1, Z, Y };
struct i915_request *rq[3] = {};
struct intel_context *ce;
unsigned long heartbeat;
@@ -1017,13 +1022,13 @@ static int live_timeslice_rewind(void *arg)
goto err;
}
- rq[0] = create_rewinder(ce, NULL, slot, 1);
+ rq[0] = create_rewinder(ce, NULL, slot, X);
if (IS_ERR(rq[0])) {
intel_context_put(ce);
goto err;
}
- rq[1] = create_rewinder(ce, NULL, slot, 2);
+ rq[1] = create_rewinder(ce, NULL, slot, Y);
intel_context_put(ce);
if (IS_ERR(rq[1]))
goto err;
@@ -1041,7 +1046,7 @@ static int live_timeslice_rewind(void *arg)
goto err;
}
- rq[2] = create_rewinder(ce, rq[0], slot, 3);
+ rq[2] = create_rewinder(ce, rq[0], slot, Z);
intel_context_put(ce);
if (IS_ERR(rq[2]))
goto err;
@@ -1055,15 +1060,12 @@ static int live_timeslice_rewind(void *arg)
GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
- GEM_BUG_ON(!i915_request_is_active(rq[A1]));
- GEM_BUG_ON(!i915_request_is_active(rq[A2]));
- GEM_BUG_ON(!i915_request_is_active(rq[B1]));
-
- /* Wait for the timeslice to kick in */
- del_timer(&engine->execlists.timer);
- tasklet_hi_schedule(&engine->execlists.tasklet);
- intel_engine_flush_submission(engine);
-
+ if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
+ /* Wait for the timeslice to kick in */
+ del_timer(&engine->execlists.timer);
+ tasklet_hi_schedule(&engine->execlists.tasklet);
+ intel_engine_flush_submission(engine);
+ }
/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
GEM_BUG_ON(!i915_request_is_active(rq[A1]));
GEM_BUG_ON(!i915_request_is_active(rq[B1]));
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index fe7778c28d2d..aa6d56e25a10 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc,
static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
struct intel_engine_cs *engine = rq->engine;
- u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
+ u32 ctx_desc = rq->context->lrc.ccid;
u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
guc_wq_item_append(guc, engine->guc_id, ctx_desc,
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index a83df2f84eb9..a1696e9ce4b6 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -208,14 +208,41 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
SKL_FUSE_PG_DIST_STATUS(SKL_PG0) |
SKL_FUSE_PG_DIST_STATUS(SKL_PG1) |
SKL_FUSE_PG_DIST_STATUS(SKL_PG2);
- vgpu_vreg_t(vgpu, LCPLL1_CTL) |=
- LCPLL_PLL_ENABLE |
- LCPLL_PLL_LOCK;
- vgpu_vreg_t(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE;
-
+ /*
+ * Only 1 PIPE enabled in current vGPU display and PIPE_A is
+ * tied to TRANSCODER_A in HW, so it's safe to assume PIPE_A,
+ * TRANSCODER_A can be enabled. PORT_x depends on the input of
+ * setup_virtual_dp_monitor, we can bind DPLL0 to any PORT_x
+ * so we fixed to DPLL0 here.
+ * Setup DPLL0: DP link clk 1620 MHz, non SSC, DP Mode
+ */
+ vgpu_vreg_t(vgpu, DPLL_CTRL1) =
+ DPLL_CTRL1_OVERRIDE(DPLL_ID_SKL_DPLL0);
+ vgpu_vreg_t(vgpu, DPLL_CTRL1) |=
+ DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, DPLL_ID_SKL_DPLL0);
+ vgpu_vreg_t(vgpu, LCPLL1_CTL) =
+ LCPLL_PLL_ENABLE | LCPLL_PLL_LOCK;
+ vgpu_vreg_t(vgpu, DPLL_STATUS) = DPLL_LOCK(DPLL_ID_SKL_DPLL0);
+ /*
+ * Golden M/N are calculated based on:
+ * 24 bpp, 4 lanes, 154000 pixel clk (from virtual EDID),
+ * DP link clk 1620 MHz and non-constant_n.
+ * TODO: calculate DP link symbol clk and stream clk m/n.
+ */
+ vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) = 63 << TU_SIZE_SHIFT;
+ vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) |= 0x5b425e;
+ vgpu_vreg_t(vgpu, PIPE_DATA_N1(TRANSCODER_A)) = 0x800000;
+ vgpu_vreg_t(vgpu, PIPE_LINK_M1(TRANSCODER_A)) = 0x3cd6e;
+ vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A)) = 0x80000;
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+ ~DPLL_CTRL2_DDI_CLK_OFF(PORT_B);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_B);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_B);
vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
@@ -236,6 +263,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+ ~DPLL_CTRL2_DDI_CLK_OFF(PORT_C);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_C);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_C);
vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
@@ -256,6 +289,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+ ~DPLL_CTRL2_DDI_CLK_OFF(PORT_D);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_D);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_D);
vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index cb11c3184085..e92ed96c9b23 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -290,7 +290,7 @@ static void
shadow_context_descriptor_update(struct intel_context *ce,
struct intel_vgpu_workload *workload)
{
- u64 desc = ce->lrc_desc;
+ u64 desc = ce->lrc.desc;
/*
* Update bits 0-11 of the context descriptor which includes flags
@@ -300,7 +300,7 @@ shadow_context_descriptor_update(struct intel_context *ce,
desc |= (u64)workload->ctx_desc.addressing_mode <<
GEN8_CTX_ADDRESSING_MODE_SHIFT;
- ce->lrc_desc = desc;
+ ce->lrc.desc = desc;
}
static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
@@ -379,7 +379,11 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
struct i915_page_directory * const pd =
i915_pd_entry(ppgtt->pd, i);
-
+ /* skip now as current i915 ppgtt alloc won't allocate
+ top level pdp for non 4-level table, won't impact
+ shadow ppgtt. */
+ if (!pd)
+ break;
px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
}
}
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 4518b9b35c3d..02ad1acd117c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -128,6 +128,13 @@ search_again:
active = NULL;
INIT_LIST_HEAD(&eviction_list);
list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
+ if (vma == active) { /* now seen this vma twice */
+ if (flags & PIN_NONBLOCK)
+ break;
+
+ active = ERR_PTR(-EAGAIN);
+ }
+
/*
* We keep this list in a rough least-recently scanned order
* of active elements (inactive elements are cheap to reap).
@@ -143,21 +150,12 @@ search_again:
* To notice when we complete one full cycle, we record the
* first active element seen, before moving it to the tail.
*/
- if (i915_vma_is_active(vma)) {
- if (vma == active) {
- if (flags & PIN_NONBLOCK)
- break;
-
- active = ERR_PTR(-EAGAIN);
- }
-
- if (active != ERR_PTR(-EAGAIN)) {
- if (!active)
- active = vma;
+ if (active != ERR_PTR(-EAGAIN) && i915_vma_is_active(vma)) {
+ if (!active)
+ active = vma;
- list_move_tail(&vma->vm_link, &vm->bound_list);
- continue;
- }
+ list_move_tail(&vma->vm_link, &vm->bound_list);
+ continue;
}
if (mark_free(&scan, vma, flags, &eviction_list))
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2a4cd0ba5464..5c8e51d2ba5b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1207,8 +1207,6 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
static void record_request(const struct i915_request *request,
struct i915_request_coredump *erq)
{
- const struct i915_gem_context *ctx;
-
erq->flags = request->fence.flags;
erq->context = request->fence.context;
erq->seqno = request->fence.seqno;
@@ -1219,9 +1217,13 @@ static void record_request(const struct i915_request *request,
erq->pid = 0;
rcu_read_lock();
- ctx = rcu_dereference(request->context->gem_context);
- if (ctx)
- erq->pid = pid_nr(ctx->pid);
+ if (!intel_context_is_closed(request->context)) {
+ const struct i915_gem_context *ctx;
+
+ ctx = rcu_dereference(request->context->gem_context);
+ if (ctx)
+ erq->pid = pid_nr(ctx->pid);
+ }
rcu_read_unlock();
}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d91557d842dc..8a2b83807ffc 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3361,7 +3361,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) |
GEN8_PIPE_CDCLK_CRC_DONE;
u32 de_pipe_enables;
- u32 de_port_masked = GEN8_AUX_CHANNEL_A;
+ u32 de_port_masked = gen8_de_port_aux_mask(dev_priv);
u32 de_port_enables;
u32 de_misc_masked = GEN8_DE_EDP_PSR;
enum pipe pipe;
@@ -3369,18 +3369,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
if (INTEL_GEN(dev_priv) <= 10)
de_misc_masked |= GEN8_DE_MISC_GSE;
- if (INTEL_GEN(dev_priv) >= 9) {
- de_port_masked |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C |
- GEN9_AUX_CHANNEL_D;
- if (IS_GEN9_LP(dev_priv))
- de_port_masked |= BXT_DE_PORT_GMBUS;
- }
-
- if (INTEL_GEN(dev_priv) >= 11)
- de_port_masked |= ICL_AUX_CHANNEL_E;
-
- if (IS_CNL_WITH_PORT_F(dev_priv) || INTEL_GEN(dev_priv) >= 11)
- de_port_masked |= CNL_AUX_CHANNEL_F;
+ if (IS_GEN9_LP(dev_priv))
+ de_port_masked |= BXT_DE_PORT_GMBUS;
de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK |
GEN8_PIPE_FIFO_UNDERRUN;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 66a46e41d5ef..cf2c01f17da8 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1310,8 +1310,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
* dropped by GuC. They won't be part of the context
* ID in the OA reports, so squash those lower bits.
*/
- stream->specific_ctx_id =
- lower_32_bits(ce->lrc_desc) >> 12;
+ stream->specific_ctx_id = ce->lrc.lrca >> 12;
/*
* GuC uses the top bit to signal proxy submission, so
@@ -1328,11 +1327,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
/*
* Pick an unused context id
- * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts
+ * 0 - BITS_PER_LONG are used by other contexts
* GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
*/
stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
- BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG);
break;
}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e0c6021fdaf9..6e12000c4b6b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GT_BSD_CS_ERROR_INTERRUPT (1 << 15)
#define GT_BSD_USER_INTERRUPT (1 << 12)
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
+#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) /* bdw+ */
#define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8)
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */
#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c0df71d7d0ff..e2b78db685ea 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1017,11 +1017,15 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
GEM_BUG_ON(to == from);
GEM_BUG_ON(to->timeline == from->timeline);
- if (i915_request_completed(from))
+ if (i915_request_completed(from)) {
+ i915_sw_fence_set_error_once(&to->submit, from->fence.error);
return 0;
+ }
if (to->engine->schedule) {
- ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
+ ret = i915_sched_node_add_dependency(&to->sched,
+ &from->sched,
+ I915_DEPENDENCY_EXTERNAL);
if (ret < 0)
return ret;
}
@@ -1183,7 +1187,9 @@ __i915_request_await_execution(struct i915_request *to,
/* Couple the dependency tree for PI on this exposed to->fence */
if (to->engine->schedule) {
- err = i915_sched_node_add_dependency(&to->sched, &from->sched);
+ err = i915_sched_node_add_dependency(&to->sched,
+ &from->sched,
+ I915_DEPENDENCY_WEAK);
if (err < 0)
return err;
}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 68b06a7ba667..f0a9e8958ca0 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -456,7 +456,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
}
int i915_sched_node_add_dependency(struct i915_sched_node *node,
- struct i915_sched_node *signal)
+ struct i915_sched_node *signal,
+ unsigned long flags)
{
struct i915_dependency *dep;
@@ -465,8 +466,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
return -ENOMEM;
if (!__i915_sched_node_add_dependency(node, signal, dep,
- I915_DEPENDENCY_EXTERNAL |
- I915_DEPENDENCY_ALLOC))
+ flags | I915_DEPENDENCY_ALLOC))
i915_dependency_free(dep);
return 0;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index d1dc4efef77b..6f0bf00fc569 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -34,7 +34,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
unsigned long flags);
int i915_sched_node_add_dependency(struct i915_sched_node *node,
- struct i915_sched_node *signal);
+ struct i915_sched_node *signal,
+ unsigned long flags);
void i915_sched_node_fini(struct i915_sched_node *node);
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index d18e70550054..7186875088a0 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -78,6 +78,7 @@ struct i915_dependency {
unsigned long flags;
#define I915_DEPENDENCY_ALLOC BIT(0)
#define I915_DEPENDENCY_EXTERNAL BIT(1)
+#define I915_DEPENDENCY_WEAK BIT(2)
};
#endif /* _I915_SCHEDULER_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 82e3bc280622..2cd7a7e87c0a 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1228,18 +1228,6 @@ int __i915_vma_unbind(struct i915_vma *vma)
lockdep_assert_held(&vma->vm->mutex);
- /*
- * First wait upon any activity as retiring the request may
- * have side-effects such as unpinning or even unbinding this vma.
- *
- * XXX Actually waiting under the vm->mutex is a hinderance and
- * should be pipelined wherever possible. In cases where that is
- * unavoidable, we should lift the wait to before the mutex.
- */
- ret = i915_vma_sync(vma);
- if (ret)
- return ret;
-
if (i915_vma_is_pinned(vma)) {
vma_print_allocator(vma, "is pinned");
return -EAGAIN;
@@ -1313,15 +1301,20 @@ int i915_vma_unbind(struct i915_vma *vma)
if (!drm_mm_node_allocated(&vma->node))
return 0;
- if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
- /* XXX not always required: nop_clear_range */
- wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
-
/* Optimistic wait before taking the mutex */
err = i915_vma_sync(vma);
if (err)
goto out_rpm;
+ if (i915_vma_is_pinned(vma)) {
+ vma_print_allocator(vma, "is pinned");
+ return -EAGAIN;
+ }
+
+ if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+ /* XXX not always required: nop_clear_range */
+ wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
err = mutex_lock_interruptible(&vm->mutex);
if (err)
goto out_rpm;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 8375054ba27d..a52986a9e7a6 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4992,7 +4992,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
* WaIncreaseLatencyIPCEnabled: kbl,cfl
* Display WA #1141: kbl,cfl
*/
- if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) ||
+ if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
dev_priv->ipc_enabled)
latency += 4;
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 58b5f40a07dd..af89c7fc8f59 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -173,7 +173,7 @@ static int igt_vma_create(void *arg)
}
nc = 0;
- for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
+ for_each_prime_number(num_ctx, 2 * BITS_PER_LONG) {
for (; nc < num_ctx; nc++) {
ctx = mock_context(i915, "mock");
if (!ctx)
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index b5f5eb7b4bb9..8c2e1b47e81a 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -412,9 +412,7 @@ static int __maybe_unused meson_drv_pm_resume(struct device *dev)
if (priv->afbcd.ops)
priv->afbcd.ops->init(priv);
- drm_mode_config_helper_resume(priv->drm);
-
- return 0;
+ return drm_mode_config_helper_resume(priv->drm);
}
static int compare_of(struct device *dev, void *data)
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index bd268028fb3d..583cd6e0ae27 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1039,6 +1039,7 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
static bool host1x_drm_wants_iommu(struct host1x_device *dev)
{
+ struct host1x *host1x = dev_get_drvdata(dev->dev.parent);
struct iommu_domain *domain;
/*
@@ -1076,7 +1077,7 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev)
* sufficient and whether or not the host1x is attached to an IOMMU
* doesn't matter.
*/
- if (!domain && dma_get_mask(dev->dev.parent) <= DMA_BIT_MASK(32))
+ if (!domain && host1x_get_dma_mask(host1x) <= DMA_BIT_MASK(32))
return true;
return domain != NULL;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 388bcc2889aa..d24344e91922 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -192,17 +192,55 @@ static void host1x_setup_sid_table(struct host1x *host)
}
}
+static bool host1x_wants_iommu(struct host1x *host1x)
+{
+ /*
+ * If we support addressing a maximum of 32 bits of physical memory
+ * and if the host1x firewall is enabled, there's no need to enable
+ * IOMMU support. This can happen for example on Tegra20, Tegra30
+ * and Tegra114.
+ *
+ * Tegra124 and later can address up to 34 bits of physical memory and
+ * many platforms come equipped with more than 2 GiB of system memory,
+ * which requires crossing the 4 GiB boundary. But there's a catch: on
+ * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
+ * only address up to 32 bits of memory in GATHER opcodes, which means
+ * that command buffers need to either be in the first 2 GiB of system
+ * memory (which could quickly lead to memory exhaustion), or command
+ * buffers need to be treated differently from other buffers (which is
+ * not possible with the current ABI).
+ *
+ * A third option is to use the IOMMU in these cases to make sure all
+ * buffers will be mapped into a 32-bit IOVA space that host1x can
+ * address. This allows all of the system memory to be used and works
+ * within the limitations of the host1x on these SoCs.
+ *
+ * In summary, default to enable IOMMU on Tegra124 and later. For any
+ * of the earlier SoCs, only use the IOMMU for additional safety when
+ * the host1x firewall is disabled.
+ */
+ if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
+ if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ return false;
+ }
+
+ return true;
+}
+
static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
int err;
/*
- * If the host1x firewall is enabled, there's no need to enable IOMMU
- * support. Similarly, if host1x is already attached to an IOMMU (via
- * the DMA API), don't try to attach again.
+ * We may not always want to enable IOMMU support (for example if the
+ * host1x firewall is already enabled and we don't support addressing
+ * more than 32 bits of physical memory), so check for that first.
+ *
+ * Similarly, if host1x is already attached to an IOMMU (via the DMA
+ * API), don't try to attach again.
*/
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain)
+ if (!host1x_wants_iommu(host) || domain)
return domain;
host->group = iommu_group_get(host->dev);
@@ -502,6 +540,19 @@ static void __exit tegra_host1x_exit(void)
}
module_exit(tegra_host1x_exit);
+/**
+ * host1x_get_dma_mask() - query the supported DMA mask for host1x
+ * @host1x: host1x instance
+ *
+ * Note that this returns the supported DMA mask for host1x, which can be
+ * different from the applicable DMA mask under certain circumstances.
+ */
+u64 host1x_get_dma_mask(struct host1x *host1x)
+{
+ return host1x->info->dma_mask;
+}
+EXPORT_SYMBOL(host1x_get_dma_mask);
+
MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");