drm/amdgpu: fix the issue of reserving bad pages failed

In amdgpu_ras_reset_gpu, because bad pages may not be freed,
it has high probability to reserve bad pages failed.

Change to reserve bad pages when freeing VRAM.

v2:
1. avoid allocating the drm_mm node outside of amdgpu_vram_mgr.c
2. move bad page reserving into amdgpu_ras_add_bad_pages, if vram mgr
   reserve bad page failed, it will put it into pending list, otherwise
   put it into processed list;
3. remove amdgpu_ras_release_bad_pages, because retired page's info has
   been moved into amdgpu_vram_mgr

v3:
1. formate code style;
2. rename amdgpu_vram_reserve_scope as amdgpu_vram_reservation;
3. rename scope_pending as reservations_pending;
4. rename scope_processed as reserved_pages;
5. change to iterate over all the pending ones and try to insert them
   with drm_mm_reserve_node();

v4:
1. rename amdgpu_vram_mgr_reserve_scope as
amdgpu_vram_mgr_reserve_range;
2. remove unused include "amdgpu_ras.h";
3. rename amdgpu_vram_mgr_check_and_reserve as
amdgpu_vram_mgr_do_reserve;
4. refine amdgpu_vram_mgr_reserve_range to call
amdgpu_vram_mgr_do_reserve.

Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Hawking Zhang <hawking.zhang@amd.com>
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Dennis Li
2020-10-22 17:44:55 +08:00
committed by Alex Deucher
parent 5eeb45934c
commit 676deb3877
4 changed files with 156 additions and 119 deletions

View File

@@ -187,6 +187,8 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
drm_mm_init(&mgr->mm, 0, man->size);
spin_lock_init(&mgr->lock);
INIT_LIST_HEAD(&mgr->reservations_pending);
INIT_LIST_HEAD(&mgr->reserved_pages);
/* Add the two VRAM-related sysfs files */
ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
@@ -211,6 +213,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
struct ttm_resource_manager *man = &mgr->manager;
int ret;
struct amdgpu_vram_reservation *rsv, *temp;
ttm_resource_manager_set_used(man, false);
@@ -219,6 +222,13 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
return;
spin_lock(&mgr->lock);
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
kfree(rsv);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
drm_mm_remove_node(&rsv->mm_node);
kfree(rsv);
}
drm_mm_takedown(&mgr->mm);
spin_unlock(&mgr->lock);
@@ -277,6 +287,101 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
return usage;
}
static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
struct drm_mm *mm = &mgr->mm;
struct amdgpu_vram_reservation *rsv, *temp;
uint64_t vis_usage;
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
if (drm_mm_reserve_node(mm, &rsv->mm_node))
continue;
dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Successed\n",
rsv->mm_node.start, rsv->mm_node.size);
vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
atomic64_add(vis_usage, &mgr->vis_usage);
atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage);
list_move(&rsv->node, &mgr->reserved_pages);
}
}
/**
* amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM
*
* @man: TTM memory type manager
* @start: start address of the range in VRAM
* @size: size of the range
*
* Reserve memory from start addess with the specified size in VRAM
*/
int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
uint64_t start, uint64_t size)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_vram_reservation *rsv;
rsv = kzalloc(sizeof(*rsv), GFP_KERNEL);
if (!rsv)
return -ENOMEM;
INIT_LIST_HEAD(&rsv->node);
rsv->mm_node.start = start >> PAGE_SHIFT;
rsv->mm_node.size = size >> PAGE_SHIFT;
spin_lock(&mgr->lock);
list_add_tail(&mgr->reservations_pending, &rsv->node);
amdgpu_vram_mgr_do_reserve(man);
spin_unlock(&mgr->lock);
return 0;
}
/**
* amdgpu_vram_mgr_query_page_status - query the reservation status
*
* @man: TTM memory type manager
* @start: start address of a page in VRAM
*
* Returns:
* -EBUSY: the page is still hold and in pending list
* 0: the page has been reserved
* -ENOENT: the input page is not a reservation
*/
int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
uint64_t start)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_vram_reservation *rsv;
int ret;
spin_lock(&mgr->lock);
list_for_each_entry(rsv, &mgr->reservations_pending, node) {
if ((rsv->mm_node.start <= start) &&
(start < (rsv->mm_node.start + rsv->mm_node.size))) {
ret = -EBUSY;
goto out;
}
}
list_for_each_entry(rsv, &mgr->reserved_pages, node) {
if ((rsv->mm_node.start <= start) &&
(start < (rsv->mm_node.start + rsv->mm_node.size))) {
ret = 0;
goto out;
}
}
ret = -ENOENT;
out:
spin_unlock(&mgr->lock);
return ret;
}
/**
* amdgpu_vram_mgr_virt_start - update virtual start address
*
@@ -447,6 +552,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes);
++nodes;
}
amdgpu_vram_mgr_do_reserve(man);
spin_unlock(&mgr->lock);
atomic64_sub(usage, &mgr->usage);