Merge tag 'ib-mfd-gpio-input-pwm-v6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd into next
Merge an immutable branch between MFD, GPIO, Input and PWM to resolve conflicts for the merge window pull request.
This commit is contained in:
@@ -722,6 +722,13 @@ ForEachMacros:
|
||||
- 'v4l2_m2m_for_each_src_buf'
|
||||
- 'v4l2_m2m_for_each_src_buf_safe'
|
||||
- 'virtio_device_for_each_vq'
|
||||
- 'vkms_config_for_each_connector'
|
||||
- 'vkms_config_for_each_crtc'
|
||||
- 'vkms_config_for_each_encoder'
|
||||
- 'vkms_config_for_each_plane'
|
||||
- 'vkms_config_connector_for_each_possible_encoder'
|
||||
- 'vkms_config_encoder_for_each_possible_crtc'
|
||||
- 'vkms_config_plane_for_each_possible_crtc'
|
||||
- 'while_for_each_ftrace_op'
|
||||
- 'workloads__for_each'
|
||||
- 'xa_for_each'
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -40,6 +40,7 @@
|
||||
*.o
|
||||
*.o.*
|
||||
*.patch
|
||||
*.pyc
|
||||
*.rmeta
|
||||
*.rpm
|
||||
*.rsi
|
||||
|
||||
27
.mailmap
27
.mailmap
@@ -21,7 +21,8 @@ Adam Radford <aradford@gmail.com>
|
||||
Adriana Reus <adi.reus@gmail.com> <adriana.reus@intel.com>
|
||||
Adrian Bunk <bunk@stusta.de>
|
||||
Ajay Kaher <ajay.kaher@broadcom.com> <akaher@vmware.com>
|
||||
Akhil P Oommen <quic_akhilpo@quicinc.com> <akhilpo@codeaurora.org>
|
||||
Akhil P Oommen <akhilpo@oss.qualcomm.com> <akhilpo@codeaurora.org>
|
||||
Akhil P Oommen <akhilpo@oss.qualcomm.com> <quic_akhilpo@quicinc.com>
|
||||
Alan Cox <alan@lxorguk.ukuu.org.uk>
|
||||
Alan Cox <root@hraefn.swansea.linux.org.uk>
|
||||
Aleksandar Markovic <aleksandar.markovic@mips.com> <aleksandar.markovic@imgtec.com>
|
||||
@@ -102,10 +103,12 @@ Ard Biesheuvel <ardb@kernel.org> <ard.biesheuvel@linaro.org>
|
||||
Arnaud Patard <arnaud.patard@rtp-net.org>
|
||||
Arnd Bergmann <arnd@arndb.de>
|
||||
Arun Kumar Neelakantam <quic_aneela@quicinc.com> <aneela@codeaurora.org>
|
||||
Asahi Lina <lina+kernel@asahilina.net> <lina@asahilina.net>
|
||||
Ashok Raj Nagarajan <quic_arnagara@quicinc.com> <arnagara@codeaurora.org>
|
||||
Ashwin Chaugule <quic_ashwinc@quicinc.com> <ashwinc@codeaurora.org>
|
||||
Asutosh Das <quic_asutoshd@quicinc.com> <asutoshd@codeaurora.org>
|
||||
Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com>
|
||||
Atish Patra <atish.patra@linux.dev> <atishp@atishpatra.org>
|
||||
Atish Patra <atish.patra@linux.dev> <atish.patra@wdc.com>
|
||||
Avaneesh Kumar Dwivedi <quic_akdwived@quicinc.com> <akdwived@codeaurora.org>
|
||||
Axel Dyks <xl@xlsigned.net>
|
||||
Axel Lin <axel.lin@gmail.com>
|
||||
@@ -134,6 +137,7 @@ Ben Widawsky <bwidawsk@kernel.org> <benjamin.widawsky@intel.com>
|
||||
Benjamin Poirier <benjamin.poirier@gmail.com> <bpoirier@suse.de>
|
||||
Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@gmail.com>
|
||||
Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@redhat.com>
|
||||
Benno Lossin <lossin@kernel.org> <benno.lossin@proton.me>
|
||||
Bingwu Zhang <xtex@aosc.io> <xtexchooser@duck.com>
|
||||
Bingwu Zhang <xtex@aosc.io> <xtex@xtexx.eu.org>
|
||||
Bjorn Andersson <andersson@kernel.org> <bjorn@kryo.se>
|
||||
@@ -154,6 +158,9 @@ Brian King <brking@us.ibm.com>
|
||||
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
|
||||
Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com>
|
||||
Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
|
||||
Casey Connolly <casey.connolly@linaro.org> <caleb.connolly@linaro.org>
|
||||
Casey Connolly <casey.connolly@linaro.org> <caleb@connolly.tech>
|
||||
Casey Connolly <casey.connolly@linaro.org> <caleb@postmarketos.org>
|
||||
Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org>
|
||||
Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org>
|
||||
Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao@amd.com>
|
||||
@@ -312,6 +319,7 @@ Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
|
||||
Jan Kuliga <jtkuliga.kdev@gmail.com> <jankul@alatek.krakow.pl>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@opinsys.com>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
|
||||
@@ -414,6 +422,8 @@ Krishna Manikandan <quic_mkrishn@quicinc.com> <mkrishn@codeaurora.org>
|
||||
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
|
||||
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
|
||||
Krzysztof Kozlowski <krzk@kernel.org> <krzysztof.kozlowski@canonical.com>
|
||||
Krzysztof Wilczyński <kwilczynski@kernel.org> <krzysztof.wilczynski@linux.com>
|
||||
Krzysztof Wilczyński <kwilczynski@kernel.org> <kw@linux.com>
|
||||
Kshitiz Godara <quic_kgodara@quicinc.com> <kgodara@codeaurora.org>
|
||||
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
|
||||
Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
|
||||
@@ -456,6 +466,7 @@ Maheshwar Ajja <quic_majja@quicinc.com> <majja@codeaurora.org>
|
||||
Malathi Gottam <quic_mgottam@quicinc.com> <mgottam@codeaurora.org>
|
||||
Manikanta Pubbisetty <quic_mpubbise@quicinc.com> <mpubbise@codeaurora.org>
|
||||
Manivannan Sadhasivam <mani@kernel.org> <manivannanece23@gmail.com>
|
||||
Manivannan Sadhasivam <mani@kernel.org> <manivannan.sadhasivam@linaro.org>
|
||||
Manoj Basapathi <quic_manojbm@quicinc.com> <manojbm@codeaurora.org>
|
||||
Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
|
||||
Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
|
||||
@@ -509,6 +520,7 @@ Mayuresh Janorkar <mayur@ti.com>
|
||||
Md Sadre Alam <quic_mdalam@quicinc.com> <mdalam@codeaurora.org>
|
||||
Miaoqing Pan <quic_miaoqing@quicinc.com> <miaoqing@codeaurora.org>
|
||||
Michael Buesch <m@bues.ch>
|
||||
Michael Riesch <michael.riesch@collabora.com> <michael.riesch@wolfvision.net>
|
||||
Michal Simek <michal.simek@amd.com> <michal.simek@xilinx.com>
|
||||
Michel Dänzer <michel@tungstengraphics.com>
|
||||
Michel Lespinasse <michel@lespinasse.org>
|
||||
@@ -543,6 +555,8 @@ Naveen N Rao <naveen@kernel.org> <naveen.n.rao@linux.vnet.ibm.com>
|
||||
Neeraj Upadhyay <neeraj.upadhyay@kernel.org> <quic_neeraju@quicinc.com>
|
||||
Neeraj Upadhyay <neeraj.upadhyay@kernel.org> <neeraju@codeaurora.org>
|
||||
Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
|
||||
NeilBrown <neil@brown.name> <neilb@suse.de>
|
||||
NeilBrown <neil@brown.name> <neilb@cse.unsw.edu.au>
|
||||
Nguyen Anh Quynh <aquynh@gmail.com>
|
||||
Nicholas Piggin <npiggin@gmail.com> <npiggen@suse.de>
|
||||
Nicholas Piggin <npiggin@gmail.com> <npiggin@kernel.dk>
|
||||
@@ -588,6 +602,12 @@ Paul Mackerras <paulus@ozlabs.org> <paulus@samba.org>
|
||||
Paul Mackerras <paulus@ozlabs.org> <paulus@au1.ibm.com>
|
||||
Paul Moore <paul@paul-moore.com> <paul.moore@hp.com>
|
||||
Paul Moore <paul@paul-moore.com> <pmoore@redhat.com>
|
||||
Paulo Alcantara <pc@manguebit.org> <pcacjr@zytor.com>
|
||||
Paulo Alcantara <pc@manguebit.org> <paulo@paulo.ac>
|
||||
Paulo Alcantara <pc@manguebit.org> <pc@cjr.nz>
|
||||
Paulo Alcantara <pc@manguebit.org> <palcantara@suse.de>
|
||||
Paulo Alcantara <pc@manguebit.org> <palcantara@suse.com>
|
||||
Paulo Alcantara <pc@manguebit.org> <pc@manguebit.com>
|
||||
Pavankumar Kondeti <quic_pkondeti@quicinc.com> <pkondeti@codeaurora.org>
|
||||
Peter A Jonsson <pj@ludd.ltu.se>
|
||||
Peter Oruba <peter.oruba@amd.com>
|
||||
@@ -628,6 +648,8 @@ Richard Genoud <richard.genoud@bootlin.com> <richard.genoud@gmail.com>
|
||||
Richard Leitner <richard.leitner@linux.dev> <dev@g0hl1n.net>
|
||||
Richard Leitner <richard.leitner@linux.dev> <me@g0hl1n.net>
|
||||
Richard Leitner <richard.leitner@linux.dev> <richard.leitner@skidata.com>
|
||||
Rob Clark <robin.clark@oss.qualcomm.com> <robdclark@chromium.org>
|
||||
Rob Clark <robin.clark@oss.qualcomm.com> <robdclark@gmail.com>
|
||||
Robert Foss <rfoss@kernel.org> <robert.foss@linaro.org>
|
||||
Rocky Liao <quic_rjliao@quicinc.com> <rjliao@codeaurora.org>
|
||||
Rodrigo Siqueira <siqueira@igalia.com> <rodrigosiqueiramelo@gmail.com>
|
||||
@@ -720,6 +742,7 @@ Sven Eckelmann <sven@narfation.org> <sven.eckelmann@gmx.de>
|
||||
Sven Eckelmann <sven@narfation.org> <sven.eckelmann@open-mesh.com>
|
||||
Sven Eckelmann <sven@narfation.org> <sven.eckelmann@openmesh.com>
|
||||
Sven Eckelmann <sven@narfation.org> <sven@open-mesh.com>
|
||||
Sven Peter <sven@kernel.org> <sven@svenpeter.dev>
|
||||
Takashi YOSHII <takashi.yoshii.zj@renesas.com>
|
||||
Tamizh Chelvam Raja <quic_tamizhr@quicinc.com> <tamizhr@codeaurora.org>
|
||||
Taniya Das <quic_tdas@quicinc.com> <tdas@codeaurora.org>
|
||||
|
||||
2
.pylintrc
Normal file
2
.pylintrc
Normal file
@@ -0,0 +1,2 @@
|
||||
[MASTER]
|
||||
init-hook='import sys; sys.path += ["scripts/lib/kdoc", "scripts/lib/abi"]'
|
||||
2
CREDITS
2
CREDITS
@@ -2336,7 +2336,7 @@ D: Author of the dialog utility, foundation
|
||||
D: for Menuconfig's lxdialog.
|
||||
|
||||
N: Christoph Lameter
|
||||
E: christoph@lameter.com
|
||||
E: cl@gentwo.org
|
||||
D: Digiboard PC/Xe and PC/Xi, Digiboard EPCA
|
||||
D: NUMA support, Slab allocators, Page migration
|
||||
D: Scalability, Time subsystem
|
||||
|
||||
@@ -547,6 +547,21 @@ Description:
|
||||
[RO] Maximum size in bytes of a single element in a DMA
|
||||
scatter/gather list.
|
||||
|
||||
What: /sys/block/<disk>/queue/max_write_streams
|
||||
Date: November 2024
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RO] Maximum number of write streams supported, 0 if not
|
||||
supported. If supported, valid values are 1 through
|
||||
max_write_streams, inclusive.
|
||||
|
||||
What: /sys/block/<disk>/queue/write_stream_granularity
|
||||
Date: November 2024
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RO] Granularity of a write stream in bytes. The granularity
|
||||
of a write stream is the size that should be discarded or
|
||||
overwritten together to avoid write amplification in the device.
|
||||
|
||||
What: /sys/block/<disk>/queue/max_segments
|
||||
Date: March 2010
|
||||
|
||||
@@ -26,7 +26,12 @@ Date: March 2006
|
||||
KernelVersion: 2.6.17
|
||||
Contact: Richard Purdie <rpurdie@rpsys.net>
|
||||
Description:
|
||||
Show the actual brightness by querying the hardware.
|
||||
Show the actual brightness by querying the hardware. Due
|
||||
to implementation differences in hardware this may not
|
||||
match the value in 'brightness'. For example some hardware
|
||||
may treat blanking differently or have custom power saving
|
||||
features. Userspace should generally use the values in
|
||||
'brightness' to make decisions.
|
||||
Users: HAL
|
||||
|
||||
What: /sys/class/backlight/<backlight>/max_brightness
|
||||
|
||||
@@ -715,3 +715,101 @@ Description: This file shows 1 in case the system reset happened due to the
|
||||
switch board.
|
||||
|
||||
The file is read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/global_wp_request
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: Writing 1 to this file activates a request to allow access to
|
||||
the write protected flashes. Such request can be performed only
|
||||
for system equipped with BMC (Board Management Controller),
|
||||
which can grant access to protected flashes. In case BMC allows
|
||||
access - it will respond with "global_wp_response". BMC decides
|
||||
regarding time window of granted access. After granted window is
|
||||
expired, BMC will change value back to 0.
|
||||
Default value is 0.
|
||||
|
||||
The file is read/write.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/global_wp_response
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: This file, when set 1, indicates that access to protected
|
||||
flashes has been granted to host CPU by BMC.
|
||||
Default value is 0.
|
||||
|
||||
The file is read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/shutdown_unlock
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: When ASICs are getting overheated, system protection
|
||||
hardware mechanism enforces system reboot. After system
|
||||
reboot ASICs come up in locked state. To unlock ASICs,
|
||||
this file should be written 1.
|
||||
Default value is 0.
|
||||
|
||||
The file is read/write.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/boot_progress
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: These files show the Data Process Unit board boot progress
|
||||
state. Valid states are:
|
||||
- 4 : OS starting.
|
||||
- 5 : OS running.
|
||||
- 6 : Low-Power Standby.
|
||||
|
||||
The file is read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/dpu_id
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: This file shows hardware Id of Data Process Unit board.
|
||||
|
||||
The file is read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/reset_aux_pwr_or_reload
|
||||
What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/reset_dpu_thermal
|
||||
What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/reset_from_main_board
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: These files expose the cause of the most recent reset of the Data
|
||||
Processing Unit (DPU) board. The possible causes are:
|
||||
- Power auxiliary outage or power reload.
|
||||
- Thermal shutdown.
|
||||
- Reset request from the main board.
|
||||
Value 1 in file means this is reset cause, 0 - otherwise. Only one of
|
||||
the above causes could be 1 at the same time, representing only last
|
||||
reset cause.
|
||||
|
||||
The files are read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/perst_rst
|
||||
What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/phy_rst
|
||||
What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/tpm_rst
|
||||
What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/usbphy_rst
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: These files allow resetting hardware components of Data Process
|
||||
Unit board. Respectively PCI, Ethernet PHY, TPM and USB PHY
|
||||
resets.
|
||||
Default value for all the attributes is 1. Writing 0 will
|
||||
cause reset of the related component.
|
||||
|
||||
The files are read/write.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/ufm_upgrade
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: These files show status of Unified Fabric Manager upgrade
|
||||
state. 0 - means upgrade is done, 1 - otherwise.
|
||||
|
||||
The file is read only.
|
||||
|
||||
64
Documentation/ABI/testing/debugfs-alienware-wmi
Normal file
64
Documentation/ABI/testing/debugfs-alienware-wmi
Normal file
@@ -0,0 +1,64 @@
|
||||
What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/system_description
|
||||
Date: March 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: Kurt Borja <kuurtb@gmail.com>
|
||||
Description:
|
||||
This file exposes the raw ``system_description`` number reported
|
||||
by the WMAX device.
|
||||
|
||||
Only present on devices with the AWCC interface.
|
||||
|
||||
See Documentation/admin-guide/laptops/alienware-wmi.rst for
|
||||
details.
|
||||
|
||||
RO
|
||||
|
||||
What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/hwmon_data
|
||||
Date: March 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: Kurt Borja <kuurtb@gmail.com>
|
||||
Description:
|
||||
This file exposes HWMON private data.
|
||||
|
||||
Includes fan sensor count, temperature sensor count, internal
|
||||
fan IDs and internal temp IDs.
|
||||
|
||||
See Documentation/admin-guide/laptops/alienware-wmi.rst for
|
||||
details.
|
||||
|
||||
RO
|
||||
|
||||
What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/pprof_data
|
||||
Date: March 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: Kurt Borja <kuurtb@gmail.com>
|
||||
Description:
|
||||
This file exposes Platform Profile private data.
|
||||
|
||||
Includes internal mapping to platform profiles and thermal
|
||||
profile IDs.
|
||||
|
||||
See Documentation/admin-guide/laptops/alienware-wmi.rst for
|
||||
details.
|
||||
|
||||
RO
|
||||
|
||||
What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/gpio_ctl/total_gpios
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Kurt Borja <kuurtb@gmail.com>
|
||||
Description:
|
||||
Total number of GPIO pins reported by the device.
|
||||
|
||||
RO
|
||||
|
||||
What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/gpio_ctl/pinX
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Kurt Borja <kuurtb@gmail.com>
|
||||
Description:
|
||||
This file controls GPIO pin X status.
|
||||
|
||||
See Documentation/wmi/devices/alienware-wmi.rst for details.
|
||||
|
||||
RW
|
||||
70
Documentation/ABI/testing/debugfs-pcie-ptm
Normal file
70
Documentation/ABI/testing/debugfs-pcie-ptm
Normal file
@@ -0,0 +1,70 @@
|
||||
What: /sys/kernel/debug/pcie_ptm_*/local_clock
|
||||
Date: May 2025
|
||||
Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
|
||||
Description:
|
||||
(RO) PTM local clock in nanoseconds. Applicable for both Root
|
||||
Complex and Endpoint controllers.
|
||||
|
||||
What: /sys/kernel/debug/pcie_ptm_*/master_clock
|
||||
Date: May 2025
|
||||
Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
|
||||
Description:
|
||||
(RO) PTM master clock in nanoseconds. Applicable only for
|
||||
Endpoint controllers.
|
||||
|
||||
What: /sys/kernel/debug/pcie_ptm_*/t1
|
||||
Date: May 2025
|
||||
Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
|
||||
Description:
|
||||
(RO) PTM T1 timestamp in nanoseconds. Applicable only for
|
||||
Endpoint controllers.
|
||||
|
||||
What: /sys/kernel/debug/pcie_ptm_*/t2
|
||||
Date: May 2025
|
||||
Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
|
||||
Description:
|
||||
(RO) PTM T2 timestamp in nanoseconds. Applicable only for
|
||||
Root Complex controllers.
|
||||
|
||||
What: /sys/kernel/debug/pcie_ptm_*/t3
|
||||
Date: May 2025
|
||||
Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
|
||||
Description:
|
||||
(RO) PTM T3 timestamp in nanoseconds. Applicable only for
|
||||
Root Complex controllers.
|
||||
|
||||
What: /sys/kernel/debug/pcie_ptm_*/t4
|
||||
Date: May 2025
|
||||
Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
|
||||
Description:
|
||||
(RO) PTM T4 timestamp in nanoseconds. Applicable only for
|
||||
Endpoint controllers.
|
||||
|
||||
What: /sys/kernel/debug/pcie_ptm_*/context_update
|
||||
Date: May 2025
|
||||
Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
|
||||
Description:
|
||||
(RW) Control the PTM context update mode. Applicable only for
|
||||
Endpoint controllers.
|
||||
|
||||
Following values are supported:
|
||||
|
||||
* auto = PTM context auto update trigger for every 10ms
|
||||
|
||||
* manual = PTM context manual update. Writing 'manual' to this
|
||||
file triggers PTM context update (default)
|
||||
|
||||
What: /sys/kernel/debug/pcie_ptm_*/context_valid
|
||||
Date: May 2025
|
||||
Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
|
||||
Description:
|
||||
(RW) Control the PTM context validity (local clock timing).
|
||||
Applicable only for Root Complex controllers. PTM context is
|
||||
invalidated by hardware if the Root Complex enters low power
|
||||
mode or changes link frequency.
|
||||
|
||||
Following values are supported:
|
||||
|
||||
* 0 = PTM context invalid (default)
|
||||
|
||||
* 1 = PTM context valid
|
||||
@@ -31,6 +31,42 @@ Description: SCMI Raw asynchronous message injection/snooping facility; write
|
||||
(receiving an EOF at each message boundary).
|
||||
Users: Debugging, any userspace test suite
|
||||
|
||||
What: /sys/kernel/debug/scmi/<n>/raw/message_poll
|
||||
Date: June 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: cristian.marussi@arm.com
|
||||
Description: SCMI Raw message injection/snooping facility using polling mode;
|
||||
write a complete SCMI command message (header included) in
|
||||
little-endian binary format to have it sent to the configured
|
||||
backend SCMI server for instance <n>, using polling mode on
|
||||
the reception path. (if transport is polling capable)
|
||||
Any subsequently received response can be read from this same
|
||||
entry if it arrived within the configured timeout.
|
||||
Each write to the entry causes one command request to be built
|
||||
and sent while the replies are read back one message at a time
|
||||
(receiving an EOF at each message boundary).
|
||||
Users: Debugging, any userspace test suite
|
||||
|
||||
What: /sys/kernel/debug/scmi/<n>/raw/message_poll_async
|
||||
Date: June 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: cristian.marussi@arm.com
|
||||
Description: SCMI Raw asynchronous message injection/snooping facility using
|
||||
polling-mode; write a complete SCMI asynchronous command message
|
||||
(header included) in little-endian binary format to have it sent
|
||||
to the configured backend SCMI server for instance <n>, using
|
||||
polling-mode on the reception path of the immediate part of the
|
||||
asynchronous command. (if transport is polling capable)
|
||||
Any subsequently received response can be read from this same
|
||||
entry if it arrived within the configured timeout.
|
||||
Any additional delayed response received afterwards can be read
|
||||
from this same entry too if it arrived within the configured
|
||||
timeout.
|
||||
Each write to the entry causes one command request to be built
|
||||
and sent while the replies are read back one message at a time
|
||||
(receiving an EOF at each message boundary).
|
||||
Users: Debugging, any userspace test suite
|
||||
|
||||
What: /sys/kernel/debug/scmi/<n>/raw/errors
|
||||
Date: March 2023
|
||||
KernelVersion: 6.3
|
||||
@@ -115,3 +151,58 @@ Description: SCMI Raw asynchronous message injection/snooping facility; write
|
||||
exist only if the transport is configured to have more than
|
||||
one default channel.
|
||||
Users: Debugging, any userspace test suite
|
||||
|
||||
|
||||
What: /sys/kernel/debug/scmi/<n>/raw/channels/<m>/message_poll
|
||||
Date: June 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: cristian.marussi@arm.com
|
||||
Description: SCMI Raw message injection/snooping facility using polling mode;
|
||||
write a complete SCMI command message (header included) in
|
||||
little-endian binary format to have it sent to the configured
|
||||
backend SCMI server for instance <n> through the <m> transport
|
||||
channel, using polling mode on the reception path.
|
||||
(if transport is polling capable)
|
||||
Any subsequently received response can be read from this same
|
||||
entry if it arrived on channel <m> within the configured
|
||||
timeout.
|
||||
Each write to the entry causes one command request to be built
|
||||
and sent while the replies are read back one message at a time
|
||||
(receiving an EOF at each message boundary).
|
||||
Channel identifier <m> matches the SCMI protocol number which
|
||||
has been associated with this transport channel in the DT
|
||||
description, with base protocol number 0x10 being the default
|
||||
channel for this instance.
|
||||
Note that these per-channel entries rooted at <..>/channels
|
||||
exist only if the transport is configured to have more than
|
||||
one default channel.
|
||||
Users: Debugging, any userspace test suite
|
||||
|
||||
What: /sys/kernel/debug/scmi/<n>/raw/channels/<m>/message_poll_async
|
||||
Date: June 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: cristian.marussi@arm.com
|
||||
Description: SCMI Raw asynchronous message injection/snooping facility using
|
||||
polling-mode; write a complete SCMI asynchronous command message
|
||||
(header included) in little-endian binary format to have it sent
|
||||
to the configured backend SCMI server for instance <n> through
|
||||
the <m> transport channel, using polling mode on the reception
|
||||
path of the immediate part of the asynchronous command.
|
||||
(if transport is polling capable)
|
||||
Any subsequently received response can be read from this same
|
||||
entry if it arrived on channel <m> within the configured
|
||||
timeout.
|
||||
Any additional delayed response received afterwards can be read
|
||||
from this same entry too if it arrived within the configured
|
||||
timeout.
|
||||
Each write to the entry causes one command request to be built
|
||||
and sent while the replies are read back one message at a time
|
||||
(receiving an EOF at each message boundary).
|
||||
Channel identifier <m> matches the SCMI protocol number which
|
||||
has been associated with this transport channel in the DT
|
||||
description, with base protocol number 0x10 being the default
|
||||
channel for this instance.
|
||||
Note that these per-channel entries rooted at <..>/channels
|
||||
exist only if the transport is configured to have more than
|
||||
one default channel.
|
||||
Users: Debugging, any userspace test suite
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
What: /sys/kernel/debug/turris-mox-rwtm/do_sign
|
||||
Date: Jun 2020
|
||||
KernelVersion: 5.8
|
||||
Contact: Marek Behún <kabel@kernel.org>
|
||||
Description:
|
||||
|
||||
======= ===========================================================
|
||||
(Write) Message to sign with the ECDSA private key stored in
|
||||
device's OTP. The message must be exactly 64 bytes
|
||||
(since this is intended for SHA-512 hashes).
|
||||
(Read) The resulting signature, 136 bytes. This contains the
|
||||
R and S values of the ECDSA signature, both in
|
||||
big-endian format.
|
||||
======= ===========================================================
|
||||
@@ -242,7 +242,7 @@ Description:
|
||||
decoding a Host Physical Address range. Note that this number
|
||||
may be elevated without any regionX objects active or even
|
||||
enumerated, as this may be due to decoders established by
|
||||
platform firwmare or a previous kernel (kexec).
|
||||
platform firmware or a previous kernel (kexec).
|
||||
|
||||
|
||||
What: /sys/bus/cxl/devices/decoderX.Y
|
||||
@@ -572,7 +572,7 @@ Description:
|
||||
|
||||
|
||||
What: /sys/bus/cxl/devices/regionZ/accessY/read_bandwidth
|
||||
/sys/bus/cxl/devices/regionZ/accessY/write_banwidth
|
||||
/sys/bus/cxl/devices/regionZ/accessY/write_bandwidth
|
||||
Date: Jan, 2024
|
||||
KernelVersion: v6.9
|
||||
Contact: linux-cxl@vger.kernel.org
|
||||
|
||||
@@ -94,6 +94,7 @@ Description:
|
||||
What: /sys/bus/iio/devices/iio:deviceX/sampling_frequency
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_sampling_frequency
|
||||
What: /sys/bus/iio/devices/iio:deviceX/buffer/sampling_frequency
|
||||
What: /sys/bus/iio/devices/iio:deviceX/events/sampling_frequency
|
||||
What: /sys/bus/iio/devices/triggerX/sampling_frequency
|
||||
KernelVersion: 2.6.35
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
@@ -740,7 +741,9 @@ Description:
|
||||
1kohm_to_gnd: connected to ground via a 1kOhm resistor,
|
||||
2.5kohm_to_gnd: connected to ground via a 2.5kOhm resistor,
|
||||
6kohm_to_gnd: connected to ground via a 6kOhm resistor,
|
||||
7.7kohm_to_gnd: connected to ground via a 7.7kOhm resistor,
|
||||
20kohm_to_gnd: connected to ground via a 20kOhm resistor,
|
||||
32kohm_to_gnd: connected to ground via a 32kOhm resistor,
|
||||
42kohm_to_gnd: connected to ground via a 42kOhm resistor,
|
||||
90kohm_to_gnd: connected to ground via a 90kOhm resistor,
|
||||
100kohm_to_gnd: connected to ground via a 100kOhm resistor,
|
||||
|
||||
@@ -117,3 +117,47 @@ Date: July 2018
|
||||
KernelVersion: 4.19.0
|
||||
Contact: linux-pci@vger.kernel.org, rajatja@google.com
|
||||
Description: Total number of ERR_NONFATAL messages reported to rootport.
|
||||
|
||||
PCIe AER ratelimits
|
||||
-------------------
|
||||
|
||||
These attributes show up under all the devices that are AER capable.
|
||||
They represent configurable ratelimits of logs per error type.
|
||||
|
||||
See Documentation/PCI/pcieaer-howto.rst for more info on ratelimits.
|
||||
|
||||
What: /sys/bus/pci/devices/<dev>/aer/correctable_ratelimit_interval_ms
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16.0
|
||||
Contact: linux-pci@vger.kernel.org
|
||||
Description: Writing 0 disables AER correctable error log ratelimiting.
|
||||
Writing a positive value sets the ratelimit interval in ms.
|
||||
Default is DEFAULT_RATELIMIT_INTERVAL (5000 ms).
|
||||
|
||||
What: /sys/bus/pci/devices/<dev>/aer/correctable_ratelimit_burst
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16.0
|
||||
Contact: linux-pci@vger.kernel.org
|
||||
Description: Ratelimit burst for correctable error logs. Writing a value
|
||||
changes the number of errors (burst) allowed per interval
|
||||
before ratelimiting. Reading gets the current ratelimit
|
||||
burst. Default is DEFAULT_RATELIMIT_BURST (10).
|
||||
|
||||
What: /sys/bus/pci/devices/<dev>/aer/nonfatal_ratelimit_interval_ms
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16.0
|
||||
Contact: linux-pci@vger.kernel.org
|
||||
Description: Writing 0 disables AER non-fatal uncorrectable error log
|
||||
ratelimiting. Writing a positive value sets the ratelimit
|
||||
interval in ms. Default is DEFAULT_RATELIMIT_INTERVAL
|
||||
(5000 ms).
|
||||
|
||||
What: /sys/bus/pci/devices/<dev>/aer/nonfatal_ratelimit_burst
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16.0
|
||||
Contact: linux-pci@vger.kernel.org
|
||||
Description: Ratelimit burst for non-fatal uncorrectable error logs.
|
||||
Writing a value changes the number of errors (burst)
|
||||
allowed per interval before ratelimiting. Reading gets the
|
||||
current ratelimit burst. Default is DEFAULT_RATELIMIT_BURST
|
||||
(10).
|
||||
@@ -76,6 +76,6 @@ Date: May 2017
|
||||
Contact: Darren Hart (VMware) <dvhart@infradead.org>
|
||||
Description:
|
||||
This file contains a boolean flag signaling the data block
|
||||
aassociated with the given WMI device is writable. If the
|
||||
associated with the given WMI device is writable. If the
|
||||
given WMI device is not associated with a data block, then
|
||||
this file will not exist.
|
||||
|
||||
@@ -72,6 +72,12 @@ Description:
|
||||
/sys/class/leds/<led> once a given trigger is selected. For
|
||||
their documentation see `sysfs-class-led-trigger-*`.
|
||||
|
||||
Writing "none" removes the trigger for this LED.
|
||||
|
||||
Writing "default" sets the trigger to the LED's default trigger
|
||||
(which would often be configured in the device tree for the
|
||||
hardware).
|
||||
|
||||
What: /sys/class/leds/<led>/inverted
|
||||
Date: January 2011
|
||||
KernelVersion: 2.6.38
|
||||
|
||||
@@ -456,7 +456,7 @@ Description:
|
||||
"Over voltage", "Under voltage", "Unspecified failure", "Cold",
|
||||
"Watchdog timer expire", "Safety timer expire",
|
||||
"Over current", "Calibration required", "Warm",
|
||||
"Cool", "Hot", "No battery"
|
||||
"Cool", "Hot", "No battery", "Blown fuse", "Cell imbalance"
|
||||
|
||||
What: /sys/class/power_supply/<supply_name>/precharge_current
|
||||
Date: June 2017
|
||||
@@ -508,11 +508,12 @@ Description:
|
||||
Access: Read, Write
|
||||
|
||||
Valid values:
|
||||
================ ====================================
|
||||
auto: Charge normally, respect thresholds
|
||||
inhibit-charge: Do not charge while AC is attached
|
||||
force-discharge: Force discharge while AC is attached
|
||||
================ ====================================
|
||||
===================== ========================================
|
||||
auto: Charge normally, respect thresholds
|
||||
inhibit-charge: Do not charge while AC is attached
|
||||
inhibit-charge-awake: inhibit-charge only when device is awake
|
||||
force-discharge: Force discharge while AC is attached
|
||||
===================== ========================================
|
||||
|
||||
What: /sys/class/power_supply/<supply_name>/technology
|
||||
Date: May 2007
|
||||
@@ -822,3 +823,46 @@ Description:
|
||||
Each entry is a link to the device which registered the extension.
|
||||
|
||||
Access: Read
|
||||
|
||||
What: /sys/class/power_supply/max8971-charger/fast_charge_timer
|
||||
Date: May 2025
|
||||
KernelVersion: 6.15.0
|
||||
Contact: Svyatoslav Ryhel <clamor95@gmail.com>
|
||||
Description:
|
||||
This entry shows and sets the maximum time the max8971
|
||||
charger operates in fast-charge mode. When the timer expires
|
||||
the device will terminate fast-charge mode (charging current
|
||||
will drop to 0 A) and will trigger interrupt.
|
||||
|
||||
Valid values:
|
||||
|
||||
- 4 - 10 (hours), step by 1
|
||||
- 0: disabled.
|
||||
|
||||
What: /sys/class/power_supply/max8971-charger/top_off_threshold_current
|
||||
Date: May 2025
|
||||
KernelVersion: 6.15.0
|
||||
Contact: Svyatoslav Ryhel <clamor95@gmail.com>
|
||||
Description:
|
||||
This entry shows and sets the charging current threshold for
|
||||
entering top-off charging mode. When charging current in fast
|
||||
charge mode drops below this value, the charger will trigger
|
||||
interrupt and start top-off charging mode.
|
||||
|
||||
Valid values:
|
||||
|
||||
- 50000 - 200000 (microamps), step by 50000 (rounded down)
|
||||
|
||||
What: /sys/class/power_supply/max8971-charger/top_off_timer
|
||||
Date: May 2025
|
||||
KernelVersion: 6.15.0
|
||||
Contact: Svyatoslav Ryhel <clamor95@gmail.com>
|
||||
Description:
|
||||
This entry shows and sets the maximum time the max8971
|
||||
charger operates in top-off charge mode. When the timer expires
|
||||
the device will terminate top-off charge mode (charging current
|
||||
will drop to 0 A) and will trigger interrupt.
|
||||
|
||||
Valid values:
|
||||
|
||||
- 0 - 70 (minutes), step by 10 (rounded down)
|
||||
|
||||
27
Documentation/ABI/testing/sysfs-class-power-gaokun
Normal file
27
Documentation/ABI/testing/sysfs-class-power-gaokun
Normal file
@@ -0,0 +1,27 @@
|
||||
What: /sys/class/power_supply/gaokun-ec-battery/smart_charge_delay
|
||||
Date: March 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: Pengyu Luo <mitltlatltl@gmail.com>
|
||||
Description:
|
||||
This entry allows configuration of smart charging delay.
|
||||
|
||||
Smart charging behavior: when the power adapter is connected
|
||||
for delay hours, battery charging will follow the rules of
|
||||
charge_control_start_threshold and charge_control_end_threshold.
|
||||
For more information about charge control, please refer to
|
||||
sysfs-class-power.
|
||||
|
||||
Access: Read, Write
|
||||
|
||||
Valid values: In hours (non-negative)
|
||||
|
||||
What: /sys/class/power_supply/gaokun-ec-battery/battery_adaptive_charge
|
||||
Date: March 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: Pengyu Luo <mitltlatltl@gmail.com>
|
||||
Description:
|
||||
This entry allows enabling battery adaptive charging.
|
||||
|
||||
Access: Read, Write
|
||||
|
||||
Valid values: 0 (disabled) or 1 (enabled)
|
||||
@@ -111,6 +111,7 @@ What: /sys/devices/system/cpu/cpuidle/available_governors
|
||||
/sys/devices/system/cpu/cpuidle/current_driver
|
||||
/sys/devices/system/cpu/cpuidle/current_governor
|
||||
/sys/devices/system/cpu/cpuidle/current_governor_ro
|
||||
/sys/devices/system/cpu/cpuidle/intel_c1_demotion
|
||||
Date: September 2007
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description: Discover cpuidle policy and mechanism
|
||||
@@ -132,7 +133,11 @@ Description: Discover cpuidle policy and mechanism
|
||||
|
||||
current_governor_ro: (RO) displays current idle policy.
|
||||
|
||||
See Documentation/admin-guide/pm/cpuidle.rst and
|
||||
intel_c1_demotion: (RW) enables/disables the C1 demotion
|
||||
feature on Intel CPUs.
|
||||
|
||||
See Documentation/admin-guide/pm/cpuidle.rst,
|
||||
Documentation/admin-guide/pm/intel_idle.rst, and
|
||||
Documentation/driver-api/pm/cpuidle.rst for more information.
|
||||
|
||||
|
||||
@@ -268,6 +273,60 @@ Description: Discover CPUs in the same CPU frequency coordination domain
|
||||
This file is only present if the acpi-cpufreq or the cppc-cpufreq
|
||||
drivers are in use.
|
||||
|
||||
What: /sys/devices/system/cpu/cpuX/cpufreq/auto_select
|
||||
Date: May 2025
|
||||
Contact: linux-pm@vger.kernel.org
|
||||
Description: Autonomous selection enable
|
||||
|
||||
Read/write interface to control autonomous selection enable
|
||||
Read returns autonomous selection status:
|
||||
0: autonomous selection is disabled
|
||||
1: autonomous selection is enabled
|
||||
|
||||
Write 'y' or '1' or 'on' to enable autonomous selection.
|
||||
Write 'n' or '0' or 'off' to disable autonomous selection.
|
||||
|
||||
This file is only present if the cppc-cpufreq driver is in use.
|
||||
|
||||
What: /sys/devices/system/cpu/cpuX/cpufreq/auto_act_window
|
||||
Date: May 2025
|
||||
Contact: linux-pm@vger.kernel.org
|
||||
Description: Autonomous activity window
|
||||
|
||||
This file indicates a moving utilization sensitivity window to
|
||||
the platform's autonomous selection policy.
|
||||
|
||||
Read/write an integer that represents the autonomous activity window (in
|
||||
microseconds) from/to this file. The max value to write is
|
||||
1270000000 but the max significand is 127. This means that if 128
|
||||
is written to this file, 127 will be stored. If the value is
|
||||
greater than 130, only the first two digits will be saved as
|
||||
significand.
|
||||
|
||||
Writing a zero value to this file enables the platform to
|
||||
determine an appropriate Activity Window depending on the workload.
|
||||
|
||||
Writing to this file only has meaning when Autonomous Selection is
|
||||
enabled.
|
||||
|
||||
This file is only present if the cppc-cpufreq driver is in use.
|
||||
|
||||
What: /sys/devices/system/cpu/cpuX/cpufreq/energy_performance_preference_val
|
||||
Date: May 2025
|
||||
Contact: linux-pm@vger.kernel.org
|
||||
Description: Energy performance preference
|
||||
|
||||
Read/write an 8-bit integer from/to this file. This file
|
||||
represents a range of values from 0 (performance preference) to
|
||||
0xFF (energy efficiency preference) that influences the rate of
|
||||
performance increase/decrease and the result of the hardware's
|
||||
energy efficiency and performance optimization policies.
|
||||
|
||||
Writing to this file only has meaning when Autonomous Selection is
|
||||
enabled.
|
||||
|
||||
This file is only present if the cppc-cpufreq driver is in use.
|
||||
|
||||
|
||||
What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1}
|
||||
Date: August 2008
|
||||
@@ -485,6 +544,7 @@ What: /sys/devices/system/cpu/cpuX/regs/
|
||||
/sys/devices/system/cpu/cpuX/regs/identification/
|
||||
/sys/devices/system/cpu/cpuX/regs/identification/midr_el1
|
||||
/sys/devices/system/cpu/cpuX/regs/identification/revidr_el1
|
||||
/sys/devices/system/cpu/cpuX/regs/identification/aidr_el1
|
||||
/sys/devices/system/cpu/cpuX/regs/identification/smidr_el1
|
||||
Date: June 2016
|
||||
Contact: Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
|
||||
@@ -511,11 +571,13 @@ Description: information about CPUs heterogeneity.
|
||||
|
||||
What: /sys/devices/system/cpu/vulnerabilities
|
||||
/sys/devices/system/cpu/vulnerabilities/gather_data_sampling
|
||||
/sys/devices/system/cpu/vulnerabilities/indirect_target_selection
|
||||
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
|
||||
/sys/devices/system/cpu/vulnerabilities/l1tf
|
||||
/sys/devices/system/cpu/vulnerabilities/mds
|
||||
/sys/devices/system/cpu/vulnerabilities/meltdown
|
||||
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
|
||||
/sys/devices/system/cpu/vulnerabilities/old_microcode
|
||||
/sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling
|
||||
/sys/devices/system/cpu/vulnerabilities/retbleed
|
||||
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
What: /sys/devices/virtual/misc/tdx_guest/measurements/MRNAME[:HASH]
|
||||
Date: April, 2025
|
||||
KernelVersion: v6.16
|
||||
Contact: linux-coco@lists.linux.dev
|
||||
Description:
|
||||
Value of a TDX measurement register (MR). MRNAME and HASH above
|
||||
are placeholders. The optional suffix :HASH is used for MRs
|
||||
that have associated hash algorithms. See below for a complete
|
||||
list of TDX MRs exposed via sysfs. Refer to Intel TDX Module
|
||||
ABI Specification for the definition of TDREPORT and the full
|
||||
list of TDX measurements.
|
||||
|
||||
Intel TDX Module ABI Specification can be found at:
|
||||
https://www.intel.com/content/www/us/en/developer/tools/trust-domain-extensions/documentation.html#architecture
|
||||
|
||||
See also:
|
||||
https://docs.kernel.org/driver-api/coco/measurement-registers.html
|
||||
|
||||
What: /sys/devices/virtual/misc/tdx_guest/measurements/mrconfigid
|
||||
Date: April, 2025
|
||||
KernelVersion: v6.16
|
||||
Contact: linux-coco@lists.linux.dev
|
||||
Description:
|
||||
(RO) MRCONFIGID - 48-byte immutable storage typically used for
|
||||
software-defined ID for non-owner-defined configuration of the
|
||||
guest TD – e.g., run-time or OS configuration.
|
||||
|
||||
What: /sys/devices/virtual/misc/tdx_guest/measurements/mrowner
|
||||
Date: April, 2025
|
||||
KernelVersion: v6.16
|
||||
Contact: linux-coco@lists.linux.dev
|
||||
Description:
|
||||
(RO) MROWNER - 48-byte immutable storage typically used for
|
||||
software-defined ID for the guest TD’s owner.
|
||||
|
||||
What: /sys/devices/virtual/misc/tdx_guest/measurements/mrownerconfig
|
||||
Date: April, 2025
|
||||
KernelVersion: v6.16
|
||||
Contact: linux-coco@lists.linux.dev
|
||||
Description:
|
||||
(RO) MROWNERCONFIG - 48-byte immutable storage typically used
|
||||
for software-defined ID for owner-defined configuration of the
|
||||
guest TD – e.g., specific to the workload rather than the
|
||||
run-time or OS.
|
||||
|
||||
What: /sys/devices/virtual/misc/tdx_guest/measurements/mrtd:sha384
|
||||
Date: April, 2025
|
||||
KernelVersion: v6.16
|
||||
Contact: linux-coco@lists.linux.dev
|
||||
Description:
|
||||
(RO) MRTD - Measurement of the initial contents of the TD.
|
||||
|
||||
What: /sys/devices/virtual/misc/tdx_guest/measurements/rtmr[0123]:sha384
|
||||
Date: April, 2025
|
||||
KernelVersion: v6.16
|
||||
Contact: linux-coco@lists.linux.dev
|
||||
Description:
|
||||
(RW) RTMR[0123] - 4 Run-Time extendable Measurement Registers.
|
||||
Read from any of these returns the current value of the
|
||||
corresponding RTMR. Write extends the written buffer to the
|
||||
RTMR. All writes must start at offset 0 and be 48 bytes in
|
||||
size. Partial writes will result in EINVAL returned by the
|
||||
write() syscall.
|
||||
@@ -1,6 +1,6 @@
|
||||
What: /sys/bus/hid/drivers/hid-appletb-kbd/<dev>/mode
|
||||
Date: September, 2023
|
||||
KernelVersion: 6.5
|
||||
Date: March, 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: linux-input@vger.kernel.org
|
||||
Description:
|
||||
The set of keys displayed on the Touch Bar.
|
||||
|
||||
@@ -17,7 +17,7 @@ Description: Read only. Returns the firmware version of Intel MAX10
|
||||
What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_address
|
||||
Date: January 2021
|
||||
KernelVersion: 5.12
|
||||
Contact: Peter Colberg <peter.colberg@altera.com>
|
||||
Contact: Matthew Gerlach <matthew.gerlach@altera.com>
|
||||
Description: Read only. Returns the first MAC address in a block
|
||||
of sequential MAC addresses assigned to the board
|
||||
that is managed by the Intel MAX10 BMC. It is stored in
|
||||
@@ -28,7 +28,7 @@ Description: Read only. Returns the first MAC address in a block
|
||||
What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_count
|
||||
Date: January 2021
|
||||
KernelVersion: 5.12
|
||||
Contact: Peter Colberg <peter.colberg@altera.com>
|
||||
Contact: Matthew Gerlach <matthew.gerlach@altera.com>
|
||||
Description: Read only. Returns the number of sequential MAC
|
||||
addresses assigned to the board managed by the Intel
|
||||
MAX10 BMC. This value is stored in FLASH and is mirrored
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_root_entry_hash
|
||||
Date: Sep 2022
|
||||
KernelVersion: 5.20
|
||||
Contact: Peter Colberg <peter.colberg@altera.com>
|
||||
Contact: Matthew Gerlach <matthew.gerlach@altera.com>
|
||||
Description: Read only. Returns the root entry hash for the static
|
||||
region if one is programmed, else it returns the
|
||||
string: "hash not programmed". This file is only
|
||||
@@ -11,7 +11,7 @@ Description: Read only. Returns the root entry hash for the static
|
||||
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_root_entry_hash
|
||||
Date: Sep 2022
|
||||
KernelVersion: 5.20
|
||||
Contact: Peter Colberg <peter.colberg@altera.com>
|
||||
Contact: Matthew Gerlach <matthew.gerlach@altera.com>
|
||||
Description: Read only. Returns the root entry hash for the partial
|
||||
reconfiguration region if one is programmed, else it
|
||||
returns the string: "hash not programmed". This file
|
||||
@@ -21,7 +21,7 @@ Description: Read only. Returns the root entry hash for the partial
|
||||
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_root_entry_hash
|
||||
Date: Sep 2022
|
||||
KernelVersion: 5.20
|
||||
Contact: Peter Colberg <peter.colberg@altera.com>
|
||||
Contact: Matthew Gerlach <matthew.gerlach@altera.com>
|
||||
Description: Read only. Returns the root entry hash for the BMC image
|
||||
if one is programmed, else it returns the string:
|
||||
"hash not programmed". This file is only visible if the
|
||||
@@ -31,7 +31,7 @@ Description: Read only. Returns the root entry hash for the BMC image
|
||||
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_canceled_csks
|
||||
Date: Sep 2022
|
||||
KernelVersion: 5.20
|
||||
Contact: Peter Colberg <peter.colberg@altera.com>
|
||||
Contact: Matthew Gerlach <matthew.gerlach@altera.com>
|
||||
Description: Read only. Returns a list of indices for canceled code
|
||||
signing keys for the static region. The standard bitmap
|
||||
list format is used (e.g. "1,2-6,9").
|
||||
@@ -39,7 +39,7 @@ Description: Read only. Returns a list of indices for canceled code
|
||||
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_canceled_csks
|
||||
Date: Sep 2022
|
||||
KernelVersion: 5.20
|
||||
Contact: Peter Colberg <peter.colberg@altera.com>
|
||||
Contact: Matthew Gerlach <matthew.gerlach@altera.com>
|
||||
Description: Read only. Returns a list of indices for canceled code
|
||||
signing keys for the partial reconfiguration region. The
|
||||
standard bitmap list format is used (e.g. "1,2-6,9").
|
||||
@@ -47,7 +47,7 @@ Description: Read only. Returns a list of indices for canceled code
|
||||
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_canceled_csks
|
||||
Date: Sep 2022
|
||||
KernelVersion: 5.20
|
||||
Contact: Peter Colberg <peter.colberg@altera.com>
|
||||
Contact: Matthew Gerlach <matthew.gerlach@altera.com>
|
||||
Description: Read only. Returns a list of indices for canceled code
|
||||
signing keys for the BMC. The standard bitmap list format
|
||||
is used (e.g. "1,2-6,9").
|
||||
@@ -55,7 +55,7 @@ Description: Read only. Returns a list of indices for canceled code
|
||||
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/flash_count
|
||||
Date: Sep 2022
|
||||
KernelVersion: 5.20
|
||||
Contact: Peter Colberg <peter.colberg@altera.com>
|
||||
Contact: Matthew Gerlach <matthew.gerlach@altera.com>
|
||||
Description: Read only. Returns number of times the secure update
|
||||
staging area has been flashed.
|
||||
Format: "%u".
|
||||
|
||||
@@ -60,26 +60,26 @@ Description: RO. Package default power limit (default TDP setting).
|
||||
|
||||
Only supported for particular Intel Xe graphics platforms.
|
||||
|
||||
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_crit
|
||||
Date: February 2024
|
||||
KernelVersion: 6.8
|
||||
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_crit
|
||||
Date: May 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: intel-xe@lists.freedesktop.org
|
||||
Description: RW. Package reactive critical (I1) power limit in microwatts.
|
||||
Description: RW. Card reactive critical (I1) power limit in microwatts.
|
||||
|
||||
Package reactive critical (I1) power limit in microwatts is exposed
|
||||
Card reactive critical (I1) power limit in microwatts is exposed
|
||||
for client products. The power controller will throttle the
|
||||
operating frequency if the power averaged over a window exceeds
|
||||
this limit.
|
||||
|
||||
Only supported for particular Intel Xe graphics platforms.
|
||||
|
||||
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/curr2_crit
|
||||
Date: February 2024
|
||||
KernelVersion: 6.8
|
||||
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/curr1_crit
|
||||
Date: May 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: intel-xe@lists.freedesktop.org
|
||||
Description: RW. Package reactive critical (I1) power limit in milliamperes.
|
||||
Description: RW. Card reactive critical (I1) power limit in milliamperes.
|
||||
|
||||
Package reactive critical (I1) power limit in milliamperes is
|
||||
Card reactive critical (I1) power limit in milliamperes is
|
||||
exposed for server products. The power controller will throttle
|
||||
the operating frequency if the power averaged over a window
|
||||
exceeds this limit.
|
||||
@@ -124,3 +124,27 @@ Contact: intel-xe@lists.freedesktop.org
|
||||
Description: RO. VRAM temperature in millidegree Celsius.
|
||||
|
||||
Only supported for particular Intel Xe graphics platforms.
|
||||
|
||||
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan1_input
|
||||
Date: March 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: intel-xe@lists.freedesktop.org
|
||||
Description: RO. Fan 1 speed in RPM.
|
||||
|
||||
Only supported for particular Intel Xe graphics platforms.
|
||||
|
||||
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan2_input
|
||||
Date: March 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: intel-xe@lists.freedesktop.org
|
||||
Description: RO. Fan 2 speed in RPM.
|
||||
|
||||
Only supported for particular Intel Xe graphics platforms.
|
||||
|
||||
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan3_input
|
||||
Date: March 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: intel-xe@lists.freedesktop.org
|
||||
Description: RO. Fan 3 speed in RPM.
|
||||
|
||||
Only supported for particular Intel Xe graphics platforms.
|
||||
|
||||
@@ -4,7 +4,7 @@ KernelVersion: 6.7
|
||||
Contact: qat-linux@intel.com
|
||||
Description: (RO) Reports the number of correctable errors detected by the device.
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_nonfatal
|
||||
Date: January 2024
|
||||
@@ -12,7 +12,7 @@ KernelVersion: 6.7
|
||||
Contact: qat-linux@intel.com
|
||||
Description: (RO) Reports the number of non fatal errors detected by the device.
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
|
||||
Date: January 2024
|
||||
@@ -20,7 +20,7 @@ KernelVersion: 6.7
|
||||
Contact: qat-linux@intel.com
|
||||
Description: (RO) Reports the number of fatal errors detected by the device.
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat_ras/reset_error_counters
|
||||
Date: January 2024
|
||||
@@ -38,4 +38,4 @@ Description: (WO) Write to resets all error counters of a device.
|
||||
# cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
|
||||
0
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
@@ -1636,3 +1636,52 @@ Description:
|
||||
attribute value.
|
||||
|
||||
The attribute is read only.
|
||||
|
||||
What: /sys/bus/platform/drivers/ufshcd/*/wb_resize_enable
|
||||
What: /sys/bus/platform/devices/*.ufs/wb_resize_enable
|
||||
Date: April 2025
|
||||
Contact: Huan Tang <tanghuan@vivo.com>
|
||||
Description:
|
||||
The host can enable the WriteBooster buffer resize by setting this
|
||||
attribute.
|
||||
|
||||
======== ======================================
|
||||
idle There is no resize operation
|
||||
decrease Decrease WriteBooster buffer size
|
||||
increase Increase WriteBooster buffer size
|
||||
======== ======================================
|
||||
|
||||
The file is write only.
|
||||
|
||||
What: /sys/bus/platform/drivers/ufshcd/*/attributes/wb_resize_hint
|
||||
What: /sys/bus/platform/devices/*.ufs/attributes/wb_resize_hint
|
||||
Date: April 2025
|
||||
Contact: Huan Tang <tanghuan@vivo.com>
|
||||
Description:
|
||||
wb_resize_hint indicates hint information about which type of resize
|
||||
for WriteBooster buffer is recommended by the device.
|
||||
|
||||
========= ======================================
|
||||
keep Recommend to keep the buffer size
|
||||
decrease Recommend to decrease the buffer size
|
||||
increase Recommend to increase the buffer size
|
||||
========= ======================================
|
||||
|
||||
The file is read only.
|
||||
|
||||
What: /sys/bus/platform/drivers/ufshcd/*/attributes/wb_resize_status
|
||||
What: /sys/bus/platform/devices/*.ufs/attributes/wb_resize_status
|
||||
Date: April 2025
|
||||
Contact: Huan Tang <tanghuan@vivo.com>
|
||||
Description:
|
||||
The host can check the resize operation status of the WriteBooster
|
||||
buffer by reading this attribute.
|
||||
|
||||
================ ========================================
|
||||
idle Resize operation is not issued
|
||||
in_progress Resize operation in progress
|
||||
complete_success Resize operation completed successfully
|
||||
general_failure Resize operation general failure
|
||||
================ ========================================
|
||||
|
||||
The file is read only.
|
||||
|
||||
@@ -248,3 +248,24 @@ Description:
|
||||
# cat ff_pwr_btn
|
||||
7 enabled
|
||||
|
||||
What: /sys/firmware/acpi/memory_ranges/rangeX
|
||||
Date: February 2025
|
||||
Contact: Tony Luck <tony.luck@intel.com>
|
||||
Description:
|
||||
On systems with the ACPI MRRM table, reports the parameters for
|
||||
each range.
|
||||
|
||||
base: Starting system physical address.
|
||||
|
||||
length: Length of this range in bytes.
|
||||
|
||||
node: NUMA node that this range belongs to. Negative numbers
|
||||
indicate that the node number could not be determined (e.g.
|
||||
for an address range that is reserved for future hot add of
|
||||
memory).
|
||||
|
||||
local_region_id: ID associated with access by agents
|
||||
local to this range of addresses.
|
||||
|
||||
remote_region_id: ID associated with access by agents
|
||||
non-local to this range of addresses.
|
||||
|
||||
@@ -12,15 +12,6 @@ Contact: Marek Behún <kabel@kernel.org>
|
||||
Description: (Read) MAC addresses burned into eFuses of this Turris Mox board.
|
||||
Format: %pM
|
||||
|
||||
What: /sys/firmware/turris-mox-rwtm/pubkey
|
||||
Date: August 2019
|
||||
KernelVersion: 5.4
|
||||
Contact: Marek Behún <kabel@kernel.org>
|
||||
Description: (Read) ECDSA public key (in pubkey hex compressed form) computed
|
||||
as pair to the ECDSA private key burned into eFuses of this
|
||||
Turris Mox Board.
|
||||
Format: string
|
||||
|
||||
What: /sys/firmware/turris-mox-rwtm/ram_size
|
||||
Date: August 2019
|
||||
KernelVersion: 5.4
|
||||
|
||||
@@ -27,3 +27,11 @@ Description: Writing to this will drop compression-related caches,
|
||||
- 1 : invalidate cached compressed folios
|
||||
- 2 : drop in-memory pclusters
|
||||
- 3 : drop in-memory pclusters and cached compressed folios
|
||||
|
||||
What: /sys/fs/erofs/accel
|
||||
Date: May 2025
|
||||
Contact: "Bo Liu" <liubo03@inspur.com>
|
||||
Description: Used to set or show hardware accelerators in effect
|
||||
and multiple accelerators are separated by '\n'.
|
||||
Supported accelerator(s): qat_deflate.
|
||||
Disable all accelerators with an empty string (echo > accel).
|
||||
|
||||
@@ -270,7 +270,7 @@ Description: Shows all enabled kernel features.
|
||||
inode_checksum, flexible_inline_xattr, quota_ino,
|
||||
inode_crtime, lost_found, verity, sb_checksum,
|
||||
casefold, readonly, compression, test_dummy_encryption_v2,
|
||||
atomic_write, pin_file, encrypted_casefold.
|
||||
atomic_write, pin_file, encrypted_casefold, linear_lookup.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/inject_rate
|
||||
Date: May 2016
|
||||
@@ -710,32 +710,34 @@ Description: Support configuring fault injection type, should be
|
||||
enabled with fault_injection option, fault type value
|
||||
is shown below, it supports single or combined type.
|
||||
|
||||
=========================== ===========
|
||||
=========================== ==========
|
||||
Type_Name Type_Value
|
||||
=========================== ===========
|
||||
FAULT_KMALLOC 0x000000001
|
||||
FAULT_KVMALLOC 0x000000002
|
||||
FAULT_PAGE_ALLOC 0x000000004
|
||||
FAULT_PAGE_GET 0x000000008
|
||||
FAULT_ALLOC_BIO 0x000000010 (obsolete)
|
||||
FAULT_ALLOC_NID 0x000000020
|
||||
FAULT_ORPHAN 0x000000040
|
||||
FAULT_BLOCK 0x000000080
|
||||
FAULT_DIR_DEPTH 0x000000100
|
||||
FAULT_EVICT_INODE 0x000000200
|
||||
FAULT_TRUNCATE 0x000000400
|
||||
FAULT_READ_IO 0x000000800
|
||||
FAULT_CHECKPOINT 0x000001000
|
||||
FAULT_DISCARD 0x000002000
|
||||
FAULT_WRITE_IO 0x000004000
|
||||
FAULT_SLAB_ALLOC 0x000008000
|
||||
FAULT_DQUOT_INIT 0x000010000
|
||||
FAULT_LOCK_OP 0x000020000
|
||||
FAULT_BLKADDR_VALIDITY 0x000040000
|
||||
FAULT_BLKADDR_CONSISTENCE 0x000080000
|
||||
FAULT_NO_SEGMENT 0x000100000
|
||||
FAULT_INCONSISTENT_FOOTER 0x000200000
|
||||
=========================== ===========
|
||||
=========================== ==========
|
||||
FAULT_KMALLOC 0x00000001
|
||||
FAULT_KVMALLOC 0x00000002
|
||||
FAULT_PAGE_ALLOC 0x00000004
|
||||
FAULT_PAGE_GET 0x00000008
|
||||
FAULT_ALLOC_BIO 0x00000010 (obsolete)
|
||||
FAULT_ALLOC_NID 0x00000020
|
||||
FAULT_ORPHAN 0x00000040
|
||||
FAULT_BLOCK 0x00000080
|
||||
FAULT_DIR_DEPTH 0x00000100
|
||||
FAULT_EVICT_INODE 0x00000200
|
||||
FAULT_TRUNCATE 0x00000400
|
||||
FAULT_READ_IO 0x00000800
|
||||
FAULT_CHECKPOINT 0x00001000
|
||||
FAULT_DISCARD 0x00002000
|
||||
FAULT_WRITE_IO 0x00004000
|
||||
FAULT_SLAB_ALLOC 0x00008000
|
||||
FAULT_DQUOT_INIT 0x00010000
|
||||
FAULT_LOCK_OP 0x00020000
|
||||
FAULT_BLKADDR_VALIDITY 0x00040000
|
||||
FAULT_BLKADDR_CONSISTENCE 0x00080000
|
||||
FAULT_NO_SEGMENT 0x00100000
|
||||
FAULT_INCONSISTENT_FOOTER 0x00200000
|
||||
FAULT_TIMEOUT 0x00400000 (1000ms)
|
||||
FAULT_VMALLOC 0x00800000
|
||||
=========================== ==========
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/discard_io_aware_gran
|
||||
Date: January 2023
|
||||
@@ -846,3 +848,16 @@ Description: For several zoned storage devices, vendors will provide extra space
|
||||
reserved_blocks. However, it is not enough, since this extra space should
|
||||
not be shown to users. So, with this new sysfs node, we can hide the space
|
||||
by subtracting reserved_blocks from total bytes.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/encoding_flags
|
||||
Date: April 2025
|
||||
Contact: "Chao Yu" <chao@kernel.org>
|
||||
Description: This is a read-only entry to show the value of sb.s_encoding_flags, the
|
||||
value is hexadecimal.
|
||||
|
||||
============================ ==========
|
||||
Flag_Name Flag_Value
|
||||
============================ ==========
|
||||
SB_ENC_STRICT_MODE_FL 0x00000001
|
||||
SB_ENC_NO_COMPAT_FALLBACK_FL 0x00000002
|
||||
============================ ==========
|
||||
|
||||
7
Documentation/ABI/testing/sysfs-kernel-hardlockup_count
Normal file
7
Documentation/ABI/testing/sysfs-kernel-hardlockup_count
Normal file
@@ -0,0 +1,7 @@
|
||||
What: /sys/kernel/hardlockup_count
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description:
|
||||
Shows how many times the system has detected a hard lockup since last boot.
|
||||
Available only if CONFIG_HARDLOCKUP_DETECTOR is enabled.
|
||||
@@ -283,6 +283,12 @@ Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the current
|
||||
value of the goal metric.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/nid
|
||||
Date: Apr 2025
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the nid
|
||||
parameter of the goal.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/sz_permil
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
|
||||
@@ -20,6 +20,35 @@ Description: Weight configuration interface for nodeN
|
||||
Minimum weight: 1
|
||||
Maximum weight: 255
|
||||
|
||||
Writing an empty string or `0` will reset the weight to the
|
||||
system default. The system default may be set by the kernel
|
||||
or drivers at boot or during hotplug events.
|
||||
Writing invalid values (i.e. any values not in [1,255],
|
||||
empty string, ...) will return -EINVAL.
|
||||
|
||||
Changing the weight to a valid value will automatically
|
||||
switch the system to manual mode as well.
|
||||
|
||||
What: /sys/kernel/mm/mempolicy/weighted_interleave/auto
|
||||
Date: May 2025
|
||||
Contact: Linux memory management mailing list <linux-mm@kvack.org>
|
||||
Description: Auto-weighting configuration interface
|
||||
|
||||
Configuration mode for weighted interleave. 'true' indicates
|
||||
that the system is in auto mode, and a 'false' indicates that
|
||||
the system is in manual mode.
|
||||
|
||||
In auto mode, all node weights are re-calculated and overwritten
|
||||
(visible via the nodeN interfaces) whenever new bandwidth data
|
||||
is made available during either boot or hotplug events.
|
||||
|
||||
In manual mode, node weights can only be updated by the user.
|
||||
Note that nodes that are onlined with previously set weights
|
||||
will reuse those weights. If they were not previously set or
|
||||
are onlined with missing bandwidth data, the weights will use
|
||||
a default weight of 1.
|
||||
|
||||
Writing any true value string (e.g. Y or 1) will enable auto
|
||||
mode, while writing any false value string (e.g. N or 0) will
|
||||
enable manual mode. All other strings are ignored and will
|
||||
return -EINVAL.
|
||||
|
||||
Writing a new weight to a node directly via the nodeN interface
|
||||
will also automatically switch the system to manual mode.
|
||||
|
||||
@@ -16,9 +16,13 @@ Description: Enable/disable demoting pages during reclaim
|
||||
Allowing page migration during reclaim enables these
|
||||
systems to migrate pages from fast tiers to slow tiers
|
||||
when the fast tier is under pressure. This migration
|
||||
is performed before swap. It may move data to a NUMA
|
||||
node that does not fall into the cpuset of the
|
||||
allocating process which might be construed to violate
|
||||
the guarantees of cpusets. This should not be enabled
|
||||
on systems which need strict cpuset location
|
||||
guarantees.
|
||||
is performed before swap if an eligible numa node is
|
||||
present in cpuset.mems for the cgroup (or if cpuset v1
|
||||
is being used). If cpuset.mems changes at runtime, it
|
||||
may move data to a NUMA node that does not fall into the
|
||||
cpuset of the new cpuset.mems, which might be construed
|
||||
to violate the guarantees of cpusets. Shared memory,
|
||||
such as libraries, owned by another cgroup may still be
|
||||
demoted and result in memory use on a node not present
|
||||
in cpuset.mems. This should not be enabled on systems
|
||||
which need strict cpuset location guarantees.
|
||||
|
||||
6
Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
Normal file
6
Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
Normal file
@@ -0,0 +1,6 @@
|
||||
What: /sys/kernel/rcu_stall_count
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description:
|
||||
Shows how many times the system has detected an RCU stall since last boot.
|
||||
@@ -2,7 +2,7 @@ What: /sys/kernel/slab
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The /sys/kernel/slab directory contains a snapshot of the
|
||||
internal state of the SLUB allocator for each cache. Certain
|
||||
@@ -14,7 +14,7 @@ What: /sys/kernel/slab/<cache>/aliases
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The aliases file is read-only and specifies how many caches
|
||||
have merged into this cache.
|
||||
@@ -23,7 +23,7 @@ What: /sys/kernel/slab/<cache>/align
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The align file is read-only and specifies the cache's object
|
||||
alignment in bytes.
|
||||
@@ -32,7 +32,7 @@ What: /sys/kernel/slab/<cache>/alloc_calls
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The alloc_calls file is read-only and lists the kernel code
|
||||
locations from which allocations for this cache were performed.
|
||||
@@ -43,7 +43,7 @@ What: /sys/kernel/slab/<cache>/alloc_fastpath
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The alloc_fastpath file shows how many objects have been
|
||||
allocated using the fast path. It can be written to clear the
|
||||
@@ -54,7 +54,7 @@ What: /sys/kernel/slab/<cache>/alloc_from_partial
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The alloc_from_partial file shows how many times a cpu slab has
|
||||
been full and it has been refilled by using a slab from the list
|
||||
@@ -66,7 +66,7 @@ What: /sys/kernel/slab/<cache>/alloc_refill
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The alloc_refill file shows how many times the per-cpu freelist
|
||||
was empty but there were objects available as the result of
|
||||
@@ -77,7 +77,7 @@ What: /sys/kernel/slab/<cache>/alloc_slab
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The alloc_slab file shows how many times a new slab had to
|
||||
be allocated from the page allocator. It can be written to
|
||||
@@ -88,7 +88,7 @@ What: /sys/kernel/slab/<cache>/alloc_slowpath
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The alloc_slowpath file shows how many objects have been
|
||||
allocated using the slow path because of a refill or
|
||||
@@ -100,7 +100,7 @@ What: /sys/kernel/slab/<cache>/cache_dma
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The cache_dma file is read-only and specifies whether objects
|
||||
are from ZONE_DMA.
|
||||
@@ -110,7 +110,7 @@ What: /sys/kernel/slab/<cache>/cpu_slabs
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The cpu_slabs file is read-only and displays how many cpu slabs
|
||||
are active and their NUMA locality.
|
||||
@@ -119,7 +119,7 @@ What: /sys/kernel/slab/<cache>/cpuslab_flush
|
||||
Date: April 2009
|
||||
KernelVersion: 2.6.31
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The file cpuslab_flush shows how many times a cache's cpu slabs
|
||||
have been flushed as the result of destroying or shrinking a
|
||||
@@ -132,7 +132,7 @@ What: /sys/kernel/slab/<cache>/ctor
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The ctor file is read-only and specifies the cache's object
|
||||
constructor function, which is invoked for each object when a
|
||||
@@ -142,7 +142,7 @@ What: /sys/kernel/slab/<cache>/deactivate_empty
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The deactivate_empty file shows how many times an empty cpu slab
|
||||
was deactivated. It can be written to clear the current count.
|
||||
@@ -152,7 +152,7 @@ What: /sys/kernel/slab/<cache>/deactivate_full
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The deactivate_full file shows how many times a full cpu slab
|
||||
was deactivated. It can be written to clear the current count.
|
||||
@@ -162,7 +162,7 @@ What: /sys/kernel/slab/<cache>/deactivate_remote_frees
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The deactivate_remote_frees file shows how many times a cpu slab
|
||||
has been deactivated and contained free objects that were freed
|
||||
@@ -173,7 +173,7 @@ What: /sys/kernel/slab/<cache>/deactivate_to_head
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The deactivate_to_head file shows how many times a partial cpu
|
||||
slab was deactivated and added to the head of its node's partial
|
||||
@@ -184,7 +184,7 @@ What: /sys/kernel/slab/<cache>/deactivate_to_tail
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The deactivate_to_tail file shows how many times a partial cpu
|
||||
slab was deactivated and added to the tail of its node's partial
|
||||
@@ -195,7 +195,7 @@ What: /sys/kernel/slab/<cache>/destroy_by_rcu
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The destroy_by_rcu file is read-only and specifies whether
|
||||
slabs (not objects) are freed by rcu.
|
||||
@@ -204,7 +204,7 @@ What: /sys/kernel/slab/<cache>/free_add_partial
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The free_add_partial file shows how many times an object has
|
||||
been freed in a full slab so that it had to be added to its node's
|
||||
@@ -215,7 +215,7 @@ What: /sys/kernel/slab/<cache>/free_calls
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The free_calls file is read-only and lists the locations of
|
||||
object frees if slab debugging is enabled (see
|
||||
@@ -225,7 +225,7 @@ What: /sys/kernel/slab/<cache>/free_fastpath
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The free_fastpath file shows how many objects have been freed
|
||||
using the fast path because it was an object from the cpu slab.
|
||||
@@ -236,7 +236,7 @@ What: /sys/kernel/slab/<cache>/free_frozen
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The free_frozen file shows how many objects have been freed to
|
||||
a frozen slab (i.e. a remote cpu slab). It can be written to
|
||||
@@ -247,7 +247,7 @@ What: /sys/kernel/slab/<cache>/free_remove_partial
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The free_remove_partial file shows how many times an object has
|
||||
been freed to a now-empty slab so that it had to be removed from
|
||||
@@ -259,7 +259,7 @@ What: /sys/kernel/slab/<cache>/free_slab
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The free_slab file shows how many times an empty slab has been
|
||||
freed back to the page allocator. It can be written to clear
|
||||
@@ -270,7 +270,7 @@ What: /sys/kernel/slab/<cache>/free_slowpath
|
||||
Date: February 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The free_slowpath file shows how many objects have been freed
|
||||
using the slow path (i.e. to a full or partial slab). It can
|
||||
@@ -281,7 +281,7 @@ What: /sys/kernel/slab/<cache>/hwcache_align
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The hwcache_align file is read-only and specifies whether
|
||||
objects are aligned on cachelines.
|
||||
@@ -301,7 +301,7 @@ What: /sys/kernel/slab/<cache>/object_size
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The object_size file is read-only and specifies the cache's
|
||||
object size.
|
||||
@@ -310,7 +310,7 @@ What: /sys/kernel/slab/<cache>/objects
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The objects file is read-only and displays how many objects are
|
||||
active and which nodes they are from.
|
||||
@@ -319,7 +319,7 @@ What: /sys/kernel/slab/<cache>/objects_partial
|
||||
Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The objects_partial file is read-only and displays how many
|
||||
objects are on partial slabs and from which nodes they are
|
||||
@@ -329,7 +329,7 @@ What: /sys/kernel/slab/<cache>/objs_per_slab
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The file objs_per_slab is read-only and specifies how many
|
||||
objects may be allocated from a single slab of the order
|
||||
@@ -339,7 +339,7 @@ What: /sys/kernel/slab/<cache>/order
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The order file specifies the page order at which new slabs are
|
||||
allocated. It is writable and can be changed to increase the
|
||||
@@ -356,7 +356,7 @@ What: /sys/kernel/slab/<cache>/order_fallback
|
||||
Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The order_fallback file shows how many times an allocation of a
|
||||
new slab has not been possible at the cache's order and instead
|
||||
@@ -369,7 +369,7 @@ What: /sys/kernel/slab/<cache>/partial
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The partial file is read-only and displays how many
|
||||
partial slabs there are and how long each node's list is.
|
||||
@@ -378,7 +378,7 @@ What: /sys/kernel/slab/<cache>/poison
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The poison file specifies whether objects should be poisoned
|
||||
when a new slab is allocated.
|
||||
@@ -387,7 +387,7 @@ What: /sys/kernel/slab/<cache>/reclaim_account
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The reclaim_account file specifies whether the cache's objects
|
||||
are reclaimable (and grouped by their mobility).
|
||||
@@ -396,7 +396,7 @@ What: /sys/kernel/slab/<cache>/red_zone
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The red_zone file specifies whether the cache's objects are red
|
||||
zoned.
|
||||
@@ -405,7 +405,7 @@ What: /sys/kernel/slab/<cache>/remote_node_defrag_ratio
|
||||
Date: January 2008
|
||||
KernelVersion: 2.6.25
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The file remote_node_defrag_ratio specifies the percentage of
|
||||
times SLUB will attempt to refill the cpu slab with a partial
|
||||
@@ -419,7 +419,7 @@ What: /sys/kernel/slab/<cache>/sanity_checks
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The sanity_checks file specifies whether expensive checks
|
||||
should be performed on free and, at minimum, enables double free
|
||||
@@ -430,7 +430,7 @@ What: /sys/kernel/slab/<cache>/shrink
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The shrink file is used to reclaim unused slab cache
|
||||
memory from a cache. Empty per-cpu or partial slabs
|
||||
@@ -446,7 +446,7 @@ What: /sys/kernel/slab/<cache>/slab_size
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The slab_size file is read-only and specifies the object size
|
||||
with metadata (debugging information and alignment) in bytes.
|
||||
@@ -455,7 +455,7 @@ What: /sys/kernel/slab/<cache>/slabs
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The slabs file is read-only and displays how many slabs
|
||||
there are (both cpu and partial) and from which nodes they are
|
||||
@@ -465,7 +465,7 @@ What: /sys/kernel/slab/<cache>/store_user
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The store_user file specifies whether the location of
|
||||
allocation or free should be tracked for a cache.
|
||||
@@ -474,7 +474,7 @@ What: /sys/kernel/slab/<cache>/total_objects
|
||||
Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The total_objects file is read-only and displays how many total
|
||||
objects a cache has and which nodes they are from.
|
||||
@@ -483,7 +483,7 @@ What: /sys/kernel/slab/<cache>/trace
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
The trace file specifies whether object allocations and frees
|
||||
should be traced.
|
||||
@@ -492,7 +492,7 @@ What: /sys/kernel/slab/<cache>/validate
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Christoph Lameter <cl@linux-foundation.org>
|
||||
Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
Writing to the validate file causes SLUB to traverse all of its
|
||||
cache's objects and check the validity of metadata.
|
||||
@@ -506,14 +506,14 @@ Description:
|
||||
|
||||
What: /sys/kernel/slab/<cache>/slabs_cpu_partial
|
||||
Date: Aug 2011
|
||||
Contact: Christoph Lameter <cl@linux.com>
|
||||
Contact: Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
This read-only file shows the number of partially allocated
|
||||
frozen slabs.
|
||||
|
||||
What: /sys/kernel/slab/<cache>/cpu_partial
|
||||
Date: Aug 2011
|
||||
Contact: Christoph Lameter <cl@linux.com>
|
||||
Contact: Christoph Lameter <cl@gentwo.org>
|
||||
Description:
|
||||
This read-only file shows the number of per cpu partial
|
||||
pages to keep around.
|
||||
|
||||
7
Documentation/ABI/testing/sysfs-kernel-softlockup_count
Normal file
7
Documentation/ABI/testing/sysfs-kernel-softlockup_count
Normal file
@@ -0,0 +1,7 @@
|
||||
What: /sys/kernel/softlockup_count
|
||||
Date: May 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description:
|
||||
Shows how many times the system has detected a soft lockup since last boot.
|
||||
Available only if CONFIG_SOFTLOCKUP_DETECTOR is enabled.
|
||||
14
Documentation/ABI/testing/sysfs-platform-alienware-wmi
Normal file
14
Documentation/ABI/testing/sysfs-platform-alienware-wmi
Normal file
@@ -0,0 +1,14 @@
|
||||
What: /sys/class/hwmon/hwmonX/fanY_boost
|
||||
Date: March 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: Kurt Borja <kuurtb@gmail.com>
|
||||
Description:
|
||||
This file exposes fan boost control for Dell gaming laptops with
|
||||
the AWCC WMI interface.
|
||||
|
||||
See Documentation/admin-guide/laptops/alienware-wmi.rst for
|
||||
details.
|
||||
|
||||
Integer value in the range 0 to 255
|
||||
|
||||
RW
|
||||
25
Documentation/ABI/testing/sysfs-platform-oxp
Normal file
25
Documentation/ABI/testing/sysfs-platform-oxp
Normal file
@@ -0,0 +1,25 @@
|
||||
What: /sys/devices/platform/<platform>/tt_toggle
|
||||
Date: Jun 2023
|
||||
KernelVersion: 6.5
|
||||
Contact: "Antheas Kapenekakis" <lkml@antheas.dev>
|
||||
Description:
|
||||
Take over TDP controls from the device. OneXPlayer devices have a
|
||||
turbo button that can be used to switch between two TDP modes
|
||||
(usually 15W and 25W). By setting this attribute to 1, this
|
||||
functionality is disabled, handing TDP control over to (Windows)
|
||||
userspace software and the Turbo button turns into a keyboard
|
||||
shortcut over the AT keyboard of the device. In addition,
|
||||
using this setting is a prerequisite for PWM control for most
|
||||
newer models (otherwise it NOOPs).
|
||||
|
||||
What: /sys/devices/platform/<platform>/tt_led
|
||||
Date: April 2025
|
||||
KernelVersion: 6.16
|
||||
Contact: "Antheas Kapenekakis" <lkml@antheas.dev>
|
||||
Description:
|
||||
Some OneXPlayer devices (e.g., X1 series) feature a little LED
|
||||
nested in the Turbo button. This LED is illuminated when the
|
||||
device is in the higher TDP mode (e.g., 25W). Once tt_toggle
|
||||
is engaged, this LED is left dangling to its last state. This
|
||||
attribute allows userspace to control the LED state manually
|
||||
(either with 1 or 0). Only a subset of devices contain this LED.
|
||||
@@ -60,9 +60,8 @@ endif #HAVE_LATEXMK
|
||||
# Internal variables.
|
||||
PAPEROPT_a4 = -D latex_paper_size=a4
|
||||
PAPEROPT_letter = -D latex_paper_size=letter
|
||||
KERNELDOC = $(srctree)/scripts/kernel-doc
|
||||
KERNELDOC_CONF = -D kerneldoc_srctree=$(srctree) -D kerneldoc_bin=$(KERNELDOC)
|
||||
ALLSPHINXOPTS = $(KERNELDOC_CONF) $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
|
||||
ALLSPHINXOPTS = -D kerneldoc_srctree=$(srctree) -D kerneldoc_bin=$(KERNELDOC)
|
||||
ALLSPHINXOPTS += $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
|
||||
ifneq ($(wildcard $(srctree)/.config),)
|
||||
ifeq ($(CONFIG_RUST),y)
|
||||
# Let Sphinx know we will include rustdoc
|
||||
@@ -83,9 +82,11 @@ loop_cmd = $(echo-cmd) $(cmd_$(1)) || exit;
|
||||
# $5 reST source folder relative to $(src),
|
||||
# e.g. "userspace-api/media" for the linux-tv book-set at ./Documentation/userspace-api/media
|
||||
|
||||
PYTHONPYCACHEPREFIX ?= $(abspath $(BUILDDIR)/__pycache__)
|
||||
|
||||
quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
|
||||
cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/userspace-api/media $2 && \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONPYCACHEPREFIX="$(PYTHONPYCACHEPREFIX)" \
|
||||
BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(src)/$5/$(SPHINX_CONF)) \
|
||||
$(PYTHON3) $(srctree)/scripts/jobserver-exec \
|
||||
$(CONFIG_SHELL) $(srctree)/Documentation/sphinx/parallel-wrapper.sh \
|
||||
|
||||
10
Documentation/PCI/controller/index.rst
Normal file
10
Documentation/PCI/controller/index.rst
Normal file
@@ -0,0 +1,10 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===========================================
|
||||
PCI Native Host Bridge and Endpoint Drivers
|
||||
===========================================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
rcar-pcie-firmware
|
||||
32
Documentation/PCI/controller/rcar-pcie-firmware.rst
Normal file
32
Documentation/PCI/controller/rcar-pcie-firmware.rst
Normal file
@@ -0,0 +1,32 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=================================================
|
||||
Firmware of PCIe controller for Renesas R-Car V4H
|
||||
=================================================
|
||||
|
||||
Renesas R-Car V4H (r8a779g0) has a PCIe controller, requiring a specific
|
||||
firmware download during startup.
|
||||
|
||||
However, Renesas currently cannot distribute the firmware free of charge.
|
||||
|
||||
The firmware file "104_PCIe_fw_addr_data_ver1.05.txt" (note that the file name
|
||||
might be different between different datasheet revisions) can be found in the
|
||||
datasheet encoded as text, and as such, the file's content must be converted
|
||||
back to binary form. This can be achieved using the following example script:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
$ awk '/^\s*0x[0-9A-Fa-f]{4}\s+0x[0-9A-Fa-f]{4}/ { print substr($2,5,2) substr($2,3,2) }' \
|
||||
104_PCIe_fw_addr_data_ver1.05.txt | \
|
||||
xxd -p -r > rcar_gen4_pcie.bin
|
||||
|
||||
Once the text content has been converted into a binary firmware file, verify
|
||||
its checksum as follows:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
$ sha1sum rcar_gen4_pcie.bin
|
||||
1d0bd4b189b4eb009f5d564b1f93a79112994945 rcar_gen4_pcie.bin
|
||||
|
||||
The resulting binary file called "rcar_gen4_pcie.bin" should be placed in the
|
||||
"/lib/firmware" directory before the driver runs.
|
||||
@@ -8,6 +8,6 @@ PCI NVMe Function
|
||||
|
||||
The PCI NVMe endpoint function implements a PCI NVMe controller using the NVMe
|
||||
subsystem target core code. The driver for this function resides with the NVMe
|
||||
subsystem as drivers/nvme/target/nvmet-pciep.c.
|
||||
subsystem as drivers/nvme/target/pci-epf.c.
|
||||
|
||||
See Documentation/nvme/nvme-pci-endpoint-target.rst for more details.
|
||||
|
||||
@@ -17,5 +17,6 @@ PCI Bus Subsystem
|
||||
pci-error-recovery
|
||||
pcieaer-howto
|
||||
endpoint/index
|
||||
controller/index
|
||||
boot-interrupts
|
||||
tph
|
||||
|
||||
@@ -85,12 +85,27 @@ In the example, 'Requester ID' means the ID of the device that sent
|
||||
the error message to the Root Port. Please refer to PCIe specs for other
|
||||
fields.
|
||||
|
||||
AER Ratelimits
|
||||
--------------
|
||||
|
||||
Since error messages can be generated for each transaction, we may see
|
||||
large volumes of errors reported. To prevent spammy devices from flooding
|
||||
the console/stalling execution, messages are throttled by device and error
|
||||
type (correctable vs. non-fatal uncorrectable). Fatal errors, including
|
||||
DPC errors, are not ratelimited.
|
||||
|
||||
AER uses the default ratelimit of DEFAULT_RATELIMIT_BURST (10 events) over
|
||||
DEFAULT_RATELIMIT_INTERVAL (5 seconds).
|
||||
|
||||
Ratelimits are exposed in the form of sysfs attributes and are configurable.
|
||||
See Documentation/ABI/testing/sysfs-bus-pci-devices-aer.
|
||||
|
||||
AER Statistics / Counters
|
||||
-------------------------
|
||||
|
||||
When PCIe AER errors are captured, the counters / statistics are also exposed
|
||||
in the form of sysfs attributes which are documented at
|
||||
Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
|
||||
Documentation/ABI/testing/sysfs-bus-pci-devices-aer.
|
||||
|
||||
Developer Guide
|
||||
===============
|
||||
|
||||
@@ -334,7 +334,7 @@ If the system-call audit module were to ever need to reject stale data, one way
|
||||
to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the
|
||||
``audit_entry`` structure, and modify audit_filter_task() as follows::
|
||||
|
||||
static enum audit_state audit_filter_task(struct task_struct *tsk)
|
||||
static struct audit_entry *audit_filter_task(struct task_struct *tsk, char **key)
|
||||
{
|
||||
struct audit_entry *e;
|
||||
enum audit_state state;
|
||||
@@ -346,16 +346,18 @@ to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to
|
||||
if (e->deleted) {
|
||||
spin_unlock(&e->lock);
|
||||
rcu_read_unlock();
|
||||
return AUDIT_BUILD_CONTEXT;
|
||||
return NULL;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
if (state == AUDIT_STATE_RECORD)
|
||||
*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
|
||||
return state;
|
||||
/* As long as e->lock is held, e is valid and
|
||||
* its value is not stale */
|
||||
return e;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return AUDIT_BUILD_CONTEXT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the
|
||||
|
||||
@@ -15,6 +15,9 @@ to start learning about RCU:
|
||||
| 2014 Big API Table https://lwn.net/Articles/609973/
|
||||
| 6. The RCU API, 2019 Edition https://lwn.net/Articles/777036/
|
||||
| 2019 Big API Table https://lwn.net/Articles/777165/
|
||||
| 7. The RCU API, 2024 Edition https://lwn.net/Articles/988638/
|
||||
| 2024 Background Information https://lwn.net/Articles/988641/
|
||||
| 2024 Big API Table https://lwn.net/Articles/988666/
|
||||
|
||||
For those preferring video:
|
||||
|
||||
|
||||
@@ -423,7 +423,7 @@ Field descriptions:
|
||||
|
||||
Event Example::
|
||||
|
||||
type=1422 audit(1653425529.927:53): policy_name="boot_verified" policy_version=0.0.0 policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F26765076DD8EED7B8F4DB auid=4294967295 ses=4294967295 lsm=ipe res=1
|
||||
type=1422 audit(1653425529.927:53): policy_name="boot_verified" policy_version=0.0.0 policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F26765076DD8EED7B8F4DB auid=4294967295 ses=4294967295 lsm=ipe res=1 errno=0
|
||||
type=1300 audit(1653425529.927:53): arch=c000003e syscall=1 success=yes exit=2567 a0=3 a1=5596fcae1fb0 a2=a07 a3=2 items=0 ppid=184 pid=229 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=4294967295 comm="python3" exe="/usr/bin/python3.10" key=(null)
|
||||
type=1327 audit(1653425529.927:53): PROCTITLE proctitle=707974686F6E3300746573742F6D61696E2E7079002D66002E2E
|
||||
|
||||
@@ -433,24 +433,55 @@ This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record
|
||||
|
||||
Field descriptions:
|
||||
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| Field | Value Type | Optional? | Description of Value |
|
||||
+================+============+===========+===================================================+
|
||||
| policy_name | string | No | The policy_name |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| policy_version | string | No | The policy_version |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| policy_digest | string | No | The policy hash |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| auid | integer | No | The login user ID |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| ses | integer | No | The login session ID |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| lsm | string | No | The lsm name associated with the event |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| res | integer | No | The result of the audited operation(success/fail) |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
+----------------+------------+-----------+-------------------------------------------------------------+
|
||||
| Field | Value Type | Optional? | Description of Value |
|
||||
+================+============+===========+=============================================================+
|
||||
| policy_name | string | Yes | The policy_name |
|
||||
+----------------+------------+-----------+-------------------------------------------------------------+
|
||||
| policy_version | string | Yes | The policy_version |
|
||||
+----------------+------------+-----------+-------------------------------------------------------------+
|
||||
| policy_digest | string | Yes | The policy hash |
|
||||
+----------------+------------+-----------+-------------------------------------------------------------+
|
||||
| auid | integer | No | The login user ID |
|
||||
+----------------+------------+-----------+-------------------------------------------------------------+
|
||||
| ses | integer | No | The login session ID |
|
||||
+----------------+------------+-----------+-------------------------------------------------------------+
|
||||
| lsm | string | No | The lsm name associated with the event |
|
||||
+----------------+------------+-----------+-------------------------------------------------------------+
|
||||
| res | integer | No | The result of the audited operation(success/fail) |
|
||||
+----------------+------------+-----------+-------------------------------------------------------------+
|
||||
| errno | integer | No | Error code from policy loading operations (see table below) |
|
||||
+----------------+------------+-----------+-------------------------------------------------------------+
|
||||
|
||||
Policy error codes (errno):
|
||||
|
||||
The following table lists the error codes that may appear in the errno field while loading or updating the policy:
|
||||
|
||||
+----------------+--------------------------------------------------------+
|
||||
| Error Code | Description |
|
||||
+================+========================================================+
|
||||
| 0 | Success |
|
||||
+----------------+--------------------------------------------------------+
|
||||
| -EPERM | Insufficient permission |
|
||||
+----------------+--------------------------------------------------------+
|
||||
| -EEXIST | Same name policy already deployed |
|
||||
+----------------+--------------------------------------------------------+
|
||||
| -EBADMSG | Policy is invalid |
|
||||
+----------------+--------------------------------------------------------+
|
||||
| -ENOMEM | Out of memory (OOM) |
|
||||
+----------------+--------------------------------------------------------+
|
||||
| -ERANGE | Policy version number overflow |
|
||||
+----------------+--------------------------------------------------------+
|
||||
| -EINVAL | Policy version parsing error |
|
||||
+----------------+--------------------------------------------------------+
|
||||
| -ENOKEY | Key used to sign the IPE policy not found in keyring |
|
||||
+----------------+--------------------------------------------------------+
|
||||
| -EKEYREJECTED | Policy signature verification failed |
|
||||
+----------------+--------------------------------------------------------+
|
||||
| -ESTALE | Attempting to update an IPE policy with older version |
|
||||
+----------------+--------------------------------------------------------+
|
||||
| -ENOENT | Policy was deleted while updating |
|
||||
+----------------+--------------------------------------------------------+
|
||||
|
||||
1404 AUDIT_MAC_STATUS
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
@@ -259,7 +259,7 @@ Configuring the kernel
|
||||
Compiling the kernel
|
||||
--------------------
|
||||
|
||||
- Make sure you have at least gcc 5.1 available.
|
||||
- Make sure you have at least gcc 8.1 available.
|
||||
For more information, refer to :ref:`Documentation/process/changes.rst <changes>`.
|
||||
|
||||
- Do a ``make`` to create a compressed kernel image. It is also possible to do
|
||||
|
||||
@@ -11,6 +11,7 @@ Block Devices
|
||||
nbd
|
||||
paride
|
||||
ramdisk
|
||||
zoned_loop
|
||||
zram
|
||||
|
||||
drbd/index
|
||||
|
||||
169
Documentation/admin-guide/blockdev/zoned_loop.rst
Normal file
169
Documentation/admin-guide/blockdev/zoned_loop.rst
Normal file
@@ -0,0 +1,169 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=======================
|
||||
Zoned Loop Block Device
|
||||
=======================
|
||||
|
||||
.. Contents:
|
||||
|
||||
1) Overview
|
||||
2) Creating a Zoned Device
|
||||
3) Deleting a Zoned Device
|
||||
4) Example
|
||||
|
||||
|
||||
1) Overview
|
||||
-----------
|
||||
|
||||
The zoned loop block device driver (zloop) allows a user to create a zoned block
|
||||
device using one regular file per zone as backing storage. This driver does not
|
||||
directly control any hardware and uses read, write and truncate operations to
|
||||
regular files of a file system to emulate a zoned block device.
|
||||
|
||||
Using zloop, zoned block devices with a configurable capacity, zone size and
|
||||
number of conventional zones can be created. The storage for each zone of the
|
||||
device is implemented using a regular file with a maximum size equal to the zone
|
||||
size. The size of a file backing a conventional zone is always equal to the zone
|
||||
size. The size of a file backing a sequential zone indicates the amount of data
|
||||
sequentially written to the file, that is, the size of the file directly
|
||||
indicates the position of the write pointer of the zone.
|
||||
|
||||
When resetting a sequential zone, its backing file size is truncated to zero.
|
||||
Conversely, for a zone finish operation, the backing file is truncated to the
|
||||
zone size. With this, the maximum capacity of a zloop zoned block device created
|
||||
can be configured to be larger than the storage space available on the
|
||||
backing file system. Of course, for such configuration, writing more data than
|
||||
the storage space available on the backing file system will result in write
|
||||
errors.
|
||||
|
||||
The zoned loop block device driver implements a complete zone transition state
|
||||
machine. That is, zones can be empty, implicitly opened, explicitly opened,
|
||||
closed or full. The current implementation does not support any limits on the
|
||||
maximum number of open and active zones.
|
||||
|
||||
No user tools are necessary to create and delete zloop devices.
|
||||
|
||||
2) Creating a Zoned Device
|
||||
--------------------------
|
||||
|
||||
Once the zloop module is loaded (or if zloop is compiled in the kernel), the
|
||||
character device file /dev/zloop-control can be used to add a zloop device.
|
||||
This is done by writing an "add" command directly to the /dev/zloop-control
|
||||
device::
|
||||
|
||||
$ modprobe zloop
|
||||
$ ls -l /dev/zloop*
|
||||
crw-------. 1 root root 10, 123 Jan 6 19:18 /dev/zloop-control
|
||||
|
||||
$ mkdir -p <base directory>/<device ID>
|
||||
$ echo "add [options]" > /dev/zloop-control
|
||||
|
||||
The options available for the add command can be listed by reading the
|
||||
/dev/zloop-control device::
|
||||
|
||||
$ cat /dev/zloop-control
|
||||
add id=%d,capacity_mb=%u,zone_size_mb=%u,zone_capacity_mb=%u,conv_zones=%u,base_dir=%s,nr_queues=%u,queue_depth=%u,buffered_io
|
||||
remove id=%d
|
||||
|
||||
In more details, the options that can be used with the "add" command are as
|
||||
follows.
|
||||
|
||||
================ ===========================================================
|
||||
id Device number (the X in /dev/zloopX).
|
||||
Default: automatically assigned.
|
||||
capacity_mb Device total capacity in MiB. This is always rounded up to
|
||||
the nearest higher multiple of the zone size.
|
||||
Default: 16384 MiB (16 GiB).
|
||||
zone_size_mb Device zone size in MiB. Default: 256 MiB.
|
||||
zone_capacity_mb Device zone capacity (must always be equal to or lower than
|
||||
the zone size). Default: zone size.
|
||||
conv_zones Total number of conventional zones starting from sector 0.
|
||||
Default: 8.
|
||||
base_dir Path to the base directory where to create the directory
|
||||
containing the zone files of the device.
|
||||
Default=/var/local/zloop.
|
||||
The device directory containing the zone files is always
|
||||
named with the device ID. E.g. the default zone file
|
||||
directory for /dev/zloop0 is /var/local/zloop/0.
|
||||
nr_queues Number of I/O queues of the zoned block device. This value is
|
||||
always capped by the number of online CPUs.
|
||||
Default: 1
|
||||
queue_depth Maximum I/O queue depth per I/O queue.
|
||||
Default: 64
|
||||
buffered_io Do buffered IOs instead of direct IOs (default: false)
|
||||
================ ===========================================================
|
||||
|
||||
3) Deleting a Zoned Device
|
||||
--------------------------
|
||||
|
||||
Deleting an unused zoned loop block device is done by issuing the "remove"
|
||||
command to /dev/zloop-control, specifying the ID of the device to remove::
|
||||
|
||||
$ echo "remove id=X" > /dev/zloop-control
|
||||
|
||||
The remove command does not have any option.
|
||||
|
||||
A zoned device that was removed can be re-added again without any change to the
|
||||
state of the device zones: the device zones are restored to their last state
|
||||
before the device was removed. Adding again a zoned device after it was removed
|
||||
must always be done using the same configuration as when the device was first
|
||||
added. If a zone configuration change is detected, an error will be returned and
|
||||
the zoned device will not be created.
|
||||
|
||||
To fully delete a zoned device, after executing the remove operation, the device
|
||||
base directory containing the backing files of the device zones must be deleted.
|
||||
|
||||
4) Example
|
||||
----------
|
||||
|
||||
The following sequence of commands creates a 2GB zoned device with zones of 64
|
||||
MB and a zone capacity of 63 MB::
|
||||
|
||||
$ modprobe zloop
|
||||
$ mkdir -p /var/local/zloop/0
|
||||
$ echo "add capacity_mb=2048,zone_size_mb=64,zone_capacity_mb=63" > /dev/zloop-control
|
||||
|
||||
For the device created (/dev/zloop0), the zone backing files are all created
|
||||
under the default base directory (/var/local/zloop)::
|
||||
|
||||
$ ls -l /var/local/zloop/0
|
||||
total 0
|
||||
-rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000000
|
||||
-rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000001
|
||||
-rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000002
|
||||
-rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000003
|
||||
-rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000004
|
||||
-rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000005
|
||||
-rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000006
|
||||
-rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000007
|
||||
-rw-------. 1 root root 0 Jan 6 22:23 seq-000008
|
||||
-rw-------. 1 root root 0 Jan 6 22:23 seq-000009
|
||||
...
|
||||
|
||||
The zoned device created (/dev/zloop0) can then be used normally::
|
||||
|
||||
$ lsblk -z
|
||||
NAME ZONED ZONE-SZ ZONE-NR ZONE-AMAX ZONE-OMAX ZONE-APP ZONE-WGRAN
|
||||
zloop0 host-managed 64M 32 0 0 1M 4K
|
||||
$ blkzone report /dev/zloop0
|
||||
start: 0x000000000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
|
||||
start: 0x000020000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
|
||||
start: 0x000040000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
|
||||
start: 0x000060000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
|
||||
start: 0x000080000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
|
||||
start: 0x0000a0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
|
||||
start: 0x0000c0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
|
||||
start: 0x0000e0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
|
||||
start: 0x000100000, len 0x020000, cap 0x01f800, wptr 0x000000 reset:0 non-seq:0, zcond: 1(em) [type: 2(SEQ_WRITE_REQUIRED)]
|
||||
start: 0x000120000, len 0x020000, cap 0x01f800, wptr 0x000000 reset:0 non-seq:0, zcond: 1(em) [type: 2(SEQ_WRITE_REQUIRED)]
|
||||
...
|
||||
|
||||
Deleting this device is done using the command::
|
||||
|
||||
$ echo "remove id=0" > /dev/zloop-control
|
||||
|
||||
The removed device can be re-added again using the same "add" command as when
|
||||
the device was first created. To fully delete a zoned device, its backing files
|
||||
should also be deleted after executing the remove command::
|
||||
|
||||
$ rm -r /var/local/zloop/0
|
||||
@@ -317,6 +317,26 @@ a single line of text and contains the following stats separated by whitespace:
|
||||
Optional Feature
|
||||
================
|
||||
|
||||
IDLE pages tracking
|
||||
-------------------
|
||||
|
||||
zram has built-in support for idle pages tracking (that is, allocated but
|
||||
not used pages). This feature is useful for e.g. zram writeback and
|
||||
recompression. In order to mark pages as idle, execute the following command::
|
||||
|
||||
echo all > /sys/block/zramX/idle
|
||||
|
||||
This will mark all allocated zram pages as idle. The idle mark will be
|
||||
removed only when the page (block) is accessed (e.g. overwritten or freed).
|
||||
Additionally, when CONFIG_ZRAM_TRACK_ENTRY_ACTIME is enabled, pages can be
|
||||
marked as idle based on how many seconds have passed since the last access to
|
||||
a particular zram page::
|
||||
|
||||
echo 86400 > /sys/block/zramX/idle
|
||||
|
||||
In this example, all pages which haven't been accessed in more than 86400
|
||||
seconds (one day) will be marked idle.
|
||||
|
||||
writeback
|
||||
---------
|
||||
|
||||
@@ -331,24 +351,7 @@ If admin wants to use incompressible page writeback, they could do it via::
|
||||
|
||||
echo huge > /sys/block/zramX/writeback
|
||||
|
||||
To use idle page writeback, first, user need to declare zram pages
|
||||
as idle::
|
||||
|
||||
echo all > /sys/block/zramX/idle
|
||||
|
||||
From now on, any pages on zram are idle pages. The idle mark
|
||||
will be removed until someone requests access of the block.
|
||||
IOW, unless there is access request, those pages are still idle pages.
|
||||
Additionally, when CONFIG_ZRAM_TRACK_ENTRY_ACTIME is enabled pages can be
|
||||
marked as idle based on how long (in seconds) it's been since they were
|
||||
last accessed::
|
||||
|
||||
echo 86400 > /sys/block/zramX/idle
|
||||
|
||||
In this example all pages which haven't been accessed in more than 86400
|
||||
seconds (one day) will be marked idle.
|
||||
|
||||
Admin can request writeback of those idle pages at right timing via::
|
||||
Admin can request writeback of idle pages at right timing via::
|
||||
|
||||
echo idle > /sys/block/zramX/writeback
|
||||
|
||||
@@ -369,6 +372,23 @@ they could write a page index into the interface::
|
||||
|
||||
echo "page_index=1251" > /sys/block/zramX/writeback
|
||||
|
||||
In Linux 6.16 this interface underwent some rework. First, the interface
|
||||
now supports `key=value` format for all of its parameters (`type=huge_idle`,
|
||||
etc.) Second, the support for `page_indexes` was introduced, which specifies
|
||||
`LOW-HIGH` range (or ranges) of pages to be written-back. This reduces the
|
||||
number of syscalls, but more importantly this enables optimal post-processing
|
||||
target selection strategy. Usage example::
|
||||
|
||||
echo "type=idle" > /sys/block/zramX/writeback
|
||||
echo "page_indexes=1-100 page_indexes=200-300" > \
|
||||
/sys/block/zramX/writeback
|
||||
|
||||
We also now permit multiple page_index params per call and a mix of
|
||||
single pages and page ranges::
|
||||
|
||||
echo page_index=42 page_index=99 page_indexes=100-200 \
|
||||
page_indexes=500-700 > /sys/block/zramX/writeback
|
||||
|
||||
If there are lots of write IO with flash device, potentially, it has
|
||||
flash wearout problem so that admin needs to design write limitation
|
||||
to guarantee storage health for entire product life.
|
||||
@@ -482,8 +502,6 @@ attempt to recompress:::
|
||||
|
||||
echo "type=huge_idle max_pages=42" > /sys/block/zramX/recompress
|
||||
|
||||
Recompression of idle pages requires memory tracking.
|
||||
|
||||
During re-compression for every page, that matches re-compression criteria,
|
||||
ZRAM iterates the list of registered alternative compression algorithms in
|
||||
order of their priorities. ZRAM stops either when re-compression was
|
||||
|
||||
@@ -196,7 +196,7 @@ will see the assembler code for the routine shown, but if your kernel has
|
||||
debug symbols the C code will also be available. (Debug symbols can be enabled
|
||||
in the kernel hacking menu of the menu configuration.) For example::
|
||||
|
||||
$ objdump -r -S -l --disassemble net/dccp/ipv4.o
|
||||
$ objdump -r -S -l --disassemble net/ipv4/tcp.o
|
||||
|
||||
.. note::
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
|
||||
|
||||
Modified by Paul Jackson <pj@sgi.com>
|
||||
|
||||
Modified by Christoph Lameter <cl@linux.com>
|
||||
Modified by Christoph Lameter <cl@gentwo.org>
|
||||
|
||||
.. CONTENTS:
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ Written by Simon.Derr@bull.net
|
||||
|
||||
- Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
|
||||
- Modified by Paul Jackson <pj@sgi.com>
|
||||
- Modified by Christoph Lameter <cl@linux.com>
|
||||
- Modified by Christoph Lameter <cl@gentwo.org>
|
||||
- Modified by Paul Menage <menage@google.com>
|
||||
- Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
|
||||
|
||||
|
||||
@@ -1076,7 +1076,7 @@ cpufreq governor about the minimum desired frequency which should always be
|
||||
provided by a CPU, as well as the maximum desired frequency, which should not
|
||||
be exceeded by a CPU.
|
||||
|
||||
WARNING: cgroup2 cpu controller doesn't yet fully support the control of
|
||||
WARNING: cgroup2 cpu controller doesn't yet support the (bandwidth) control of
|
||||
realtime processes. For a kernel built with the CONFIG_RT_GROUP_SCHED option
|
||||
enabled for group scheduling of realtime processes, the cpu controller can only
|
||||
be enabled when all RT processes are in the root cgroup. Be aware that system
|
||||
@@ -1095,19 +1095,34 @@ realtime processes irrespective of CONFIG_RT_GROUP_SCHED.
|
||||
CPU Interface Files
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
All time durations are in microseconds.
|
||||
The interaction of a process with the cpu controller depends on its scheduling
|
||||
policy and the underlying scheduler. From the point of view of the cpu controller,
|
||||
processes can be categorized as follows:
|
||||
|
||||
* Processes under the fair-class scheduler
|
||||
* Processes under a BPF scheduler with the ``cgroup_set_weight`` callback
|
||||
* Everything else: ``SCHED_{FIFO,RR,DEADLINE}`` and processes under a BPF scheduler
|
||||
without the ``cgroup_set_weight`` callback
|
||||
|
||||
For details on when a process is under the fair-class scheduler or a BPF scheduler,
|
||||
check out :ref:`Documentation/scheduler/sched-ext.rst <sched-ext>`.
|
||||
|
||||
For each of the following interface files, the above categories
|
||||
will be referred to. All time durations are in microseconds.
|
||||
|
||||
cpu.stat
|
||||
A read-only flat-keyed file.
|
||||
This file exists whether the controller is enabled or not.
|
||||
|
||||
It always reports the following three stats:
|
||||
It always reports the following three stats, which account for all the
|
||||
processes in the cgroup:
|
||||
|
||||
- usage_usec
|
||||
- user_usec
|
||||
- system_usec
|
||||
|
||||
and the following five when the controller is enabled:
|
||||
and the following five when the controller is enabled, which account for
|
||||
only the processes under the fair-class scheduler:
|
||||
|
||||
- nr_periods
|
||||
- nr_throttled
|
||||
@@ -1125,6 +1140,10 @@ All time durations are in microseconds.
|
||||
If the cgroup has been configured to be SCHED_IDLE (cpu.idle = 1),
|
||||
then the weight will show as a 0.
|
||||
|
||||
This file affects only processes under the fair-class scheduler and a BPF
|
||||
scheduler with the ``cgroup_set_weight`` callback depending on what the
|
||||
callback actually does.
|
||||
|
||||
cpu.weight.nice
|
||||
A read-write single value file which exists on non-root
|
||||
cgroups. The default is "0".
|
||||
@@ -1137,6 +1156,10 @@ All time durations are in microseconds.
|
||||
granularity is coarser for the nice values, the read value is
|
||||
the closest approximation of the current weight.
|
||||
|
||||
This file affects only processes under the fair-class scheduler and a BPF
|
||||
scheduler with the ``cgroup_set_weight`` callback depending on what the
|
||||
callback actually does.
|
||||
|
||||
cpu.max
|
||||
A read-write two value file which exists on non-root cgroups.
|
||||
The default is "max 100000".
|
||||
@@ -1149,43 +1172,55 @@ All time durations are in microseconds.
|
||||
$PERIOD duration. "max" for $MAX indicates no limit. If only
|
||||
one number is written, $MAX is updated.
|
||||
|
||||
This file affects only processes under the fair-class scheduler.
|
||||
|
||||
cpu.max.burst
|
||||
A read-write single value file which exists on non-root
|
||||
cgroups. The default is "0".
|
||||
|
||||
The burst in the range [0, $MAX].
|
||||
|
||||
This file affects only processes under the fair-class scheduler.
|
||||
|
||||
cpu.pressure
|
||||
A read-write nested-keyed file.
|
||||
|
||||
Shows pressure stall information for CPU. See
|
||||
:ref:`Documentation/accounting/psi.rst <psi>` for details.
|
||||
|
||||
This file accounts for all the processes in the cgroup.
|
||||
|
||||
cpu.uclamp.min
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
The default is "0", i.e. no utilization boosting.
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
The default is "0", i.e. no utilization boosting.
|
||||
|
||||
The requested minimum utilization (protection) as a percentage
|
||||
rational number, e.g. 12.34 for 12.34%.
|
||||
The requested minimum utilization (protection) as a percentage
|
||||
rational number, e.g. 12.34 for 12.34%.
|
||||
|
||||
This interface allows reading and setting minimum utilization clamp
|
||||
values similar to the sched_setattr(2). This minimum utilization
|
||||
value is used to clamp the task specific minimum utilization clamp.
|
||||
This interface allows reading and setting minimum utilization clamp
|
||||
values similar to the sched_setattr(2). This minimum utilization
|
||||
value is used to clamp the task specific minimum utilization clamp,
|
||||
including those of realtime processes.
|
||||
|
||||
The requested minimum utilization (protection) is always capped by
|
||||
the current value for the maximum utilization (limit), i.e.
|
||||
`cpu.uclamp.max`.
|
||||
The requested minimum utilization (protection) is always capped by
|
||||
the current value for the maximum utilization (limit), i.e.
|
||||
`cpu.uclamp.max`.
|
||||
|
||||
This file affects all the processes in the cgroup.
|
||||
|
||||
cpu.uclamp.max
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
The default is "max". i.e. no utilization capping
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
The default is "max". i.e. no utilization capping
|
||||
|
||||
The requested maximum utilization (limit) as a percentage rational
|
||||
number, e.g. 98.76 for 98.76%.
|
||||
The requested maximum utilization (limit) as a percentage rational
|
||||
number, e.g. 98.76 for 98.76%.
|
||||
|
||||
This interface allows reading and setting maximum utilization clamp
|
||||
values similar to the sched_setattr(2). This maximum utilization
|
||||
value is used to clamp the task specific maximum utilization clamp.
|
||||
This interface allows reading and setting maximum utilization clamp
|
||||
values similar to the sched_setattr(2). This maximum utilization
|
||||
value is used to clamp the task specific maximum utilization clamp,
|
||||
including those of realtime processes.
|
||||
|
||||
This file affects all the processes in the cgroup.
|
||||
|
||||
cpu.idle
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
@@ -1197,7 +1232,7 @@ All time durations are in microseconds.
|
||||
own relative priorities, but the cgroup itself will be treated as
|
||||
very low priority relative to its peers.
|
||||
|
||||
|
||||
This file affects only processes under the fair-class scheduler.
|
||||
|
||||
Memory
|
||||
------
|
||||
@@ -1299,6 +1334,18 @@ PAGE_SIZE multiple when read back.
|
||||
monitors the limited cgroup to alleviate heavy reclaim
|
||||
pressure.
|
||||
|
||||
If memory.high is opened with O_NONBLOCK then the synchronous
|
||||
reclaim is bypassed. This is useful for admin processes that
|
||||
need to dynamically adjust the job's memory limits without
|
||||
expending their own CPU resources on memory reclamation. The
|
||||
job will trigger the reclaim and/or get throttled on its
|
||||
next charge request.
|
||||
|
||||
Please note that with O_NONBLOCK, there is a chance that the
|
||||
target memory cgroup may take an indefinite amount of time to
|
||||
reduce usage below the limit due to delayed charge request or
|
||||
busy-hitting its memory to slow down reclaim.
|
||||
|
||||
memory.max
|
||||
A read-write single value file which exists on non-root
|
||||
cgroups. The default is "max".
|
||||
@@ -1316,6 +1363,18 @@ PAGE_SIZE multiple when read back.
|
||||
Caller could retry them differently, return into userspace
|
||||
as -ENOMEM or silently ignore in cases like disk readahead.
|
||||
|
||||
If memory.max is opened with O_NONBLOCK, then the synchronous
|
||||
reclaim and oom-kill are bypassed. This is useful for admin
|
||||
processes that need to dynamically adjust the job's memory limits
|
||||
without expending their own CPU resources on memory reclamation.
|
||||
The job will trigger the reclaim and/or oom-kill on its next
|
||||
charge request.
|
||||
|
||||
Please note that with O_NONBLOCK, there is a chance that the
|
||||
target memory cgroup may take an indefinite amount of time to
|
||||
reduce usage below the limit due to delayed charge request or
|
||||
busy-hitting its memory to slow down reclaim.
|
||||
|
||||
memory.reclaim
|
||||
A write-only nested-keyed file which exists for all cgroups.
|
||||
|
||||
@@ -1348,6 +1407,9 @@ The following nested keys are defined.
|
||||
same semantics as vm.swappiness applied to memcg reclaim with
|
||||
all the existing limitations and potential future extensions.
|
||||
|
||||
The valid range for swappiness is [0-200, max], setting
|
||||
swappiness=max exclusively reclaims anonymous memory.
|
||||
|
||||
memory.peak
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
|
||||
@@ -1670,6 +1732,12 @@ The following nested keys are defined.
|
||||
numa_hint_faults (npn)
|
||||
Number of NUMA hinting faults.
|
||||
|
||||
numa_task_migrated (npn)
|
||||
Number of task migrations by NUMA balancing.
|
||||
|
||||
numa_task_swapped (npn)
|
||||
Number of task swaps by NUMA balancing.
|
||||
|
||||
pgdemote_kswapd
|
||||
Number of pages demoted by kswapd.
|
||||
|
||||
@@ -3019,7 +3087,7 @@ Filesystem Support for Writeback
|
||||
--------------------------------
|
||||
|
||||
A filesystem can support cgroup writeback by updating
|
||||
address_space_operations->writepage[s]() to annotate bio's using the
|
||||
address_space_operations->writepages() to annotate bio's using the
|
||||
following two functions.
|
||||
|
||||
wbc_init_bio(@wbc, @bio)
|
||||
|
||||
@@ -69,6 +69,113 @@ write-only attribute files in sysfs.
|
||||
$ echo gpio-aggregator.0 > delete_device
|
||||
|
||||
|
||||
Aggregating GPIOs using Configfs
|
||||
--------------------------------
|
||||
|
||||
**Group:** ``/config/gpio-aggregator``
|
||||
|
||||
This is the root directory of the gpio-aggregator configfs tree.
|
||||
|
||||
**Group:** ``/config/gpio-aggregator/<example-name>``
|
||||
|
||||
This directory represents a GPIO aggregator device. You can assign any
|
||||
name to ``<example-name>`` (e.g. ``agg0``), except names starting with
|
||||
``_sysfs`` prefix, which are reserved for auto-generated configfs
|
||||
entries corresponding to devices created via Sysfs.
|
||||
|
||||
**Attribute:** ``/config/gpio-aggregator/<example-name>/live``
|
||||
|
||||
The ``live`` attribute allows to trigger the actual creation of the device
|
||||
once it's fully configured. Accepted values are:
|
||||
|
||||
* ``1``, ``yes``, ``true`` : enable the virtual device
|
||||
* ``0``, ``no``, ``false`` : disable the virtual device
|
||||
|
||||
**Attribute:** ``/config/gpio-aggregator/<example-name>/dev_name``
|
||||
|
||||
The read-only ``dev_name`` attribute exposes the name of the device as it
|
||||
will appear in the system on the platform bus (e.g. ``gpio-aggregator.0``).
|
||||
This is useful for identifying a character device for the newly created
|
||||
aggregator. If it's ``gpio-aggregator.0``,
|
||||
``/sys/devices/platform/gpio-aggregator.0/gpiochipX`` path tells you that the
|
||||
GPIO device id is ``X``.
|
||||
|
||||
You must create subdirectories for each virtual line you want to
|
||||
instantiate, named exactly as ``line0``, ``line1``, ..., ``lineY``, when
|
||||
you want to instantiate ``Y+1`` (Y >= 0) lines. Configure all lines before
|
||||
activating the device by setting ``live`` to 1.
|
||||
|
||||
**Group:** ``/config/gpio-aggregator/<example-name>/<lineY>/``
|
||||
|
||||
This directory represents a GPIO line to include in the aggregator.
|
||||
|
||||
**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/key``
|
||||
|
||||
**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/offset``
|
||||
|
||||
The default values after creating the ``<lineY>`` directory are:
|
||||
|
||||
* ``key`` : <empty>
|
||||
* ``offset`` : -1
|
||||
|
||||
``key`` must always be explicitly configured, while ``offset`` depends.
|
||||
Two configuration patterns exist for each ``<lineY>``:
|
||||
|
||||
(a). For lookup by GPIO line name:
|
||||
|
||||
* Set ``key`` to the line name.
|
||||
* Ensure ``offset`` remains -1 (the default).
|
||||
|
||||
(b). For lookup by GPIO chip name and the line offset within the chip:
|
||||
|
||||
* Set ``key`` to the chip name.
|
||||
* Set ``offset`` to the line offset (0 <= ``offset`` < 65535).
|
||||
|
||||
**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/name``
|
||||
|
||||
The ``name`` attribute sets a custom name for lineY. If left unset, the
|
||||
line will remain unnamed.
|
||||
|
||||
Once the configuration is done, the ``'live'`` attribute must be set to 1
|
||||
in order to instantiate the aggregator device. It can be set back to 0 to
|
||||
destroy the virtual device. The module will synchronously wait for the new
|
||||
aggregator device to be successfully probed and if this doesn't happen, writing
|
||||
to ``'live'`` will result in an error. This is a different behaviour from the
|
||||
case when you create it using sysfs ``new_device`` interface.
|
||||
|
||||
.. note::
|
||||
|
||||
For aggregators created via Sysfs, the configfs entries are
|
||||
auto-generated and appear as ``/config/gpio-aggregator/_sysfs.<N>/``. You
|
||||
cannot add or remove line directories with mkdir(2)/rmdir(2). To modify
|
||||
lines, you must use the "delete_device" interface to tear down the
|
||||
existing device and reconfigure it from scratch. However, you can still
|
||||
toggle the aggregator with the ``live`` attribute and adjust the
|
||||
``key``, ``offset``, and ``name`` attributes for each line when ``live``
|
||||
is set to 0 by hand (i.e. it's not waiting for deferred probe).
|
||||
|
||||
Sample configuration commands
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
# Create a directory for an aggregator device
|
||||
$ mkdir /sys/kernel/config/gpio-aggregator/agg0
|
||||
|
||||
# Configure each line
|
||||
$ mkdir /sys/kernel/config/gpio-aggregator/agg0/line0
|
||||
$ echo gpiochip0 > /sys/kernel/config/gpio-aggregator/agg0/line0/key
|
||||
$ echo 6 > /sys/kernel/config/gpio-aggregator/agg0/line0/offset
|
||||
$ echo test0 > /sys/kernel/config/gpio-aggregator/agg0/line0/name
|
||||
$ mkdir /sys/kernel/config/gpio-aggregator/agg0/line1
|
||||
$ echo gpiochip0 > /sys/kernel/config/gpio-aggregator/agg0/line1/key
|
||||
$ echo 7 > /sys/kernel/config/gpio-aggregator/agg0/line1/offset
|
||||
$ echo test1 > /sys/kernel/config/gpio-aggregator/agg0/line1/name
|
||||
|
||||
# Activate the aggregator device
|
||||
$ echo 1 > /sys/kernel/config/gpio-aggregator/agg0/live
|
||||
|
||||
|
||||
Generic GPIO Driver
|
||||
-------------------
|
||||
|
||||
|
||||
@@ -23,3 +23,5 @@ are configurable at compile, boot or run time.
|
||||
gather_data_sampling
|
||||
reg-file-data-sampling
|
||||
rsb
|
||||
old_microcode
|
||||
indirect-target-selection
|
||||
|
||||
168
Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
Normal file
168
Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
Normal file
@@ -0,0 +1,168 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Indirect Target Selection (ITS)
|
||||
===============================
|
||||
|
||||
ITS is a vulnerability in some Intel CPUs that support Enhanced IBRS and were
|
||||
released before Alder Lake. ITS may allow an attacker to control the prediction
|
||||
of indirect branches and RETs located in the lower half of a cacheline.
|
||||
|
||||
ITS is assigned CVE-2024-28956 with a CVSS score of 4.7 (Medium).
|
||||
|
||||
Scope of Impact
|
||||
---------------
|
||||
- **eIBRS Guest/Host Isolation**: Indirect branches in KVM/kernel may still be
|
||||
predicted with unintended target corresponding to a branch in the guest.
|
||||
|
||||
- **Intra-Mode BTI**: In-kernel training such as through cBPF or other native
|
||||
gadgets.
|
||||
|
||||
- **Indirect Branch Prediction Barrier (IBPB)**: After an IBPB, indirect
|
||||
branches may still be predicted with targets corresponding to direct branches
|
||||
executed prior to the IBPB. This is fixed by the IPU 2025.1 microcode, which
|
||||
should be available via distro updates. Alternatively microcode can be
|
||||
obtained from Intel's github repository [#f1]_.
|
||||
|
||||
Affected CPUs
|
||||
-------------
|
||||
Below is the list of ITS affected CPUs [#f2]_ [#f3]_:
|
||||
|
||||
======================== ============ ==================== ===============
|
||||
Common name Family_Model eIBRS Intra-mode BTI
|
||||
Guest/Host Isolation
|
||||
======================== ============ ==================== ===============
|
||||
SKYLAKE_X (step >= 6) 06_55H Affected Affected
|
||||
ICELAKE_X 06_6AH Not affected Affected
|
||||
ICELAKE_D 06_6CH Not affected Affected
|
||||
ICELAKE_L 06_7EH Not affected Affected
|
||||
TIGERLAKE_L 06_8CH Not affected Affected
|
||||
TIGERLAKE 06_8DH Not affected Affected
|
||||
KABYLAKE_L (step >= 12) 06_8EH Affected Affected
|
||||
KABYLAKE (step >= 13) 06_9EH Affected Affected
|
||||
COMETLAKE 06_A5H Affected Affected
|
||||
COMETLAKE_L 06_A6H Affected Affected
|
||||
ROCKETLAKE 06_A7H Not affected Affected
|
||||
======================== ============ ==================== ===============
|
||||
|
||||
- All affected CPUs enumerate Enhanced IBRS feature.
|
||||
- IBPB isolation is affected on all ITS affected CPUs, and needs a microcode
|
||||
update for mitigation.
|
||||
- None of the affected CPUs enumerate BHI_CTRL which was introduced in Golden
|
||||
Cove (Alder Lake and Sapphire Rapids). This can help guests to determine the
|
||||
host's affected status.
|
||||
- Intel Atom CPUs are not affected by ITS.
|
||||
|
||||
Mitigation
|
||||
----------
|
||||
As only the indirect branches and RETs that have their last byte of instruction
|
||||
in the lower half of the cacheline are vulnerable to ITS, the basic idea behind
|
||||
the mitigation is to not allow indirect branches in the lower half.
|
||||
|
||||
This is achieved by relying on existing retpoline support in the kernel, and in
|
||||
compilers. ITS-vulnerable retpoline sites are runtime patched to point to newly
|
||||
added ITS-safe thunks. These safe thunks consist of an indirect branch in the
|
||||
second half of the cacheline. Not all retpoline sites are patched to thunks; if
|
||||
a retpoline site is evaluated to be ITS-safe, it is replaced with an inline
|
||||
indirect branch.
|
||||
|
||||
Dynamic thunks
|
||||
~~~~~~~~~~~~~~
|
||||
From a dynamically allocated pool of safe-thunks, each vulnerable site is
|
||||
replaced with a new thunk, such that they get a unique address. This could
|
||||
improve the branch prediction accuracy. Also, it is a defense-in-depth measure
|
||||
against aliasing.
|
||||
|
||||
Note, for simplicity, indirect branches in eBPF programs are always replaced
|
||||
with a jump to a static thunk in __x86_indirect_its_thunk_array. If required,
|
||||
in future this can be changed to use dynamic thunks.
|
||||
|
||||
All vulnerable RETs are replaced with a static thunk, they do not use dynamic
|
||||
thunks. This is because RETs get their prediction from RSB mostly that does not
|
||||
depend on source address. RETs that underflow RSB may benefit from dynamic
|
||||
thunks. But, RETs significantly outnumber indirect branches, and any benefit
|
||||
from a unique source address could be outweighed by the increased icache
|
||||
footprint and iTLB pressure.
|
||||
|
||||
Retpoline
|
||||
~~~~~~~~~
|
||||
Retpoline sequence also mitigates ITS-unsafe indirect branches. For this
|
||||
reason, when retpoline is enabled, ITS mitigation only relocates the RETs to
|
||||
safe thunks, unless the user requested the RSB-stuffing mitigation.
|
||||
|
||||
RSB Stuffing
|
||||
~~~~~~~~~~~~
|
||||
RSB-stuffing via Call Depth Tracking is a mitigation for Retbleed RSB-underflow
|
||||
attacks. And it also mitigates RETs that are vulnerable to ITS.
|
||||
|
||||
Mitigation in guests
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
All guests deploy ITS mitigation by default, irrespective of eIBRS enumeration
|
||||
and Family/Model of the guest. This is because eIBRS feature could be hidden
|
||||
from a guest. One exception to this is when a guest enumerates BHI_DIS_S, which
|
||||
indicates that the guest is running on an unaffected host.
|
||||
|
||||
To prevent guests from unnecessarily deploying the mitigation on unaffected
|
||||
platforms, Intel has defined ITS_NO bit(62) in MSR IA32_ARCH_CAPABILITIES. When
|
||||
a guest sees this bit set, it should not enumerate the ITS bug. Note, this bit
|
||||
is not set by any hardware, but is **intended for VMMs to synthesize** it for
|
||||
guests as per the host's affected status.
|
||||
|
||||
Mitigation options
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
The ITS mitigation can be controlled using the "indirect_target_selection"
|
||||
kernel parameter. The available options are:
|
||||
|
||||
======== ===================================================================
|
||||
on (default) Deploy the "Aligned branch/return thunks" mitigation.
|
||||
If spectre_v2 mitigation enables retpoline, aligned-thunks are only
|
||||
deployed for the affected RET instructions. Retpoline mitigates
|
||||
indirect branches.
|
||||
|
||||
off Disable ITS mitigation.
|
||||
|
||||
vmexit Equivalent to "=on" if the CPU is affected by guest/host isolation
|
||||
part of ITS. Otherwise, mitigation is not deployed. This option is
|
||||
useful when host userspace is not in the threat model, and only
|
||||
attacks from guest to host are considered.
|
||||
|
||||
stuff Deploy RSB-fill mitigation when retpoline is also deployed.
|
||||
Otherwise, deploy the default mitigation. When retpoline mitigation
|
||||
is enabled, RSB-stuffing via Call-Depth-Tracking also mitigates
|
||||
ITS.
|
||||
|
||||
force Force the ITS bug and deploy the default mitigation.
|
||||
======== ===================================================================
|
||||
|
||||
Sysfs reporting
|
||||
---------------
|
||||
|
||||
The sysfs file showing ITS mitigation status is:
|
||||
|
||||
/sys/devices/system/cpu/vulnerabilities/indirect_target_selection
|
||||
|
||||
Note, microcode mitigation status is not reported in this file.
|
||||
|
||||
The possible values in this file are:
|
||||
|
||||
.. list-table::
|
||||
|
||||
* - Not affected
|
||||
- The processor is not vulnerable.
|
||||
* - Vulnerable
|
||||
- System is vulnerable and no mitigation has been applied.
|
||||
* - Vulnerable, KVM: Not affected
|
||||
- System is vulnerable to intra-mode BTI, but not affected by eIBRS
|
||||
guest/host isolation.
|
||||
* - Mitigation: Aligned branch/return thunks
|
||||
- The mitigation is enabled, affected indirect branches and RETs are
|
||||
relocated to safe thunks.
|
||||
* - Mitigation: Retpolines, Stuffing RSB
|
||||
- The mitigation is enabled using retpoline and RSB stuffing.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [#f1] Microcode repository - https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files
|
||||
|
||||
.. [#f2] Affected Processors list - https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
|
||||
|
||||
.. [#f3] Affected Processors list (machine readable) - https://github.com/intel/Intel-affected-processor-list
|
||||
21
Documentation/admin-guide/hw-vuln/old_microcode.rst
Normal file
21
Documentation/admin-guide/hw-vuln/old_microcode.rst
Normal file
@@ -0,0 +1,21 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=============
|
||||
Old Microcode
|
||||
=============
|
||||
|
||||
The kernel keeps a table of released microcode. Systems that had
|
||||
microcode older than this at boot will say "Vulnerable". This means
|
||||
that the system was vulnerable to some known CPU issue. It could be
|
||||
security or functional, the kernel does not know or care.
|
||||
|
||||
You should update the CPU microcode to mitigate any exposure. This is
|
||||
usually accomplished by updating the files in
|
||||
/lib/firmware/intel-ucode/ via normal distribution updates. Intel also
|
||||
distributes these files in a github repo:
|
||||
|
||||
https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files.git
|
||||
|
||||
Just like all the other hardware vulnerabilities, exposure is
|
||||
determined at boot. Runtime microcode updates do not change the status
|
||||
of this vulnerability.
|
||||
@@ -547,6 +547,38 @@ from within add_taint() whenever the value set in this bitmask matches with the
|
||||
bit flag being set by add_taint().
|
||||
This will cause a kdump to occur at the add_taint()->panic() call.
|
||||
|
||||
Write the dump file to encrypted disk volume
|
||||
============================================
|
||||
|
||||
CONFIG_CRASH_DM_CRYPT can be enabled to support saving the dump file to an
|
||||
encrypted disk volume (only x86_64 supported for now). User space can interact
|
||||
with /sys/kernel/config/crash_dm_crypt_keys for setup,
|
||||
|
||||
1. Tell the first kernel what logon keys are needed to unlock the disk volumes,
|
||||
# Add key #1
|
||||
mkdir /sys/kernel/config/crash_dm_crypt_keys/7d26b7b4-e342-4d2d-b660-7426b0996720
|
||||
# Add key #1's description
|
||||
echo cryptsetup:7d26b7b4-e342-4d2d-b660-7426b0996720 > /sys/kernel/config/crash_dm_crypt_keys/7d26b7b4-e342-4d2d-b660-7426b0996720/description
|
||||
|
||||
# how many keys do we have now?
|
||||
cat /sys/kernel/config/crash_dm_crypt_keys/count
|
||||
1
|
||||
|
||||
# Add key #2 in the same way
|
||||
|
||||
# how many keys do we have now?
|
||||
cat /sys/kernel/config/crash_dm_crypt_keys/count
|
||||
2
|
||||
|
||||
# To support CPU/memory hot-plugging, re-use keys already saved to reserved
|
||||
# memory
|
||||
echo true > /sys/kernel/config/crash_dm_crypt_keys/reuse
|
||||
|
||||
2. Load the dump-capture kernel
|
||||
|
||||
3. After the dump-capture kernel gets booted, restore the keys to user keyring
|
||||
echo yes > /sys/kernel/config/crash_dm_crypt_keys/restore
|
||||
|
||||
Contact
|
||||
=======
|
||||
|
||||
|
||||
@@ -331,8 +331,8 @@ PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision|PG_head_mask|P
|
||||
Page attributes. These flags are used to filter various unnecessary for
|
||||
dumping pages.
|
||||
|
||||
PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)
|
||||
-----------------------------------------------------------------------------
|
||||
PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_unaccepted)
|
||||
-------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
More page attributes. These flags are used to filter various unnecessary for
|
||||
dumping pages.
|
||||
|
||||
@@ -458,6 +458,9 @@
|
||||
arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory
|
||||
Set instructions support
|
||||
|
||||
arm64.nompam [ARM64] Unconditionally disable Memory Partitioning And
|
||||
Monitoring support
|
||||
|
||||
arm64.nomte [ARM64] Unconditionally disable Memory Tagging Extension
|
||||
support
|
||||
|
||||
@@ -1828,6 +1831,13 @@
|
||||
lz4: Select LZ4 compression algorithm to
|
||||
compress/decompress hibernation image.
|
||||
|
||||
hibernate.pm_test_delay=
|
||||
[HIBERNATION]
|
||||
Sets the number of seconds to remain in a hibernation test
|
||||
mode before resuming the system (see
|
||||
/sys/power/pm_test). Only available when CONFIG_PM_DEBUG
|
||||
is set. Default value is 5.
|
||||
|
||||
highmem=nn[KMG] [KNL,BOOT,EARLY] forces the highmem zone to have an exact
|
||||
size of <nn>. This works even on boxes that have no
|
||||
highmem otherwise. This also works to reduce highmem
|
||||
@@ -2202,6 +2212,23 @@
|
||||
different crypto accelerators. This option can be used
|
||||
to achieve best performance for particular HW.
|
||||
|
||||
indirect_target_selection= [X86,Intel] Mitigation control for Indirect
|
||||
Target Selection(ITS) bug in Intel CPUs. Updated
|
||||
microcode is also required for a fix in IBPB.
|
||||
|
||||
on: Enable mitigation (default).
|
||||
off: Disable mitigation.
|
||||
force: Force the ITS bug and deploy default
|
||||
mitigation.
|
||||
vmexit: Only deploy mitigation if CPU is affected by
|
||||
guest/host isolation part of ITS.
|
||||
stuff: Deploy RSB-fill mitigation when retpoline is
|
||||
also deployed. Otherwise, deploy the default
|
||||
mitigation.
|
||||
|
||||
For details see:
|
||||
Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
|
||||
|
||||
init= [KNL]
|
||||
Format: <full_path>
|
||||
Run specified binary instead of /sbin/init as init
|
||||
@@ -2725,6 +2752,31 @@
|
||||
kgdbwait [KGDB,EARLY] Stop kernel execution and enter the
|
||||
kernel debugger at the earliest opportunity.
|
||||
|
||||
kho= [KEXEC,EARLY]
|
||||
Format: { "0" | "1" | "off" | "on" | "y" | "n" }
|
||||
Enables or disables Kexec HandOver.
|
||||
"0" | "off" | "n" - kexec handover is disabled
|
||||
"1" | "on" | "y" - kexec handover is enabled
|
||||
|
||||
kho_scratch= [KEXEC,EARLY]
|
||||
Format: ll[KMG],mm[KMG],nn[KMG] | nn%
|
||||
Defines the size of the KHO scratch region. The KHO
|
||||
scratch regions are physically contiguous memory
|
||||
ranges that can only be used for non-kernel
|
||||
allocations. That way, even when memory is heavily
|
||||
fragmented with handed over memory, the kexeced
|
||||
kernel will always have enough contiguous ranges to
|
||||
bootstrap itself.
|
||||
|
||||
It is possible to specify the exact amount of
|
||||
memory in the form of "ll[KMG],mm[KMG],nn[KMG]"
|
||||
where the first parameter defines the size of a low
|
||||
memory scratch area, the second parameter defines
|
||||
the size of a global scratch area and the third
|
||||
parameter defines the size of additional per-node
|
||||
scratch areas. The form "nn%" defines scale factor
|
||||
(in percents) of memory that was used during boot.
|
||||
|
||||
kmac= [MIPS] Korina ethernet MAC address.
|
||||
Configure the RouterBoard 532 series on-chip
|
||||
Ethernet adapter MAC address.
|
||||
@@ -3693,6 +3745,7 @@
|
||||
expose users to several CPU vulnerabilities.
|
||||
Equivalent to: if nokaslr then kpti=0 [ARM64]
|
||||
gather_data_sampling=off [X86]
|
||||
indirect_target_selection=off [X86]
|
||||
kvm.nx_huge_pages=off [X86]
|
||||
l1tf=off [X86]
|
||||
mds=off [X86]
|
||||
@@ -5654,6 +5707,31 @@
|
||||
are zero, rcutorture acts as if is interpreted
|
||||
they are all non-zero.
|
||||
|
||||
rcutorture.gpwrap_lag= [KNL]
|
||||
Enable grace-period wrap lag testing. Setting
|
||||
to false prevents the gpwrap lag test from
|
||||
running. Default is true.
|
||||
|
||||
rcutorture.gpwrap_lag_gps= [KNL]
|
||||
Set the value for grace-period wrap lag during
|
||||
active lag testing periods. This controls how many
|
||||
grace-period differences we tolerate between
|
||||
rdp and rnp's gp_seq before setting overflow flag.
|
||||
The default is always set to 8.
|
||||
|
||||
rcutorture.gpwrap_lag_cycle_mins= [KNL]
|
||||
Set the total cycle duration for gpwrap lag
|
||||
testing in minutes. This is the total time for
|
||||
one complete cycle of active and inactive
|
||||
testing periods. Default is 30 minutes.
|
||||
|
||||
rcutorture.gpwrap_lag_active_mins= [KNL]
|
||||
Set the duration for which gpwrap lag is active
|
||||
within each cycle, in minutes. During this time,
|
||||
the grace-period wrap lag will be set to the
|
||||
value specified by gpwrap_lag_gps. Default is
|
||||
5 minutes.
|
||||
|
||||
rcutorture.irqreader= [KNL]
|
||||
Run RCU readers from irq handlers, or, more
|
||||
accurately, from a timer handler. Not all RCU
|
||||
@@ -6250,7 +6328,7 @@
|
||||
port and the regular usb controller gets disabled.
|
||||
|
||||
root= [KNL] Root filesystem
|
||||
Usually this a a block device specifier of some kind,
|
||||
Usually this is a block device specifier of some kind,
|
||||
see the early_lookup_bdev comment in
|
||||
block/early-lookup.c for details.
|
||||
Alternatively this can be "ram" for the legacy initial
|
||||
@@ -6277,6 +6355,11 @@
|
||||
Memory area to be used by remote processor image,
|
||||
managed by CMA.
|
||||
|
||||
rt_group_sched= [KNL] Enable or disable SCHED_RR/FIFO group scheduling
|
||||
when CONFIG_RT_GROUP_SCHED=y. Defaults to
|
||||
!CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED.
|
||||
Format: <bool>
|
||||
|
||||
rw [KNL] Mount root device read-write on boot
|
||||
|
||||
S [KNL] Run init in single mode
|
||||
|
||||
127
Documentation/admin-guide/laptops/alienware-wmi.rst
Normal file
127
Documentation/admin-guide/laptops/alienware-wmi.rst
Normal file
@@ -0,0 +1,127 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
====================
|
||||
Alienware WMI Driver
|
||||
====================
|
||||
|
||||
Kurt Borja <kuurtb@gmail.com>
|
||||
|
||||
This is a driver for the "WMAX" WMI device, which is found in most Dell gaming
|
||||
laptops and controls various special features.
|
||||
|
||||
Before the launch of M-Series laptops (~2018), the "WMAX" device controlled
|
||||
basic RGB lighting, deep sleep mode, HDMI mode and amplifier status.
|
||||
|
||||
Later, this device was completely repurposed. Now it mostly deals with thermal
|
||||
profiles, sensor monitoring and overclocking. This interface is named "AWCC" and
|
||||
is known to be used by the AWCC OEM application to control these features.
|
||||
|
||||
The alienware-wmi driver controls both interfaces.
|
||||
|
||||
AWCC Interface
|
||||
==============
|
||||
|
||||
WMI device documentation: Documentation/wmi/devices/alienware-wmi.rst
|
||||
|
||||
Supported devices
|
||||
-----------------
|
||||
|
||||
- Alienware M-Series laptops
|
||||
- Alienware X-Series laptops
|
||||
- Alienware Aurora Desktops
|
||||
- Dell G-Series laptops
|
||||
|
||||
If you believe your device supports the AWCC interface and you don't have any of
|
||||
the features described in this document, try the following alienware-wmi module
|
||||
parameters:
|
||||
|
||||
- ``force_platform_profile=1``: Forces probing for platform profile support
|
||||
- ``force_hwmon=1``: Forces probing for HWMON support
|
||||
|
||||
If the module loads successfully with these parameters, consider submitting a
|
||||
patch adding your model to the ``awcc_dmi_table`` located in
|
||||
``drivers/platform/x86/dell/alienware-wmi-wmax.c`` or contacting the maintainer
|
||||
for further guidance.
|
||||
|
||||
Status
|
||||
------
|
||||
|
||||
The following features are currently supported:
|
||||
|
||||
- :ref:`Platform Profile <platform-profile>`:
|
||||
|
||||
- Thermal profile control
|
||||
|
||||
- G-Mode toggling
|
||||
|
||||
- :ref:`HWMON <hwmon>`:
|
||||
|
||||
- Sensor monitoring
|
||||
|
||||
- Manual fan control
|
||||
|
||||
.. _platform-profile:
|
||||
|
||||
Platform Profile
|
||||
----------------
|
||||
|
||||
The AWCC interface exposes various firmware defined thermal profiles. These are
|
||||
exposed to user-space through the Platform Profile class interface. Refer to
|
||||
:ref:`sysfs-class-platform-profile <abi_file_testing_sysfs_class_platform_profile>`
|
||||
for more information.
|
||||
|
||||
The name of the platform-profile class device exported by this driver is
|
||||
"alienware-wmi" and its path can be found with:
|
||||
|
||||
::
|
||||
|
||||
grep -l "alienware-wmi" /sys/class/platform-profile/platform-profile-*/name | sed 's|/[^/]*$||'
|
||||
|
||||
If the device supports G-Mode, it is also toggled when selecting the
|
||||
``performance`` profile.
|
||||
|
||||
.. note::
|
||||
You may set the ``force_gmode`` module parameter to always try to toggle this
|
||||
feature, without checking if your model supports it.
|
||||
|
||||
.. _hwmon:
|
||||
|
||||
HWMON
|
||||
-----
|
||||
|
||||
The AWCC interface also supports sensor monitoring and manual fan control. Both
|
||||
of these features are exposed to user-space through the HWMON interface.
|
||||
|
||||
The name of the hwmon class device exported by this driver is "alienware_wmi"
|
||||
and its path can be found with:
|
||||
|
||||
::
|
||||
|
||||
grep -l "alienware_wmi" /sys/class/hwmon/hwmon*/name | sed 's|/[^/]*$||'
|
||||
|
||||
Sensor monitoring is done through the standard HWMON interface. Refer to
|
||||
:ref:`sysfs-class-hwmon <abi_file_testing_sysfs_class_hwmon>` for more
|
||||
information.
|
||||
|
||||
Manual fan control on the other hand, is not exposed directly by the AWCC
|
||||
interface. Instead it lets us control a fan `boost` value. This `boost` value
|
||||
has the following approximate behavior over the fan pwm:
|
||||
|
||||
::
|
||||
|
||||
pwm = pwm_base + (fan_boost / 255) * (pwm_max - pwm_base)
|
||||
|
||||
Due to the above behavior, the fan `boost` control is exposed to user-space
|
||||
through the following, custom hwmon sysfs attribute:
|
||||
|
||||
=============================== ======= =======================================
|
||||
Name Perm Description
|
||||
=============================== ======= =======================================
|
||||
fan[1-4]_boost RW Fan boost value.
|
||||
|
||||
Integer value between 0 and 255
|
||||
=============================== ======= =======================================
|
||||
|
||||
.. note::
|
||||
In some devices, manual fan control only works reliably if the ``custom``
|
||||
platform profile is selected.
|
||||
@@ -7,6 +7,7 @@ Laptop Drivers
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
alienware-wmi
|
||||
asus-laptop
|
||||
disk-shock-protection
|
||||
laptop-mode
|
||||
|
||||
26
Documentation/admin-guide/media/c3-isp.dot
Normal file
26
Documentation/admin-guide/media/c3-isp.dot
Normal file
@@ -0,0 +1,26 @@
|
||||
digraph board {
|
||||
rankdir=TB
|
||||
n00000001 [label="{{<port0> 0 | <port1> 1} | c3-isp-core\n/dev/v4l-subdev0 | {<port2> 2 | <port3> 3 | <port4> 4 | <port5> 5}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||
n00000001:port3 -> n00000008:port0
|
||||
n00000001:port4 -> n0000000b:port0
|
||||
n00000001:port5 -> n0000000e:port0
|
||||
n00000001:port2 -> n00000027
|
||||
n00000008 [label="{{<port0> 0} | c3-isp-resizer0\n/dev/v4l-subdev1 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||
n00000008:port1 -> n00000016 [style=bold]
|
||||
n0000000b [label="{{<port0> 0} | c3-isp-resizer1\n/dev/v4l-subdev2 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||
n0000000b:port1 -> n0000001a [style=bold]
|
||||
n0000000e [label="{{<port0> 0} | c3-isp-resizer2\n/dev/v4l-subdev3 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||
n0000000e:port1 -> n00000023 [style=bold]
|
||||
n00000011 [label="{{<port0> 0} | c3-mipi-adapter\n/dev/v4l-subdev4 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||
n00000011:port1 -> n00000001:port0 [style=bold]
|
||||
n00000016 [label="c3-isp-cap0\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
|
||||
n0000001a [label="c3-isp-cap1\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
|
||||
n0000001e [label="{{<port0> 0} | c3-mipi-csi2\n/dev/v4l-subdev5 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||
n0000001e:port1 -> n00000011:port0 [style=bold]
|
||||
n00000023 [label="c3-isp-cap2\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
|
||||
n00000027 [label="c3-isp-stats\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
|
||||
n0000002b [label="c3-isp-params\n/dev/video4", shape=box, style=filled, fillcolor=yellow]
|
||||
n0000002b -> n00000001:port1
|
||||
n0000003f [label="{{} | imx290 2-001a\n/dev/v4l-subdev6 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||
n0000003f:port0 -> n0000001e:port0 [style=bold]
|
||||
}
|
||||
101
Documentation/admin-guide/media/c3-isp.rst
Normal file
101
Documentation/admin-guide/media/c3-isp.rst
Normal file
@@ -0,0 +1,101 @@
|
||||
.. SPDX-License-Identifier: (GPL-2.0-only OR MIT)
|
||||
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
=================================================
|
||||
Amlogic C3 Image Signal Processing (C3ISP) driver
|
||||
=================================================
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This file documents the Amlogic C3ISP driver located under
|
||||
drivers/media/platform/amlogic/c3/isp.
|
||||
|
||||
The current version of the driver supports the C3ISP found on
|
||||
Amlogic C308L processor.
|
||||
|
||||
The driver implements V4L2, Media controller and V4L2 subdev interfaces.
|
||||
Camera sensors using the V4L2 subdev interface in the kernel are supported.
|
||||
|
||||
The driver has been tested on AW419-C308L-Socket platform.
|
||||
|
||||
Amlogic C3 ISP
|
||||
==============
|
||||
|
||||
The Camera hardware found on C308L processors and supported by
|
||||
the driver consists of:
|
||||
|
||||
- 1 MIPI-CSI-2 module: handles the physical layer of the MIPI CSI-2 receiver and
|
||||
receives data from the connected camera sensor.
|
||||
- 1 MIPI-ADAPTER module: organizes MIPI data to meet ISP input requirements and
|
||||
sends MIPI data to the ISP.
|
||||
- 1 ISP (Image Signal Processing) module: contains a pipeline of image processing
|
||||
hardware blocks. The ISP pipeline contains three resizers at the end, each of
|
||||
them connected to a DMA interface which writes the output data to memory.
|
||||
|
||||
A high-level functional view of the C3 ISP is presented below::
|
||||
|
||||
+----------+ +-------+
|
||||
| Resizer |--->| WRMIF |
|
||||
+---------+ +------------+ +--------------+ +-------+ |----------+ +-------+
|
||||
| Sensor |--->| MIPI CSI-2 |--->| MIPI ADAPTER |--->| ISP |---|----------+ +-------+
|
||||
+---------+ +------------+ +--------------+ +-------+ | Resizer |--->| WRMIF |
|
||||
+----------+ +-------+
|
||||
|----------+ +-------+
|
||||
| Resizer |--->| WRMIF |
|
||||
+----------+ +-------+
|
||||
|
||||
Driver architecture and design
|
||||
==============================
|
||||
|
||||
With the goal to model the hardware links between the modules and to expose a
|
||||
clean, logical and usable interface, the driver registers the following V4L2
|
||||
sub-devices:
|
||||
|
||||
- 1 `c3-mipi-csi2` sub-device - the MIPI CSI-2 receiver
|
||||
- 1 `c3-mipi-adapter` sub-device - the MIPI adapter
|
||||
- 1 `c3-isp-core` sub-device - the ISP core
|
||||
- 3 `c3-isp-resizer` sub-devices - the ISP resizers
|
||||
|
||||
The `c3-isp-core` sub-device is linked to 2 video device nodes for statistics
|
||||
capture and parameters programming:
|
||||
|
||||
- the `c3-isp-stats` capture video device node for statistics capture
|
||||
- the `c3-isp-params` output video device for parameters programming
|
||||
|
||||
Each `c3-isp-resizer` sub-device is linked to a capture video device node where
|
||||
frames are captured from:
|
||||
|
||||
- `c3-isp-resizer0` is linked to the `c3-isp-cap0` capture video device
|
||||
- `c3-isp-resizer1` is linked to the `c3-isp-cap1` capture video device
|
||||
- `c3-isp-resizer2` is linked to the `c3-isp-cap2` capture video device
|
||||
|
||||
The media controller pipeline graph is as follows (with a connected
|
||||
IMX290 camera sensor):
|
||||
|
||||
.. _isp_topology_graph:
|
||||
|
||||
.. kernel-figure:: c3-isp.dot
|
||||
:alt: c3-isp.dot
|
||||
:align: center
|
||||
|
||||
Media pipeline topology
|
||||
|
||||
Implementation
|
||||
==============
|
||||
|
||||
Runtime configuration of the ISP hardware is performed on the `c3-isp-params`
|
||||
video device node using the :ref:`V4L2_META_FMT_C3ISP_PARAMS
|
||||
<v4l2-meta-fmt-c3isp-params>` as data format. The buffer structure is defined by
|
||||
:c:type:`c3_isp_params_cfg`.
|
||||
|
||||
Statistics are captured from the `c3-isp-stats` video device node using the
|
||||
:ref:`V4L2_META_FMT_C3ISP_STATS <v4l2-meta-fmt-c3isp-stats>` data format.
|
||||
|
||||
The final picture size and format is configured using the V4L2 video
|
||||
capture interface on the `c3-isp-cap[0-2]` video device nodes.
|
||||
|
||||
The Amlogic C3 ISP is supported by `libcamera <https://libcamera.org>`_ with a
|
||||
dedicated pipeline handler and algorithms that perform run-time image correction
|
||||
and enhancement.
|
||||
@@ -1,8 +1,17 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
The mgb4 driver
|
||||
===============
|
||||
|
||||
Copyright |copy| 2023 - 2025 Digiteq Automotive
|
||||
author: Martin Tůma <martin.tuma@digiteqautomotive.com>
|
||||
|
||||
This is a v4l2 device driver for the Digiteq Automotive FrameGrabber 4, a PCIe
|
||||
card capable of capturing and generating FPD-Link III and GMSL2/3 video streams
|
||||
as used in the automotive industry.
|
||||
|
||||
sysfs interface
|
||||
---------------
|
||||
|
||||
|
||||
@@ -86,7 +86,6 @@ saa7134 Philips SAA7134
|
||||
saa7164 NXP SAA7164
|
||||
smipcie SMI PCIe DVBSky cards
|
||||
solo6x10 Bluecherry / Softlogic 6x10 capture cards (MPEG-4/H.264)
|
||||
sta2x11_vip STA2X11 VIP Video For Linux
|
||||
tw5864 Techwell TW5864 video/audio grabber and encoder
|
||||
tw686x Intersil/Techwell TW686x
|
||||
tw68 Techwell tw68x Video For Linux
|
||||
|
||||
@@ -10,6 +10,7 @@ Video4Linux (V4L) driver-specific documentation
|
||||
:maxdepth: 2
|
||||
|
||||
bttv
|
||||
c3-isp
|
||||
cafe_ccic
|
||||
cx88
|
||||
fimc
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==========================
|
||||
DAMON: Data Access MONitor
|
||||
==========================
|
||||
================================================================
|
||||
DAMON: Data Access MONitoring and Access-aware System Operations
|
||||
================================================================
|
||||
|
||||
:doc:`DAMON </mm/damon/index>` allows light-weight data access monitoring.
|
||||
Using DAMON, users can analyze the memory access patterns of their systems and
|
||||
optimize those.
|
||||
:doc:`DAMON </mm/damon/index>` is a Linux kernel subsystem for efficient data
|
||||
access monitoring and access-aware system operations.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
@@ -81,7 +81,7 @@ comma (",").
|
||||
│ │ │ │ │ │ │ :ref:`quotas <sysfs_quotas>`/ms,bytes,reset_interval_ms,effective_bytes
|
||||
│ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
|
||||
│ │ │ │ │ │ │ │ :ref:`goals <sysfs_schemes_quota_goals>`/nr_goals
|
||||
│ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value
|
||||
│ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value,nid
|
||||
│ │ │ │ │ │ │ :ref:`watermarks <sysfs_watermarks>`/metric,interval_us,high,mid,low
|
||||
│ │ │ │ │ │ │ :ref:`{core_,ops_,}filters <sysfs_filters>`/nr_filters
|
||||
│ │ │ │ │ │ │ │ 0/type,matching,allow,memcg_path,addr_start,addr_end,target_idx,min,max
|
||||
@@ -390,11 +390,11 @@ number (``N``) to the file creates the number of child directories named ``0``
|
||||
to ``N-1``. Each directory represents each goal and current achievement.
|
||||
Among the multiple feedback, the best one is used.
|
||||
|
||||
Each goal directory contains three files, namely ``target_metric``,
|
||||
``target_value`` and ``current_value``. Users can set and get the three
|
||||
parameters for the quota auto-tuning goals that specified on the :ref:`design
|
||||
doc <damon_design_damos_quotas_auto_tuning>` by writing to and reading from each
|
||||
of the files. Note that users should further write
|
||||
Each goal directory contains four files, namely ``target_metric``,
|
||||
``target_value``, ``current_value`` and ``nid``. Users can set and get the
|
||||
four parameters for the quota auto-tuning goals that are specified on the
|
||||
:ref:`design doc <damon_design_damos_quotas_auto_tuning>` by writing to and
|
||||
reading from each of the files. Note that users should further write
|
||||
``commit_schemes_quota_goals`` to the ``state`` file of the :ref:`kdamond
|
||||
directory <sysfs_kdamond>` to pass the feedback to DAMON.
|
||||
|
||||
|
||||
@@ -42,3 +42,4 @@ the Linux memory management.
|
||||
transhuge
|
||||
userfaultfd
|
||||
zswap
|
||||
kho
|
||||
|
||||
115
Documentation/admin-guide/mm/kho.rst
Normal file
115
Documentation/admin-guide/mm/kho.rst
Normal file
@@ -0,0 +1,115 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
====================
|
||||
Kexec Handover Usage
|
||||
====================
|
||||
|
||||
Kexec HandOver (KHO) is a mechanism that allows Linux to preserve memory
|
||||
regions, which could contain serialized system states, across kexec.
|
||||
|
||||
This document expects that you are familiar with the base KHO
|
||||
:ref:`concepts <kho-concepts>`. If you have not read
|
||||
them yet, please do so now.
|
||||
|
||||
Prerequisites
|
||||
=============
|
||||
|
||||
KHO is available when the kernel is compiled with ``CONFIG_KEXEC_HANDOVER``
|
||||
set to y. Every KHO producer may have its own config option that you
|
||||
need to enable if you would like to preserve their respective state across
|
||||
kexec.
|
||||
|
||||
To use KHO, please boot the kernel with the ``kho=on`` command line
|
||||
parameter. You may use ``kho_scratch`` parameter to define size of the
|
||||
scratch regions. For example ``kho_scratch=16M,512M,256M`` will reserve a
|
||||
16 MiB low memory scratch area, a 512 MiB global scratch region, and 256 MiB
|
||||
per NUMA node scratch regions on boot.
|
||||
|
||||
Perform a KHO kexec
|
||||
===================
|
||||
|
||||
First, before you perform a KHO kexec, you need to move the system into
|
||||
the :ref:`KHO finalization phase <kho-finalization-phase>` ::
|
||||
|
||||
$ echo 1 > /sys/kernel/debug/kho/out/finalize
|
||||
|
||||
After this command, the KHO FDT is available in
|
||||
``/sys/kernel/debug/kho/out/fdt``. Other subsystems may also register
|
||||
their own preserved sub FDTs under
|
||||
``/sys/kernel/debug/kho/out/sub_fdts/``.
|
||||
|
||||
Next, load the target payload and kexec into it. It is important that you
|
||||
use the ``-s`` parameter to use the in-kernel kexec file loader, as user
|
||||
space kexec tooling currently has no support for KHO with the user space
|
||||
based file loader ::
|
||||
|
||||
# kexec -l /path/to/bzImage --initrd /path/to/initrd -s
|
||||
# kexec -e
|
||||
|
||||
The new kernel will boot up and contain some of the previous kernel's state.
|
||||
|
||||
For example, if you used ``reserve_mem`` command line parameter to create
|
||||
an early memory reservation, the new kernel will have that memory at the
|
||||
same physical address as the old kernel.
|
||||
|
||||
Abort a KHO exec
|
||||
================
|
||||
|
||||
You can move the system out of KHO finalization phase again by calling ::
|
||||
|
||||
$ echo 0 > /sys/kernel/debug/kho/out/finalize
|
||||
|
||||
After this command, the KHO FDT is no longer available in
|
||||
``/sys/kernel/debug/kho/out/fdt``.
|
||||
|
||||
debugfs Interfaces
|
||||
==================
|
||||
|
||||
Currently KHO creates the following debugfs interfaces. Notice that these
|
||||
interfaces may change in the future. They will be moved to sysfs once KHO is
|
||||
stabilized.
|
||||
|
||||
``/sys/kernel/debug/kho/out/finalize``
|
||||
Kexec HandOver (KHO) allows Linux to transition the state of
|
||||
compatible drivers into the next kexec'ed kernel. To do so,
|
||||
device drivers will instruct KHO to preserve memory regions,
|
||||
which could contain serialized kernel state.
|
||||
While the state is serialized, they are unable to perform
|
||||
any modifications to state that was serialized, such as
|
||||
handed over memory allocations.
|
||||
|
||||
When this file contains "1", the system is in the transition
|
||||
state. When it contains "0", it is not. To switch between the
|
||||
two states, echo the respective number into this file.
|
||||
|
||||
``/sys/kernel/debug/kho/out/fdt``
|
||||
When KHO state tree is finalized, the kernel exposes the
|
||||
flattened device tree blob that carries its current KHO
|
||||
state in this file. Kexec user space tooling can use this
|
||||
as input file for the KHO payload image.
|
||||
|
||||
``/sys/kernel/debug/kho/out/scratch_len``
|
||||
Lengths of KHO scratch regions, which are physically contiguous
|
||||
memory regions that will always stay available for future kexec
|
||||
allocations. Kexec user space tools can use this file to determine
|
||||
where it should place its payload images.
|
||||
|
||||
``/sys/kernel/debug/kho/out/scratch_phys``
|
||||
Physical locations of KHO scratch regions. Kexec user space tools
|
||||
can use this file in conjunction with scratch_len to determine where
|
||||
it should place its payload images.
|
||||
|
||||
``/sys/kernel/debug/kho/out/sub_fdts/``
|
||||
In the KHO finalization phase, KHO producers register their own
|
||||
FDT blob under this directory.
|
||||
|
||||
``/sys/kernel/debug/kho/in/fdt``
|
||||
When the kernel was booted with Kexec HandOver (KHO),
|
||||
the state tree that carries metadata about the previous
|
||||
kernel's state is in this file in the format of flattened
|
||||
device tree. This file may disappear when all consumers of
|
||||
it have finished interpreting their metadata.
|
||||
|
||||
``/sys/kernel/debug/kho/in/sub_fdts/``
|
||||
Similar to ``kho/out/sub_fdts/``, but contains sub FDT blobs
|
||||
of KHO producers passed from the old kernel.
|
||||
@@ -151,8 +151,9 @@ generations less than or equal to ``min_gen_nr``.
|
||||
``min_gen_nr`` should be less than ``max_gen_nr-1``, since
|
||||
``max_gen_nr`` and ``max_gen_nr-1`` are not fully aged (equivalent to
|
||||
the active list) and therefore cannot be evicted. ``swappiness``
|
||||
overrides the default value in ``/proc/sys/vm/swappiness``.
|
||||
``nr_to_reclaim`` limits the number of pages to evict.
|
||||
overrides the default value in ``/proc/sys/vm/swappiness`` and the valid
|
||||
range is [0-200, max], with max being exclusively used for the reclamation
|
||||
of anonymous memory. ``nr_to_reclaim`` limits the number of pages to evict.
|
||||
|
||||
A typical use case is that a job scheduler runs this command before it
|
||||
tries to land a new job on a server. If it fails to materialize enough
|
||||
|
||||
@@ -250,6 +250,7 @@ Following flags about pages are currently supported:
|
||||
- ``PAGE_IS_PFNZERO`` - Page has zero PFN
|
||||
- ``PAGE_IS_HUGE`` - Page is PMD-mapped THP or Hugetlb backed
|
||||
- ``PAGE_IS_SOFT_DIRTY`` - Page is soft-dirty
|
||||
- ``PAGE_IS_GUARD`` - Page is a part of a guard region
|
||||
|
||||
The ``struct pm_scan_arg`` is used as the argument of the IOCTL.
|
||||
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
===========================
|
||||
Namespaces research control
|
||||
===========================
|
||||
====================================
|
||||
User namespaces and resource control
|
||||
====================================
|
||||
|
||||
There are a lot of kinds of objects in the kernel that don't have
|
||||
individual limits or that have limits that are ineffective when a set
|
||||
of processes is allowed to switch user ids. With user namespaces
|
||||
enabled in a kernel for people who don't trust their users or their
|
||||
users programs to play nice this problems becomes more acute.
|
||||
The kernel contains many kinds of objects that either don't have
|
||||
individual limits or that have limits which are ineffective when
|
||||
a set of processes is allowed to switch their UID. On a system
|
||||
where the admins don't trust their users or their users' programs,
|
||||
user namespaces expose the system to potential misuse of resources.
|
||||
|
||||
Therefore it is recommended that memory control groups be enabled in
|
||||
kernels that enable user namespaces, and it is further recommended
|
||||
that userspace configure memory control groups to limit how much
|
||||
memory user's they don't trust to play nice can use.
|
||||
In order to mitigate this, we recommend that admins enable memory
|
||||
control groups on any system that enables user namespaces.
|
||||
Furthermore, we recommend that admins configure the memory control
|
||||
groups to limit the maximum memory usable by any untrusted user.
|
||||
|
||||
Memory control groups can be configured by installing the libcgroup
|
||||
package present on most distros editing /etc/cgrules.conf,
|
||||
|
||||
@@ -231,7 +231,7 @@ are the following:
|
||||
present).
|
||||
|
||||
The existence of the limit may be a result of some (often unintentional)
|
||||
BIOS settings, restrictions coming from a service processor or another
|
||||
BIOS settings, restrictions coming from a service processor or other
|
||||
BIOS/HW-based mechanisms.
|
||||
|
||||
This does not cover ACPI thermal limitations which can be discovered
|
||||
@@ -258,8 +258,8 @@ are the following:
|
||||
extension on ARM). If one cannot be determined, this attribute should
|
||||
not be present.
|
||||
|
||||
Note, that failed attempt to retrieve current frequency for a given
|
||||
CPU(s) will result in an appropriate error, i.e: EAGAIN for CPU that
|
||||
Note that a failed attempt to retrieve current frequency for a given
|
||||
CPU(s) will result in an appropriate error, i.e.: EAGAIN for CPU that
|
||||
remains idle (raised on ARM).
|
||||
|
||||
``cpuinfo_max_freq``
|
||||
@@ -499,7 +499,7 @@ This governor exposes the following tunables:
|
||||
represented by it to be 1.5 times as high as the transition latency
|
||||
(the default)::
|
||||
|
||||
# echo `$(($(cat cpuinfo_transition_latency) * 3 / 2)) > ondemand/sampling_rate
|
||||
# echo $(($(cat cpuinfo_transition_latency) * 3 / 2)) > ondemand/sampling_rate
|
||||
|
||||
``up_threshold``
|
||||
If the estimated CPU load is above this value (in percent), the governor
|
||||
|
||||
@@ -38,6 +38,27 @@ instruction at all.
|
||||
only way to pass early-configuration-time parameters to it is via the kernel
|
||||
command line.
|
||||
|
||||
Sysfs Interface
|
||||
===============
|
||||
|
||||
The ``intel_idle`` driver exposes the following ``sysfs`` attributes in
|
||||
``/sys/devices/system/cpu/cpuidle/``:
|
||||
|
||||
``intel_c1_demotion``
|
||||
Enable or disable C1 demotion for all CPUs in the system. This file is
|
||||
only exposed on platforms that support the C1 demotion feature and where
|
||||
it was tested. Value 0 means that C1 demotion is disabled, value 1 means
|
||||
that it is enabled. Write 0 or 1 to disable or enable C1 demotion for
|
||||
all CPUs.
|
||||
|
||||
The C1 demotion feature involves the platform firmware demoting deep
|
||||
C-state requests from the OS (e.g., C6 requests) to C1. The idea is that
|
||||
firmware monitors CPU wake-up rate, and if it is higher than a
|
||||
platform-specific threshold, the firmware demotes deep C-state requests
|
||||
to C1. For example, Linux requests C6, but firmware noticed too many
|
||||
wake-ups per second, and it keeps the CPU in C1. When the CPU stays in
|
||||
C1 long enough, the platform promotes it back to C6. This may improve
|
||||
some workloads' performance, but it may also increase power consumption.
|
||||
|
||||
.. _intel-idle-enumeration-of-states:
|
||||
|
||||
|
||||
@@ -329,6 +329,106 @@ information listed above is the same for all of the processors supporting the
|
||||
HWP feature, which is why ``intel_pstate`` works with all of them.]
|
||||
|
||||
|
||||
Support for Hybrid Processors
|
||||
=============================
|
||||
|
||||
Some processors supported by ``intel_pstate`` contain two or more types of CPU
|
||||
cores differing by the maximum turbo P-state, performance vs power characteristics,
|
||||
cache sizes, and possibly other properties. They are commonly referred to as
|
||||
hybrid processors. To support them, ``intel_pstate`` requires HWP to be enabled
|
||||
and it assumes the HWP performance units to be the same for all CPUs in the
|
||||
system, so a given HWP performance level always represents approximately the
|
||||
same physical performance regardless of the core (CPU) type.
|
||||
|
||||
Hybrid Processors with SMT
|
||||
--------------------------
|
||||
|
||||
On systems where SMT (Simultaneous Multithreading), also referred to as
|
||||
HyperThreading (HT) in the context of Intel processors, is enabled on at least
|
||||
one core, ``intel_pstate`` assigns performance-based priorities to CPUs. Namely,
|
||||
the priority of a given CPU reflects its highest HWP performance level which
|
||||
causes the CPU scheduler to generally prefer more performant CPUs, so the less
|
||||
performant CPUs are used when the other ones are fully loaded. However, SMT
|
||||
siblings (that is, logical CPUs sharing one physical core) are treated in a
|
||||
special way such that if one of them is in use, the effective priority of the
|
||||
other ones is lowered below the priorities of the CPUs located in the other
|
||||
physical cores.
|
||||
|
||||
This approach maximizes performance in the majority of cases, but unfortunately
|
||||
it also leads to excessive energy usage in some important scenarios, like video
|
||||
playback, which is not generally desirable. While there is no other viable
|
||||
choice with SMT enabled because the effective capacity and utilization of SMT
|
||||
siblings are hard to determine, hybrid processors without SMT can be handled in
|
||||
more energy-efficient ways.
|
||||
|
||||
.. _CAS:
|
||||
|
||||
Capacity-Aware Scheduling Support
|
||||
---------------------------------
|
||||
|
||||
The capacity-aware scheduling (CAS) support in the CPU scheduler is enabled by
|
||||
``intel_pstate`` by default on hybrid processors without SMT. CAS generally
|
||||
causes the scheduler to put tasks on a CPU so long as there is a sufficient
|
||||
amount of spare capacity on it, and if the utilization of a given task is too
|
||||
high for it, the task will need to go somewhere else.
|
||||
|
||||
Since CAS takes CPU capacities into account, it does not require CPU
|
||||
prioritization and it allows tasks to be distributed more symmetrically among
|
||||
the more performant and less performant CPUs. Once placed on a CPU with enough
|
||||
capacity to accommodate it, a task may just continue to run there regardless of
|
||||
whether or not the other CPUs are fully loaded, so on average CAS reduces the
|
||||
utilization of the more performant CPUs which causes the energy usage to be more
|
||||
balanced because the more performant CPUs are generally less energy-efficient
|
||||
than the less performant ones.
|
||||
|
||||
In order to use CAS, the scheduler needs to know the capacity of each CPU in
|
||||
the system and it needs to be able to compute scale-invariant utilization of
|
||||
CPUs, so ``intel_pstate`` provides it with the requisite information.
|
||||
|
||||
First of all, the capacity of each CPU is represented by the ratio of its highest
|
||||
HWP performance level, multiplied by 1024, to the highest HWP performance level
|
||||
of the most performant CPU in the system, which works because the HWP performance
|
||||
units are the same for all CPUs. Second, the frequency-invariance computations,
|
||||
carried out by the scheduler to always express CPU utilization in the same units
|
||||
regardless of the frequency it is currently running at, are adjusted to take the
|
||||
CPU capacity into account. All of this happens when ``intel_pstate`` has
|
||||
registered itself with the ``CPUFreq`` core and it has figured out that it is
|
||||
running on a hybrid processor without SMT.
|
||||
|
||||
Energy-Aware Scheduling Support
|
||||
-------------------------------
|
||||
|
||||
If ``CONFIG_ENERGY_MODEL`` has been set during kernel configuration and
|
||||
``intel_pstate`` runs on a hybrid processor without SMT, in addition to enabling
|
||||
`CAS <CAS_>`_ it registers an Energy Model for the processor. This allows the
|
||||
Energy-Aware Scheduling (EAS) support to be enabled in the CPU scheduler if
|
||||
``schedutil`` is used as the ``CPUFreq`` governor which requires ``intel_pstate``
|
||||
to operate in the `passive mode <Passive Mode_>`_.
|
||||
|
||||
The Energy Model registered by ``intel_pstate`` is artificial (that is, it is
|
||||
based on abstract cost values and it does not include any real power numbers)
|
||||
and it is relatively simple to avoid unnecessary computations in the scheduler.
|
||||
There is a performance domain in it for every CPU in the system and the cost
|
||||
values for these performance domains have been chosen so that running a task on
|
||||
a less performant (small) CPU appears to be always cheaper than running that
|
||||
task on a more performant (big) CPU. However, for two CPUs of the same type,
|
||||
the cost difference depends on their current utilization, and the CPU whose
|
||||
current utilization is higher generally appears to be a more expensive
|
||||
destination for a given task. This helps to balance the load among CPUs of the
|
||||
same type.
|
||||
|
||||
Since EAS works on top of CAS, high-utilization tasks are always migrated to
|
||||
CPUs with enough capacity to accommodate them, but thanks to EAS, low-utilization
|
||||
tasks tend to be placed on the CPUs that look less expensive to the scheduler.
|
||||
Effectively, this causes the less performant and less loaded CPUs to be
|
||||
preferred as long as they have enough spare capacity to run the given task
|
||||
which generally leads to reduced energy usage.
|
||||
|
||||
The Energy Model created by ``intel_pstate`` can be inspected by looking at
|
||||
the ``energy_model`` directory in ``debugfs`` (typically mounted on
|
||||
``/sys/kernel/debug/``).
|
||||
|
||||
|
||||
User Space Interface in ``sysfs``
|
||||
=================================
|
||||
|
||||
@@ -697,8 +797,8 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
|
||||
Limits`_ for details).
|
||||
|
||||
``no_cas``
|
||||
Do not enable capacity-aware scheduling (CAS) which is enabled by
|
||||
default on hybrid systems.
|
||||
Do not enable `capacity-aware scheduling <CAS_>`_ which is enabled by
|
||||
default on hybrid systems without SMT.
|
||||
|
||||
Diagnostics and Tuning
|
||||
======================
|
||||
|
||||
@@ -91,12 +91,22 @@ Attributes in each directory:
|
||||
``domain_id``
|
||||
This attribute is used to get the power domain id of this instance.
|
||||
|
||||
``die_id``
|
||||
This attribute is used to get the Linux die id of this instance.
|
||||
This attribute is only present for domains with core agents and
|
||||
when the CPUID leaf 0x1f presents die ID.
|
||||
|
||||
``fabric_cluster_id``
|
||||
This attribute is used to get the fabric cluster id of this instance.
|
||||
|
||||
``package_id``
|
||||
This attribute is used to get the package id of this instance.
|
||||
|
||||
``agent_types``
|
||||
This attribute displays all the hardware agents present within the
|
||||
domain. Each agent has the capability to control one or more hardware
|
||||
subsystems, which include: core, cache, memory, and I/O.
|
||||
|
||||
The other attributes are same as presented at package_*_die_* level.
|
||||
|
||||
In most of current use cases, the "max_freq_khz" and "min_freq_khz"
|
||||
|
||||
@@ -347,7 +347,7 @@ again.
|
||||
|
||||
[:ref:`details<uninstall>`]
|
||||
|
||||
.. _submit_improvements:
|
||||
.. _submit_improvements_qbtl:
|
||||
|
||||
Did you run into trouble following any of the above steps that is not cleared up
|
||||
by the reference section below? Or do you have ideas how to improve the text?
|
||||
@@ -1070,7 +1070,7 @@ complicated, and harder to follow.
|
||||
|
||||
That being said: this of course is a balancing act. Hence, if you think an
|
||||
additional use-case is worth describing, suggest it to the maintainers of this
|
||||
document, as :ref:`described above <submit_improvements>`.
|
||||
document, as :ref:`described above <submit_improvements_qbtl>`.
|
||||
|
||||
|
||||
..
|
||||
|
||||
@@ -41,7 +41,7 @@ If you are facing multiple issues with the Linux kernel at once, report each
|
||||
separately. While writing your report, include all information relevant to the
|
||||
issue, like the kernel and the distro used. In case of a regression, CC the
|
||||
regressions mailing list (regressions@lists.linux.dev) to your report. Also try
|
||||
to pin-point the culprit with a bisection; if you succeed, include its
|
||||
to pinpoint the culprit with a bisection; if you succeed, include its
|
||||
commit-id and CC everyone in the sign-off-by chain.
|
||||
|
||||
Once the report is out, answer any questions that come up and help where you
|
||||
@@ -206,7 +206,7 @@ Reporting issues only occurring in older kernel version lines
|
||||
This subsection is for you, if you tried the latest mainline kernel as outlined
|
||||
above, but failed to reproduce your issue there; at the same time you want to
|
||||
see the issue fixed in a still supported stable or longterm series or vendor
|
||||
kernels regularly rebased on those. If that the case, follow these steps:
|
||||
kernels regularly rebased on those. If that is the case, follow these steps:
|
||||
|
||||
* Prepare yourself for the possibility that going through the next few steps
|
||||
might not get the issue solved in older releases: the fix might be too big
|
||||
@@ -312,7 +312,7 @@ small modifications to a kernel based on a recent Linux version; that for
|
||||
example often holds true for the mainline kernels shipped by Debian GNU/Linux
|
||||
Sid or Fedora Rawhide. Some developers will also accept reports about issues
|
||||
with kernels from distributions shipping the latest stable kernel, as long as
|
||||
its only slightly modified; that for example is often the case for Arch Linux,
|
||||
it's only slightly modified; that for example is often the case for Arch Linux,
|
||||
regular Fedora releases, and openSUSE Tumbleweed. But keep in mind, you better
|
||||
want to use a mainline Linux and avoid using a stable kernel for this
|
||||
process, as outlined in the section 'Install a fresh kernel for testing' in more
|
||||
|
||||
@@ -75,6 +75,7 @@ Currently, these files are in /proc/sys/vm:
|
||||
- unprivileged_userfaultfd
|
||||
- user_reserve_kbytes
|
||||
- vfs_cache_pressure
|
||||
- vfs_cache_pressure_denom
|
||||
- watermark_boost_factor
|
||||
- watermark_scale_factor
|
||||
- zone_reclaim_mode
|
||||
@@ -131,6 +132,12 @@ to latency spikes in unsuspecting applications. The kernel employs
|
||||
various heuristics to avoid wasting CPU cycles if it detects that
|
||||
proactive compaction is not being effective.
|
||||
|
||||
Setting the value above 80 will, in addition to lowering the acceptable level
|
||||
of fragmentation, make the compaction code more sensitive to increases in
|
||||
fragmentation, i.e. compaction will trigger more often, but reduce
|
||||
fragmentation by a smaller amount.
|
||||
This makes the fragmentation level more stable over time.
|
||||
|
||||
Be careful when setting it to extreme values like 100, as that may
|
||||
cause excessive background compaction activity.
|
||||
|
||||
@@ -1017,19 +1024,28 @@ vfs_cache_pressure
|
||||
This percentage value controls the tendency of the kernel to reclaim
|
||||
the memory which is used for caching of directory and inode objects.
|
||||
|
||||
At the default value of vfs_cache_pressure=100 the kernel will attempt to
|
||||
reclaim dentries and inodes at a "fair" rate with respect to pagecache and
|
||||
swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer
|
||||
to retain dentry and inode caches. When vfs_cache_pressure=0, the kernel will
|
||||
never reclaim dentries and inodes due to memory pressure and this can easily
|
||||
lead to out-of-memory conditions. Increasing vfs_cache_pressure beyond 100
|
||||
causes the kernel to prefer to reclaim dentries and inodes.
|
||||
At the default value of vfs_cache_pressure=vfs_cache_pressure_denom the kernel
|
||||
will attempt to reclaim dentries and inodes at a "fair" rate with respect to
|
||||
pagecache and swapcache reclaim. Decreasing vfs_cache_pressure causes the
|
||||
kernel to prefer to retain dentry and inode caches. When vfs_cache_pressure=0,
|
||||
the kernel will never reclaim dentries and inodes due to memory pressure and
|
||||
this can easily lead to out-of-memory conditions. Increasing vfs_cache_pressure
|
||||
beyond vfs_cache_pressure_denom causes the kernel to prefer to reclaim dentries
|
||||
and inodes.
|
||||
|
||||
Increasing vfs_cache_pressure significantly beyond 100 may have negative
|
||||
performance impact. Reclaim code needs to take various locks to find freeable
|
||||
directory and inode objects. With vfs_cache_pressure=1000, it will look for
|
||||
ten times more freeable objects than there are.
|
||||
Increasing vfs_cache_pressure significantly beyond vfs_cache_pressure_denom may
|
||||
have negative performance impact. Reclaim code needs to take various locks to
|
||||
find freeable directory and inode objects. When vfs_cache_pressure equals
|
||||
(10 * vfs_cache_pressure_denom), it will look for ten times more freeable
|
||||
objects than there are.
|
||||
|
||||
Note: This setting should always be used together with vfs_cache_pressure_denom.
|
||||
|
||||
vfs_cache_pressure_denom
|
||||
========================
|
||||
|
||||
Defaults to 100 (minimum allowed value). Requires corresponding
|
||||
vfs_cache_pressure setting to take effect.
|
||||
|
||||
watermark_boost_factor
|
||||
======================
|
||||
|
||||
@@ -296,6 +296,39 @@ information is missing.
|
||||
To recover from this mode, one needs to flash a valid NVM image to the
|
||||
host controller in the same way it is done in the previous chapter.
|
||||
|
||||
Tunneling events
|
||||
----------------
|
||||
The driver sends ``KOBJ_CHANGE`` events to userspace when there is a
|
||||
tunneling change in the ``thunderbolt_domain``. The notification carries
|
||||
the following environment variables::
|
||||
|
||||
TUNNEL_EVENT=<EVENT>
|
||||
TUNNEL_DETAILS=0:12 <-> 1:20 (USB3)
|
||||
|
||||
Possible values for ``<EVENT>`` are:
|
||||
|
||||
activated
|
||||
The tunnel was activated (created).
|
||||
|
||||
changed
|
||||
There is a change in this tunnel. For example bandwidth allocation was
|
||||
changed.
|
||||
|
||||
deactivated
|
||||
The tunnel was torn down.
|
||||
|
||||
low bandwidth
|
||||
The tunnel is not getting optimal bandwidth.
|
||||
|
||||
insufficient bandwidth
|
||||
There is not enough bandwidth for the current tunnel requirements.
|
||||
|
||||
The ``TUNNEL_DETAILS`` is only provided if the tunnel is known. For
|
||||
example, in case of Firmware Connection Manager this is missing or does
|
||||
not provide full tunnel information. In case of Software Connection Manager
|
||||
this includes full tunnel details. The format currently matches what the
|
||||
driver uses when logging. This may change over time.
|
||||
|
||||
Networking over Thunderbolt cable
|
||||
---------------------------------
|
||||
Thunderbolt technology allows software communication between two hosts
|
||||
|
||||
@@ -267,7 +267,7 @@ culprit might be known already. For further details on what actually qualifies
|
||||
as a regression check out Documentation/admin-guide/reporting-regressions.rst.
|
||||
|
||||
If you run into any problems while following this guide or have ideas how to
|
||||
improve it, :ref:`please let the kernel developers know <submit_improvements>`.
|
||||
improve it, :ref:`please let the kernel developers know <submit_improvements_vbbr>`.
|
||||
|
||||
.. _introprep_bissbs:
|
||||
|
||||
@@ -1055,7 +1055,7 @@ follow these instructions.
|
||||
|
||||
[:ref:`details <introoptional_bisref>`]
|
||||
|
||||
.. _submit_improvements:
|
||||
.. _submit_improvements_vbbr:
|
||||
|
||||
Conclusion
|
||||
----------
|
||||
|
||||
@@ -151,6 +151,17 @@ When mounting an XFS filesystem, the following options are accepted.
|
||||
optional, and the log section can be separate from the data
|
||||
section or contained within it.
|
||||
|
||||
max_atomic_write=value
|
||||
Set the maximum size of an atomic write. The size may be
|
||||
specified in bytes, in kilobytes with a "k" suffix, in megabytes
|
||||
with a "m" suffix, or in gigabytes with a "g" suffix. The size
|
||||
cannot be larger than the maximum write size, larger than the
|
||||
size of any allocation group, or larger than the size of a
|
||||
remapping operation that the log can complete atomically.
|
||||
|
||||
The default value is to set the maximum I/O completion size
|
||||
to allow each CPU to handle one at a time.
|
||||
|
||||
max_open_zones=value
|
||||
Specify the max number of zones to keep open for writing on a
|
||||
zoned rt device. Many open zones aids file data separation
|
||||
|
||||
@@ -72,14 +72,15 @@ there are some issues with their usage.
|
||||
process could be migrated to another CPU by the time it uses the
|
||||
register value, unless the CPU affinity is set. Hence, there is no
|
||||
guarantee that the value reflects the processor that it is
|
||||
currently executing on. The REVIDR is not exposed due to this
|
||||
constraint, as REVIDR makes sense only in conjunction with the
|
||||
MIDR. Alternately, MIDR_EL1 and REVIDR_EL1 are exposed via sysfs
|
||||
at::
|
||||
currently executing on. REVIDR and AIDR are not exposed due to this
|
||||
constraint, as these registers only make sense in conjunction with
|
||||
the MIDR. Alternately, MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are exposed
|
||||
via sysfs at::
|
||||
|
||||
/sys/devices/system/cpu/cpu$ID/regs/identification/
|
||||
\- midr
|
||||
\- revidr
|
||||
\- midr_el1
|
||||
\- revidr_el1
|
||||
\- aidr_el1
|
||||
|
||||
3. Implementation
|
||||
--------------------
|
||||
|
||||
@@ -57,6 +57,8 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Ampere | AmpereOne AC04 | AC04_CPU_10 | AMPERE_ERRATUM_AC03_CPU_38 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Ampere | AmpereOne AC04 | AC04_CPU_23 | AMPERE_ERRATUM_AC04_CPU_23 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
|
||||
@@ -69,8 +69,8 @@ model features for SME is included in Appendix A.
|
||||
vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
|
||||
used for SVE vectors.
|
||||
|
||||
* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified,
|
||||
in which case it is set to 0.
|
||||
* On thread creation PSTATE.ZA and TPIDR2_EL0 are preserved unless CLONE_VM
|
||||
is specified, in which case PSTATE.ZA is set to 0 and TPIDR2_EL0 is set to 0.
|
||||
|
||||
2. Vector lengths
|
||||
------------------
|
||||
@@ -115,7 +115,7 @@ be zeroed.
|
||||
5. Signal handling
|
||||
-------------------
|
||||
|
||||
* Signal handlers are invoked with streaming mode and ZA disabled.
|
||||
* Signal handlers are invoked with PSTATE.SM=0, PSTATE.ZA=0, and TPIDR2_EL0=0.
|
||||
|
||||
* A new signal frame record TPIDR2_MAGIC is added formatted as a struct
|
||||
tpidr2_context to allow access to TPIDR2_EL0 from signal handlers.
|
||||
@@ -241,7 +241,7 @@ prctl(PR_SME_SET_VL, unsigned long arg)
|
||||
length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
|
||||
does not constitute a change to the vector length for this purpose.
|
||||
|
||||
* Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
|
||||
* Changing the vector length causes PSTATE.ZA to be cleared.
|
||||
Calling PR_SME_SET_VL with vl equal to the thread's current vector
|
||||
length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
|
||||
does not constitute a change to the vector length for this purpose.
|
||||
|
||||
@@ -40,6 +40,12 @@ Build the Linux kernel as usual::
|
||||
make ARCH=openrisc CROSS_COMPILE="or1k-linux-" defconfig
|
||||
make ARCH=openrisc CROSS_COMPILE="or1k-linux-"
|
||||
|
||||
If you want to embed initramfs in the kernel, also pass ``CONFIG_INITRAMFS_SOURCE``. For example::
|
||||
|
||||
make ARCH=openrisc CROSS_COMPILE="or1k-linux-" CONFIG_INITRAMFS_SOURCE="path/to/rootfs path/to/devnodes"
|
||||
|
||||
For more information on this, please check Documentation/filesystems/ramfs-rootfs-initramfs.rst.
|
||||
|
||||
3) Running on FPGA (optional)
|
||||
|
||||
The OpenRISC community typically uses FuseSoC to manage building and programming
|
||||
|
||||
104
Documentation/arch/powerpc/htm.rst
Normal file
104
Documentation/arch/powerpc/htm.rst
Normal file
@@ -0,0 +1,104 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. _htm:
|
||||
|
||||
===================================
|
||||
HTM (Hardware Trace Macro)
|
||||
===================================
|
||||
|
||||
Athira Rajeev, 2 Mar 2025
|
||||
|
||||
.. contents::
|
||||
:depth: 3
|
||||
|
||||
|
||||
Basic overview
|
||||
==============
|
||||
|
||||
H_HTM is used as an interface for executing Hardware Trace Macro (HTM)
|
||||
functions, including setup, configuration, control and dumping of the HTM data.
|
||||
For using HTM, it is required to setup HTM buffers and HTM operations can
|
||||
be controlled using the H_HTM hcall. The hcall can be invoked for any core/chip
|
||||
of the system from within a partition itself. To use this feature, a debugfs
|
||||
folder called "htmdump" is present under /sys/kernel/debug/powerpc.
|
||||
|
||||
|
||||
HTM debugfs example usage
|
||||
=========================
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
# ls /sys/kernel/debug/powerpc/htmdump/
|
||||
coreindexonchip htmcaps htmconfigure htmflags htminfo htmsetup
|
||||
htmstart htmstatus htmtype nodalchipindex nodeindex trace
|
||||
|
||||
Details on each file:
|
||||
|
||||
* nodeindex, nodalchipindex, coreindexonchip: specify which partition to configure the HTM for.
|
||||
* htmtype: specifies the type of HTM. Supported target is hardwareTarget.
|
||||
* trace: is to read the HTM data.
|
||||
* htmconfigure: Configure/Deconfigure the HTM. Writing 1 to the file will configure the trace, writing 0 to the file will deconfigure it.
|
||||
* htmstart: Start/Stop the HTM. Writing 1 to the file will start the tracing, writing 0 to the file will stop the tracing.
|
||||
* htmstatus: get the status of HTM. This is needed to understand the HTM state after each operation.
|
||||
* htmsetup: set the HTM buffer size. The size of the HTM buffer is given as a power of 2.
|
||||
* htminfo: provides the system processor configuration details. This is needed to understand the appropriate values for nodeindex, nodalchipindex, coreindexonchip.
|
||||
* htmcaps : provides the HTM capabilities like minimum/maximum buffer size, what kind of tracing the HTM supports etc.
|
||||
* htmflags : allows passing flags to the hcall. Currently supports controlling the wrapping of HTM buffer.
|
||||
|
||||
To see the system processor configuration details:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
# cat /sys/kernel/debug/powerpc/htmdump/htminfo > htminfo_file
|
||||
|
||||
The result can be interpreted using hexdump.
|
||||
|
||||
To collect HTM traces for a partition represented by nodeindex as
|
||||
zero, nodalchipindex as 1 and coreindexonchip as 12:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
# cd /sys/kernel/debug/powerpc/htmdump/
|
||||
# echo 2 > htmtype
|
||||
# echo 33 > htmsetup ( sets 8GB memory for HTM buffer, number is size in power of 2 )
|
||||
|
||||
This requires a CEC reboot to get the HTM buffers allocated.
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
# cd /sys/kernel/debug/powerpc/htmdump/
|
||||
# echo 2 > htmtype
|
||||
# echo 0 > nodeindex
|
||||
# echo 1 > nodalchipindex
|
||||
# echo 12 > coreindexonchip
|
||||
# echo 1 > htmflags # to set noWrap for HTM buffers
|
||||
# echo 1 > htmconfigure # Configure the HTM
|
||||
# echo 1 > htmstart # Start the HTM
|
||||
# echo 0 > htmstart # Stop the HTM
|
||||
# echo 0 > htmconfigure # Deconfigure the HTM
|
||||
# cat htmstatus # Dump the status of HTM entries as data
|
||||
|
||||
The above sets the htmtype and core details, and then executes the respective HTM operations.
|
||||
|
||||
Read the HTM trace data
|
||||
========================
|
||||
|
||||
After starting the trace collection, run the workload
|
||||
of interest. Stop the trace collection after required period
|
||||
of time, and read the trace file.
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
# cat /sys/kernel/debug/powerpc/htmdump/trace > trace_file
|
||||
|
||||
This trace file will contain the relevant instruction traces
|
||||
collected during the workload execution, and can be used as an
|
||||
input file for trace decoders to understand data.
|
||||
|
||||
Benefits of using HTM debugfs interface
|
||||
=======================================
|
||||
|
||||
It is now possible to collect traces for a particular core/chip
|
||||
from within any partition of the system and decode it. Through
|
||||
this enablement, a small partition can be dedicated to collect the
|
||||
trace data and analyze to provide important information for Performance
|
||||
analysis, Software tuning, or Hardware debug.
|
||||
@@ -208,13 +208,9 @@ associated values for each ID in the GSB::
|
||||
flags:
|
||||
Bit 0: getGuestWideState: Request state of the Guest instead
|
||||
of an individual VCPU.
|
||||
Bit 1: takeOwnershipOfVcpuState Indicate the L1 is taking
|
||||
over ownership of the VCPU state and that the L0 can free
|
||||
the storage holding the state. The VCPU state will need to
|
||||
be returned to the Hypervisor via H_GUEST_SET_STATE prior
|
||||
to H_GUEST_RUN_VCPU being called for this VCPU. The data
|
||||
returned in the dataBuffer is in a Hypervisor internal
|
||||
format.
|
||||
Bit 1: getHostWideState: Request stats of the Host. This causes
|
||||
the guestId and vcpuId parameters to be ignored and attempting
|
||||
to get the VCPU/Guest state will cause an error.
|
||||
Bits 2-63: Reserved
|
||||
guestId: ID obtained from H_GUEST_CREATE
|
||||
vcpuId: ID of the vCPU passed to H_GUEST_CREATE_VCPU
|
||||
@@ -406,9 +402,10 @@ the partition like the timebase offset and partition scoped page
|
||||
table information.
|
||||
|
||||
+--------+-------+----+--------+----------------------------------+
|
||||
| ID | Size | RW | Thread | Details |
|
||||
| | Bytes | | Guest | |
|
||||
| | | | Scope | |
|
||||
| ID | Size | RW |(H)ost | Details |
|
||||
| | Bytes | |(G)uest | |
|
||||
| | | |(T)hread| |
|
||||
| | | |Scope | |
|
||||
+========+=======+====+========+==================================+
|
||||
| 0x0000 | | RW | TG | NOP element |
|
||||
+--------+-------+----+--------+----------------------------------+
|
||||
@@ -434,6 +431,29 @@ table information.
|
||||
| | | | |- 0x8 Table size. |
|
||||
+--------+-------+----+--------+----------------------------------+
|
||||
| 0x0007-| | | | Reserved |
|
||||
| 0x07FF | | | | |
|
||||
+--------+-------+----+--------+----------------------------------+
|
||||
| 0x0800 | 0x08 | R | H | Current usage in bytes of the |
|
||||
| | | | | L0's Guest Management Space |
|
||||
| | | | | for an L1-Lpar. |
|
||||
+--------+-------+----+--------+----------------------------------+
|
||||
| 0x0801 | 0x08 | R | H | Max bytes available in the |
|
||||
| | | | | L0's Guest Management Space for |
|
||||
| | | | | an L1-Lpar |
|
||||
+--------+-------+----+--------+----------------------------------+
|
||||
| 0x0802 | 0x08 | R | H | Current usage in bytes of the |
|
||||
| | | | | L0's Guest Page Table Management |
|
||||
| | | | | Space for an L1-Lpar |
|
||||
+--------+-------+----+--------+----------------------------------+
|
||||
| 0x0803 | 0x08 | R | H | Max bytes available in the L0's |
|
||||
| | | | | Guest Page Table Management |
|
||||
| | | | | Space for an L1-Lpar |
|
||||
+--------+-------+----+--------+----------------------------------+
|
||||
| 0x0804 | 0x08 | R | H | Cumulative Reclaimed bytes from |
|
||||
| | | | | L0 Guest's Page Table Management |
|
||||
| | | | | Space due to overcommit |
|
||||
+--------+-------+----+--------+----------------------------------+
|
||||
| 0x0805-| | | | Reserved |
|
||||
| 0x0BFF | | | | |
|
||||
+--------+-------+----+--------+----------------------------------+
|
||||
| 0x0C00 | 0x10 | RW | T |Run vCPU Input Buffer: |
|
||||
|
||||
@@ -10,13 +10,45 @@ modified by the program itself. Instruction storage and the instruction cache
|
||||
program must enforce its own synchronization with the unprivileged fence.i
|
||||
instruction.
|
||||
|
||||
However, the default Linux ABI prohibits the use of fence.i in userspace
|
||||
applications. At any point the scheduler may migrate a task onto a new hart. If
|
||||
migration occurs after the userspace synchronized the icache and instruction
|
||||
storage with fence.i, the icache on the new hart will no longer be clean. This
|
||||
is due to the behavior of fence.i only affecting the hart that it is called on.
|
||||
Thus, the hart that the task has been migrated to may not have synchronized
|
||||
instruction storage and icache.
|
||||
CMODX in the Kernel Space
|
||||
-------------------------
|
||||
|
||||
Dynamic ftrace
|
||||
---------------------
|
||||
|
||||
Essentially, dynamic ftrace directs the control flow by inserting a function
|
||||
call at each patchable function entry, and patches it dynamically at runtime to
|
||||
enable or disable the redirection. In the case of RISC-V, 2 instructions,
|
||||
AUIPC + JALR, are required to compose a function call. However, it is impossible
|
||||
to patch 2 instructions and expect that a concurrent read-side executes them
|
||||
without a race condition. This series makes atomic code patching possible in
|
||||
RISC-V ftrace. Kernel preemption makes things even worse as it allows the old
|
||||
state to persist across the patching process with stop_machine().
|
||||
|
||||
In order to get rid of stop_machine() and run dynamic ftrace with full kernel
|
||||
preemption, we partially initialize each patchable function entry at boot-time,
|
||||
setting the first instruction to AUIPC, and the second to NOP. Now, atomic
|
||||
patching is possible because the kernel only has to update one instruction.
|
||||
According to Ziccif, as long as an instruction is naturally aligned, the ISA
|
||||
guarantees an atomic update.
|
||||
|
||||
By fixing down the first instruction, AUIPC, the range of the ftrace trampoline
|
||||
is limited to +-2K from the predetermined target, ftrace_caller, due to the lack
|
||||
of immediate encoding space in RISC-V. To address the issue, we introduce
|
||||
CALL_OPS, where 8B of naturally aligned metadata is added in front of each
|
||||
patchable function. The metadata is resolved at the first trampoline, then the
|
||||
execution can be directed to another custom trampoline.
|
||||
|
||||
CMODX in the User Space
|
||||
-----------------------
|
||||
|
||||
Though fence.i is an unprivileged instruction, the default Linux ABI prohibits
|
||||
the use of fence.i in userspace applications. At any point the scheduler may
|
||||
migrate a task onto a new hart. If migration occurs after the userspace
|
||||
synchronized the icache and instruction storage with fence.i, the icache on the
|
||||
new hart will no longer be clean. This is due to the behavior of fence.i only
|
||||
affecting the hart that it is called on. Thus, the hart that the task has been
|
||||
migrated to may not have synchronized instruction storage and icache.
|
||||
|
||||
There are two ways to solve this problem: use the riscv_flush_icache() syscall,
|
||||
or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
|
||||
|
||||
@@ -271,6 +271,10 @@ The following keys are defined:
|
||||
* :c:macro:`RISCV_HWPROBE_EXT_ZICBOM`: The Zicbom extension is supported, as
|
||||
ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_EXT_ZABHA`: The Zabha extension is supported as
|
||||
ratified in commit 49f49c842ff9 ("Update to Rafified state") of
|
||||
riscv-zabha.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to
|
||||
:c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was
|
||||
mistakenly classified as a bitmask rather than a value.
|
||||
@@ -335,3 +339,25 @@ The following keys are defined:
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE`: An unsigned int which
|
||||
represents the size of the Zicbom block in bytes.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0`: A bitmask containing the
|
||||
sifive vendor extensions that are compatible with the
|
||||
:c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior.
|
||||
|
||||
* SIFIVE
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD`: The Xsfvqmaccdod vendor
|
||||
extension is supported in version 1.1 of SiFive Int8 Matrix Multiplication
|
||||
Extensions Specification.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ`: The Xsfvqmaccqoq vendor
|
||||
extension is supported in version 1.1 of SiFive Int8 Matrix Multiplication
|
||||
Instruction Extensions Specification.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF`: The Xsfvfnrclipxfqf
|
||||
vendor extension is supported in version 1.0 of SiFive FP32-to-int8 Ranged
|
||||
Clip Instructions Extensions Specification.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ`: The Xsfvfwmaccqqq
|
||||
vendor extension is supported in version 1.0 of Matrix Multiply Accumulate
|
||||
Instruction Extensions Specification.
|
||||
368
Documentation/arch/x86/amd-debugging.rst
Normal file
368
Documentation/arch/x86/amd-debugging.rst
Normal file
@@ -0,0 +1,368 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Debugging AMD Zen systems
|
||||
+++++++++++++++++++++++++
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This document describes techniques that are useful for debugging issues with
|
||||
AMD Zen systems. It is intended for use by developers and technical users
|
||||
to help identify and resolve issues.
|
||||
|
||||
S3 vs s2idle
|
||||
============
|
||||
|
||||
On AMD systems, it's not possible to simultaneously support suspend-to-RAM (S3)
|
||||
and suspend-to-idle (s2idle). To confirm which mode your system supports you
|
||||
can look at ``cat /sys/power/mem_sleep``. If it shows ``s2idle [deep]`` then
|
||||
*S3* is supported. If it shows ``[s2idle]`` then *s2idle* is
|
||||
supported.
|
||||
|
||||
On systems that support *S3*, the firmware will be utilized to put all hardware into
|
||||
the appropriate low power state.
|
||||
|
||||
On systems that support *s2idle*, the kernel will be responsible for transitioning devices
|
||||
into the appropriate low power state. When all devices are in the appropriate low
|
||||
power state, the hardware will transition into a hardware sleep state.
|
||||
|
||||
After a suspend cycle you can tell how much time was spent in a hardware sleep
|
||||
state by looking at ``cat /sys/power/suspend_stats/last_hw_sleep``.
|
||||
|
||||
This flowchart explains how the AMD s2idle suspend flow works.
|
||||
|
||||
.. kernel-figure:: suspend.svg
|
||||
|
||||
This flowchart explains how the AMD s2idle resume flow works.
|
||||
|
||||
.. kernel-figure:: resume.svg
|
||||
|
||||
s2idle debugging tool
|
||||
=====================
|
||||
|
||||
As there are a lot of places that problems can occur, a debugging tool has been
|
||||
created at
|
||||
`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
|
||||
that can help test for common problems and offer suggestions.
|
||||
|
||||
If you have an s2idle issue, it's best to start with this and follow instructions
|
||||
from its findings. If you continue to have an issue, raise a bug with the
|
||||
report generated from this script to
|
||||
`drm/amd gitlab <https://gitlab.freedesktop.org/drm/amd/-/issues/new?issuable_template=s2idle_BUG_TEMPLATE>`_.
|
||||
|
||||
Spurious s2idle wakeups from an IRQ
|
||||
===================================
|
||||
|
||||
Spurious wakeups will generally have an IRQ set to ``/sys/power/pm_wakeup_irq``.
|
||||
This can be matched to ``/proc/interrupts`` to determine what device woke the system.
|
||||
|
||||
If this isn't enough to debug the problem, then the following sysfs files
|
||||
can be set to add more verbosity to the wakeup process: ::
|
||||
|
||||
# echo 1 | sudo tee /sys/power/pm_debug_messages
|
||||
# echo 1 | sudo tee /sys/power/pm_print_times
|
||||
|
||||
After making those changes, the kernel will display messages that can
|
||||
be traced back to kernel s2idle loop code as well as display any active
|
||||
GPIO sources while waking up.
|
||||
|
||||
If the wakeup is caused by the ACPI SCI, additional ACPI debugging may be
|
||||
needed. These commands can enable additional trace data: ::
|
||||
|
||||
# echo enable | sudo tee /sys/module/acpi/parameters/trace_state
|
||||
# echo 1 | sudo tee /sys/module/acpi/parameters/aml_debug_output
|
||||
# echo 0x0800000f | sudo tee /sys/module/acpi/parameters/debug_level
|
||||
# echo 0xffff0000 | sudo tee /sys/module/acpi/parameters/debug_layer
|
||||
|
||||
Spurious s2idle wakeups from a GPIO
|
||||
===================================
|
||||
|
||||
If a GPIO is active when waking up the system ideally you would look at the
|
||||
schematic to determine what device it is associated with. If the schematic
|
||||
is not available, another tactic is to look at the ACPI _EVT() entry
|
||||
to determine what device is notified when that GPIO is active.
|
||||
|
||||
For a hypothetical example, say that GPIO 59 woke up the system. You can
|
||||
look at the SSDT to determine what device is notified when GPIO 59 is active.
|
||||
|
||||
First convert the GPIO number into hex. ::
|
||||
|
||||
$ python3 -c "print(hex(59))"
|
||||
0x3b
|
||||
|
||||
Next determine which ACPI table has the ``_EVT`` entry. For example: ::
|
||||
|
||||
$ sudo grep EVT /sys/firmware/acpi/tables/SSDT*
|
||||
grep: /sys/firmware/acpi/tables/SSDT27: binary file matches
|
||||
|
||||
Decode this table::
|
||||
|
||||
$ sudo cp /sys/firmware/acpi/tables/SSDT27 .
|
||||
$ sudo iasl -d SSDT27
|
||||
|
||||
Then look at the table and find the matching entry for GPIO 0x3b. ::
|
||||
|
||||
Case (0x3B)
|
||||
{
|
||||
M000 (0x393B)
|
||||
M460 (" Notify (\\_SB.PCI0.GP17.XHC1, 0x02)\n", Zero, Zero, Zero, Zero, Zero, Zero)
|
||||
Notify (\_SB.PCI0.GP17.XHC1, 0x02) // Device Wake
|
||||
}
|
||||
|
||||
You can see in this case that the device ``\_SB.PCI0.GP17.XHC1`` is notified
|
||||
when GPIO 59 is active. It's obvious this is an XHCI controller, but to go a
|
||||
step further you can figure out which XHCI controller it is by matching it to
|
||||
ACPI::
|
||||
|
||||
$ grep "PCI0.GP17.XHC1" /sys/bus/acpi/devices/*/path
|
||||
/sys/bus/acpi/devices/device:2d/path:\_SB_.PCI0.GP17.XHC1
|
||||
/sys/bus/acpi/devices/device:2e/path:\_SB_.PCI0.GP17.XHC1.RHUB
|
||||
/sys/bus/acpi/devices/device:2f/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1
|
||||
/sys/bus/acpi/devices/device:30/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM0
|
||||
/sys/bus/acpi/devices/device:31/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM1
|
||||
/sys/bus/acpi/devices/device:32/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT2
|
||||
/sys/bus/acpi/devices/LNXPOWER:0d/path:\_SB_.PCI0.GP17.XHC1.PWRS
|
||||
|
||||
Here you can see it matches to ``device:2d``. Look at the ``physical_node``
|
||||
to determine what PCI device that actually is. ::
|
||||
|
||||
$ ls -l /sys/bus/acpi/devices/device:2d/physical_node
|
||||
lrwxrwxrwx 1 root root 0 Feb 12 13:22 /sys/bus/acpi/devices/device:2d/physical_node -> ../../../../../pci0000:00/0000:00:08.1/0000:c2:00.4
|
||||
|
||||
So there you have it: the PCI device associated with this GPIO wakeup was ``0000:c2:00.4``.
|
||||
|
||||
The ``amd_s2idle.py`` script will capture most of these artifacts for you.
|
||||
|
||||
s2idle PM debug messages
|
||||
========================
|
||||
|
||||
During the s2idle flow on AMD systems, the ACPI LPS0 driver is responsible
|
||||
to check all uPEP constraints. Failing uPEP constraints does not prevent
|
||||
s0i3 entry. This means that if some constraints are not met, it is possible
|
||||
the kernel may attempt to enter s2idle even if there are some known issues.
|
||||
|
||||
To activate PM debugging, either specify the ``pm_debug_messages`` kernel
|
||||
command-line option at boot or write to ``/sys/power/pm_debug_messages``.
|
||||
Unmet constraints will be displayed in the kernel log and can be
|
||||
viewed by logging tools that process kernel ring buffer like ``dmesg`` or
|
||||
``journalctl``.
|
||||
|
||||
If the system freezes on entry/exit before these messages are flushed, a
|
||||
useful debugging tactic is to unbind the ``amd_pmc`` driver to prevent
|
||||
notification to the platform to start s0i3 entry. This will stop the
|
||||
system from freezing on entry or exit and let you view all the failed
|
||||
constraints. ::
|
||||
|
||||
cd /sys/bus/platform/drivers/amd_pmc
|
||||
ls | grep AMD | sudo tee unbind
|
||||
|
||||
After doing this, run the suspend cycle and look specifically for errors around: ::
|
||||
|
||||
ACPI: LPI: Constraint not met; min power state:%s current power state:%s
|
||||
|
||||
Historical examples of s2idle issues
|
||||
====================================
|
||||
|
||||
To help understand the types of issues that can occur and how to debug them,
|
||||
here are some historical examples of s2idle issues that have been resolved.
|
||||
|
||||
Core offlining
|
||||
--------------
|
||||
An end user had reported that taking a core offline would prevent the system
|
||||
from properly entering s0i3. This was debugged using internal AMD tools
|
||||
to capture and display a stream of metrics from the hardware showing what changed
|
||||
when a core was offlined. It was determined that the hardware didn't get
|
||||
notification the offline cores were in the deepest state, and so it prevented
|
||||
the CPU from going into the deepest state. The issue was debugged to a missing
|
||||
command to put cores into C3 upon offline.
|
||||
|
||||
`commit d6b88ce2eb9d2 ("ACPI: processor idle: Allow playing dead in C3 state") <https://git.kernel.org/torvalds/c/d6b88ce2eb9d2>`_
|
||||
|
||||
Corruption after resume
|
||||
-----------------------
|
||||
A big problem that occurred with Rembrandt was that there was graphical
|
||||
corruption after resume. This happened because of a misalignment of PSP
|
||||
and driver responsibility. The PSP will save and restore DMCUB, but the
|
||||
driver assumed it needed to reset DMCUB on resume.
|
||||
This actually was a misalignment for earlier silicon as well, but was not
|
||||
observed.
|
||||
|
||||
`commit 79d6b9351f086 ("drm/amd/display: Don't reinitialize DMCUB on s0ix resume") <https://git.kernel.org/torvalds/c/79d6b9351f086>`_
|
||||
|
||||
Back to Back suspends fail
|
||||
--------------------------
|
||||
When using a wakeup source that triggers the IRQ to wake up, a bug in the
|
||||
pinctrl-amd driver may capture the wrong state of the IRQ and prevent the
|
||||
system going back to sleep properly.
|
||||
|
||||
`commit b8c824a869f22 ("pinctrl: amd: Don't save/restore interrupt status and wake status bits") <https://git.kernel.org/torvalds/c/b8c824a869f22>`_
|
||||
|
||||
Spurious timer based wakeup after 5 minutes
|
||||
-------------------------------------------
|
||||
The HPET was being used to program the wakeup source for the system, however
|
||||
this was causing a spurious wakeup after 5 minutes. The correct alarm to use
|
||||
was the ACPI alarm.
|
||||
|
||||
`commit 3d762e21d5637 ("rtc: cmos: Use ACPI alarm for non-Intel x86 systems too") <https://git.kernel.org/torvalds/c/3d762e21d5637>`_
|
||||
|
||||
Disk disappears after resume
|
||||
----------------------------
|
||||
After resuming from s2idle, the NVME disk would disappear. This was due to the
|
||||
BIOS not specifying the _DSD StorageD3Enable property. This caused the NVME
|
||||
driver not to put the disk into the expected state at suspend and to fail
|
||||
on resume.
|
||||
|
||||
`commit e79a10652bbd3 ("ACPI: x86: Force StorageD3Enable on more products") <https://git.kernel.org/torvalds/c/e79a10652bbd3>`_
|
||||
|
||||
Spurious IRQ1
|
||||
-------------
|
||||
A number of Renoir, Lucienne, Cezanne, & Barcelo platforms have a
|
||||
platform firmware bug where IRQ1 is triggered during s0i3 resume.
|
||||
|
||||
This was fixed in the platform firmware, but a number of systems didn't
|
||||
receive any more platform firmware updates.
|
||||
|
||||
`commit 8e60615e89321 ("platform/x86/amd: pmc: Disable IRQ1 wakeup for RN/CZN") <https://git.kernel.org/torvalds/c/8e60615e89321>`_
|
||||
|
||||
Hardware timeout
|
||||
----------------
|
||||
The hardware performs many actions besides accepting the values from
|
||||
amd-pmc driver. As the communication path with the hardware is a mailbox,
|
||||
it's possible that it might not respond quickly enough.
|
||||
This issue manifested as a failure to suspend: ::
|
||||
|
||||
PM: dpm_run_callback(): acpi_subsys_suspend_noirq+0x0/0x50 returns -110
|
||||
amd_pmc AMDI0005:00: PM: failed to suspend noirq: error -110
|
||||
|
||||
The timing problem was identified by comparing the values of the idle mask.
|
||||
|
||||
`commit 3c3c8e88c8712 ("platform/x86: amd-pmc: Increase the response register timeout") <https://git.kernel.org/torvalds/c/3c3c8e88c8712>`_
|
||||
|
||||
Failed to reach hardware sleep state with panel on
|
||||
--------------------------------------------------
|
||||
On some Strix systems certain panels were observed to block the system from
|
||||
entering a hardware sleep state if the internal panel was on during the sequence.
|
||||
|
||||
Even though the panel got turned off during suspend it exposed a timing problem
|
||||
where an interrupt caused the display hardware to wake up and block low power
|
||||
state entry.
|
||||
|
||||
`commit 40b8c14936bd2 ("drm/amd/display: Disable unneeded hpd interrupts during dm_init") <https://git.kernel.org/torvalds/c/40b8c14936bd2>`_
|
||||
|
||||
Runtime power consumption issues
|
||||
================================
|
||||
|
||||
Runtime power consumption is influenced by many factors, including but not
|
||||
limited to the configuration of the PCIe Active State Power Management (ASPM),
|
||||
the display brightness, the EPP policy of the CPU, and the power management
|
||||
of the devices.
|
||||
|
||||
ASPM
|
||||
----
|
||||
For the best runtime power consumption, ASPM should be programmed as intended
|
||||
by the BIOS from the hardware vendor. To accomplish this the Linux kernel
|
||||
should be compiled with ``CONFIG_PCIEASPM_DEFAULT`` set to ``y`` and the
|
||||
sysfs file ``/sys/module/pcie_aspm/parameters/policy`` should not be modified.
|
||||
|
||||
Most notably, if L1.2 is not configured properly for any devices, the SoC
|
||||
will not be able to enter the deepest idle state.
|
||||
|
||||
EPP Policy
|
||||
----------
|
||||
The ``energy_performance_preference`` sysfs file can be used to set a bias
|
||||
of efficiency or performance for a CPU. This has a direct effect on
|
||||
the battery life when more heavily biased towards performance.
|
||||
|
||||
|
||||
BIOS debug messages
|
||||
===================
|
||||
|
||||
Most OEM machines don't have a serial UART for outputting kernel or BIOS
|
||||
debug messages. However BIOS debug messages are useful for understanding
|
||||
both BIOS bugs and bugs with the Linux kernel drivers that call BIOS AML.
|
||||
|
||||
As the BIOS on most OEM AMD systems is based on an AMD reference BIOS,
|
||||
the infrastructure used for exporting debugging messages is often the same
|
||||
as AMD reference BIOS.
|
||||
|
||||
Manually Parsing
|
||||
----------------
|
||||
There is generally an ACPI method ``\M460`` that different paths of the AML
|
||||
will call to emit a message to the BIOS serial log. This method takes
|
||||
7 arguments, with the first being a string and the rest being optional
|
||||
integers::
|
||||
|
||||
Method (M460, 7, Serialized)
|
||||
|
||||
Here is an example of a string that BIOS AML may call out using ``\M460``::
|
||||
|
||||
M460 (" OEM-ASL-PCIe Address (0x%X)._REG (%d %d) PCSA = %d\n", DADR, Arg0, Arg1, PCSA, Zero, Zero)
|
||||
|
||||
Normally when executed, the ``\M460`` method would populate the additional
|
||||
arguments into the string. In order to get these messages from the Linux
|
||||
kernel a hook has been added into ACPICA that can capture the *arguments*
|
||||
sent to ``\M460`` and print them to the kernel ring buffer.
|
||||
For example the following message could be emitted into kernel ring buffer::
|
||||
|
||||
extrace-0174 ex_trace_args : " OEM-ASL-PCIe Address (0x%X)._REG (%d %d) PCSA = %d\n", ec106000, 2, 1, 1, 0, 0
|
||||
|
||||
In order to get these messages, you need to compile with ``CONFIG_ACPI_DEBUG``
|
||||
and then turn on the following ACPICA tracing parameters.
|
||||
This can be done either on the kernel command line or at runtime:
|
||||
|
||||
* ``acpi.trace_method_name=\M460``
|
||||
* ``acpi.trace_state=method``
|
||||
|
||||
NOTE: These can be very noisy at bootup. If you turn these parameters on
|
||||
the kernel command line, please also consider turning up ``CONFIG_LOG_BUF_SHIFT``
|
||||
to a larger size such as 17 to avoid losing early boot messages.
|
||||
|
||||
Tool assisted Parsing
|
||||
---------------------
|
||||
As mentioned above, parsing by hand can be tedious, especially with a lot of
|
||||
messages. To help with this, a tool has been created at
|
||||
`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
|
||||
to help parse the messages.
|
||||
|
||||
Random reboot issues
|
||||
====================
|
||||
|
||||
When a random reboot occurs, the high-level reason for the reboot is stored
|
||||
in a register that will persist onto the next boot.
|
||||
|
||||
There are 6 classes of reasons for the reboot:
|
||||
* Software induced
|
||||
* Power state transition
|
||||
* Pin induced
|
||||
* Hardware induced
|
||||
* Remote reset
|
||||
* Internal CPU event
|
||||
|
||||
.. csv-table::
|
||||
:header: "Bit", "Type", "Reason"
|
||||
:align: left
|
||||
|
||||
"0", "Pin", "thermal pin BP_THERMTRIP_L was tripped"
|
||||
"1", "Pin", "power button was pressed for 4 seconds"
|
||||
"2", "Pin", "shutdown pin was tripped"
|
||||
"4", "Remote", "remote ASF power off command was received"
|
||||
"9", "Internal", "internal CPU thermal limit was tripped"
|
||||
"16", "Pin", "system reset pin BP_SYS_RST_L was tripped"
|
||||
"17", "Software", "software issued PCI reset"
|
||||
"18", "Software", "software wrote 0x4 to reset control register 0xCF9"
|
||||
"19", "Software", "software wrote 0x6 to reset control register 0xCF9"
|
||||
"20", "Software", "software wrote 0xE to reset control register 0xCF9"
|
||||
"21", "ACPI-state", "ACPI power state transition occurred"
|
||||
"22", "Pin", "keyboard reset pin KB_RST_L was tripped"
|
||||
"23", "Internal", "internal CPU shutdown event occurred"
|
||||
"24", "Hardware", "system failed to boot before failed boot timer expired"
|
||||
"25", "Hardware", "hardware watchdog timer expired"
|
||||
"26", "Remote", "remote ASF reset command was received"
|
||||
"27", "Internal", "an uncorrected error caused a data fabric sync flood event"
|
||||
"29", "Internal", "FCH and MP1 failed warm reset handshake"
|
||||
"30", "Internal", "a parity error occurred"
|
||||
"31", "Internal", "a software sync flood event occurred"
|
||||
|
||||
This information is read by the kernel at bootup and printed into
|
||||
the syslog. When a random reboot occurs this message can be helpful
|
||||
to determine the next component to debug.
|
||||
@@ -71,6 +71,28 @@ Note: lseek() is not supported as entire metrics table is read.
|
||||
Metrics table definitions will be documented as part of Public PPR.
|
||||
The same is defined in the amd_hsmp.h header.
|
||||
|
||||
2. HSMP telemetry sysfs files
|
||||
|
||||
The following sysfs files are available at /sys/devices/platform/AMDI0097:0X/.
|
||||
|
||||
* c0_residency_input: Percentage of cores in C0 state.
|
||||
* prochot_status: Reports 1 if the processor is at thermal threshold value,
|
||||
0 otherwise.
|
||||
* smu_fw_version: SMU firmware version.
|
||||
* protocol_version: HSMP interface version.
|
||||
* ddr_max_bw: Theoretical maximum DDR bandwidth in GB/s.
|
||||
* ddr_utilised_bw_input: Current utilized DDR bandwidth in GB/s.
|
||||
* ddr_utilised_bw_perc_input(%): Percentage of current utilized DDR bandwidth.
|
||||
* mclk_input: Memory clock in MHz.
|
||||
* fclk_input: Fabric clock in MHz.
|
||||
* clk_fmax: Maximum frequency of socket in MHz.
|
||||
* clk_fmin: Minimum frequency of socket in MHz.
|
||||
* cclk_freq_limit_input: Core clock frequency limit per socket in MHz.
|
||||
* pwr_current_active_freq_limit: Current active frequency limit of socket
|
||||
in MHz.
|
||||
* pwr_current_active_freq_limit_source: Source of current active frequency
|
||||
limit.
|
||||
|
||||
ACPI device object format
|
||||
=========================
|
||||
The ACPI object format expected from the amd_hsmp driver
|
||||
@@ -116,6 +138,14 @@ for socket with ID00 is given below::
|
||||
})
|
||||
}
|
||||
|
||||
HSMP HWMON interface
|
||||
====================
|
||||
HSMP power sensors are registered with the hwmon interface. A separate hwmon
|
||||
directory is created for each socket and the following files are generated
|
||||
within the hwmon directory.
|
||||
- power1_input (read only)
|
||||
- power1_cap_max (read only)
|
||||
- power1_cap (read, write)
|
||||
|
||||
An example
|
||||
==========
|
||||
|
||||
@@ -173,10 +173,10 @@ For example, when an old kernel is running on new hardware.
|
||||
The kernel disabled support for it at compile-time
|
||||
--------------------------------------------------
|
||||
|
||||
For example, if 5-level-paging is not enabled when building (i.e.,
|
||||
CONFIG_X86_5LEVEL is not selected) the flag "la57" will not show up [#f1]_.
|
||||
For example, if Linear Address Masking (LAM) is not enabled when building (i.e.,
|
||||
CONFIG_ADDRESS_MASKING is not selected) the flag "lam" will not show up.
|
||||
Even though the feature will still be detected via CPUID, the kernel disables
|
||||
it by clearing via setup_clear_cpu_cap(X86_FEATURE_LA57).
|
||||
it by clearing via setup_clear_cpu_cap(X86_FEATURE_LAM).
|
||||
|
||||
The feature is disabled at boot-time
|
||||
------------------------------------
|
||||
@@ -200,5 +200,3 @@ missing at runtime. For example, AVX flags will not show up if XSAVE feature
|
||||
is disabled since they depend on XSAVE feature. Another example would be broken
|
||||
CPUs and them missing microcode patches. Due to that, the kernel decides not to
|
||||
enable a feature.
|
||||
|
||||
.. [#f1] 5-level paging uses linear address of 57 bits.
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user