sysfs Basics {:.gc-basic}
Basic
sysfs is a virtual filesystem mounted at /sys that exposes the kernel’s device model hierarchy to userspace. Each directory corresponds to a kobject, each file corresponds to an attribute. The rule is: one value per file — sysfs files should be simple, human-readable single values or short strings, not structured binary blobs.
#include <linux/device.h>
#include <linux/sysfs.h>
/* DEVICE_ATTR_RW(temp) expects two functions that you write: temp_show and temp_store */
/* and defines a struct device_attribute named dev_attr_temp */
/*
 * temp_show() - read handler for the "temp" sysfs attribute.
 * @dev:  device that owns the attribute; its drvdata is the driver state
 * @attr: attribute being read (unused)
 * @buf:  output buffer supplied by sysfs
 *
 * Return: the value returned by sysfs_emit().
 */
static ssize_t temp_show(struct device *dev,
			 struct device_attribute *attr, char *buf)
{
	struct mydrv_priv *drvdata = dev_get_drvdata(dev);
	int millicelsius = read_temperature(drvdata); /* millidegrees Celsius */

	/* buf is PAGE_SIZE bytes: format with sysfs_emit (or scnprintf), never sprintf */
	return sysfs_emit(buf, "%d\n", millicelsius);
}
/*
 * temp_store() - write handler for the "temp" sysfs attribute.
 * @dev:   device that owns the attribute; its drvdata is the driver state
 * @attr:  attribute being written (unused)
 * @buf:   text written by userspace
 * @count: number of bytes in @buf
 *
 * Parses @buf as a base-10 integer and stores it as the alert threshold.
 *
 * Return: @count on success, or the error returned by kstrtoint().
 */
static ssize_t temp_store(struct device *dev,
			  struct device_attribute *attr,
			  const char *buf, size_t count)
{
	struct mydrv_priv *drvdata = dev_get_drvdata(dev);
	int new_threshold;
	int err = kstrtoint(buf, 10, &new_threshold);

	if (err)
		return err;

	drvdata->alert_threshold = new_threshold;

	/* report the whole write as consumed */
	return count;
}
/* General form: DEVICE_ATTR(name, mode, show_fn, store_fn) */
/* The _RW/_RO/_WO shorthands take only the name and define dev_attr_<name> */
static DEVICE_ATTR_RW(temp); /* 0644: read-write */
static DEVICE_ATTR_RO(status); /* 0444: read-only */
static DEVICE_ATTR_WO(reset); /* 0200: write-only */
/* Manual equivalent of DEVICE_ATTR_RW(temp): */
/* static DEVICE_ATTR(temp, 0644, temp_show, temp_store); */
/*
 * mydrv_probe() - bind-time setup that publishes the "temp" attribute.
 *
 * Failing to create the attribute is only logged; probe still returns 0.
 */
static int mydrv_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	int err;

	/* ... hardware init ... */

	/* Create /sys/devices/.../mydrv/temp */
	err = device_create_file(dev, &dev_attr_temp);
	if (err)
		dev_warn(dev, "failed to create temp sysfs: %d\n", err);

	return 0;
}
/* mydrv_remove() - undo mydrv_probe(): take the "temp" attribute back out of sysfs */
static void mydrv_remove(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;

	device_remove_file(dev, &dev_attr_temp);
}
# Read a sysfs attribute (sample output is shown as a comment below the command)
cat /sys/devices/platform/mydrv/temp
# 42500
# Write a sysfs attribute
echo 50000 > /sys/devices/platform/mydrv/temp
# Find all attributes for a device
ls /sys/bus/platform/devices/mydrv/
Attribute Groups {:.gc-mid}
Intermediate
Instead of calling device_create_file for each attribute individually, group them with attribute_group. This ensures atomic creation/removal and enables conditional visibility.
#include <linux/sysfs.h>
#include <linux/device.h>
/* Forward-declare the show/store handlers that the DEVICE_ATTR_* macros below refer to */
static ssize_t temp_show(struct device *dev, struct device_attribute *attr, char *buf);
static ssize_t voltage_show(struct device *dev, struct device_attribute *attr, char *buf);
static ssize_t enable_show(struct device *dev, struct device_attribute *attr, char *buf);
static ssize_t enable_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count);
/* Each macro defines dev_attr_<name>, which the attribute array below points at */
static DEVICE_ATTR_RO(temp);
static DEVICE_ATTR_RO(voltage);
static DEVICE_ATTR_RW(enable);
/* NULL-terminated list of the attributes that make up the group */
static struct attribute *mydrv_attrs[] = {
&dev_attr_temp.attr,
&dev_attr_voltage.attr,
&dev_attr_enable.attr,
NULL, /* sentinel */
};
/*
 * mydrv_attr_visible() - is_visible callback for the attribute group.
 * @kobj: kobject embedded in the owning struct device
 * @attr: attribute being considered
 * @n:    index of @attr in the group's list (unused)
 *
 * Return: 0 to hide the voltage attribute when the hardware has no voltage
 * sensor; otherwise the attribute's default mode.
 */
static umode_t mydrv_attr_visible(struct kobject *kobj,
				  struct attribute *attr, int n)
{
	struct mydrv_priv *drvdata;

	/* only the voltage attribute is conditional */
	if (attr != &dev_attr_voltage.attr)
		return attr->mode;

	drvdata = dev_get_drvdata(kobj_to_dev(kobj));

	/* 0 hides the file; attr->mode keeps the default permissions */
	return drvdata->has_voltage_sensor ? attr->mode : 0;
}
/* Group descriptor: ties the attribute list to its visibility callback */
static const struct attribute_group mydrv_attr_group = {
.name = "mydrv", /* optional subdirectory name */
.attrs = mydrv_attrs,
.is_visible = mydrv_attr_visible,
};
/* NULL-terminated array of groups, the form a device's ->groups pointer takes */
static const struct attribute_group *mydrv_attr_groups[] = {
&mydrv_attr_group,
NULL,
};
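/* Register every attribute in the group with one call. A group created with
sysfs_create_group() stays on the device until sysfs_remove_group() is called,
so the driver must remove it in its remove() callback */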
/*
 * mydrv_probe() - create the whole attribute group on the device's kobject.
 *
 * Return: 0 on success, or the error from sysfs_create_group().
 */
static int mydrv_probe(struct platform_device *pdev)
{
	struct kobject *dev_kobj = &pdev->dev.kobj;
	int err;

	err = sysfs_create_group(dev_kobj, &mydrv_attr_group);
	if (err)
		return err;

	/* ... */

	return 0;
}
/* mydrv_remove() - remove the attribute group that mydrv_probe() created */
static void mydrv_remove(struct platform_device *pdev)
{
	struct kobject *dev_kobj = &pdev->dev.kobj;

	sysfs_remove_group(dev_kobj, &mydrv_attr_group);
}
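/* Alternative: let the driver core create and remove the groups for you */
/* pdev->dev.groups = mydrv_attr_groups;   (must be set before the device is registered) */
/* or, on the driver side: .driver.dev_groups = mydrv_attr_groups in struct platform_driver */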
Binary attributes allow arbitrary binary data (firmware blobs, calibration data):
/*
 * calibration_read() - read handler for the binary "calibration" attribute.
 *
 * Placeholder: the code that copies driver state into buf is elided, and the
 * function simply reports count bytes as read.
 * NOTE(review): newer kernels may declare attr as const — confirm against the
 * target kernel version.
 */
static ssize_t calibration_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr,
char *buf, loff_t off, size_t count)
{
/* fill buf from driver state */
return count;
}
/* Defines bin_attr_calibration, whose read handler is calibration_read() */
static BIN_ATTR_RO(calibration, 256); /* 256-byte binary file */
/* NOTE(review): the return value is not checked here — confirm the real call site handles failure */
sysfs_create_bin_file(&dev->kobj, &bin_attr_calibration);
kobject and kset {:.gc-mid}
Intermediate
Every directory in /sys is backed by a kobject. The Linux device model builds the hierarchy automatically for struct device objects, but you can create custom directories for standalone drivers.
#include <linux/kobject.h>
#include <linux/sysfs.h>
/* kobject backing the driver's own sysfs directory; assigned in mydrv_init() */
static struct kobject *mydrv_kobj;
/*
 * version_show() - read handler for the "version" kobject attribute.
 *
 * Return: the value returned by sysfs_emit() for the fixed version string.
 */
static ssize_t version_show(struct kobject *kobj,
			    struct kobj_attribute *attr, char *buf)
{
	ssize_t len;

	len = sysfs_emit(buf, "1.2.3\n");
	return len;
}
/* __ATTR_RO(version) wires the read-only "version" file to version_show() */
static struct kobj_attribute version_attr = __ATTR_RO(version);
/* NULL-terminated attribute list; ATTRIBUTE_GROUPS(mydrv_kobj) looks for mydrv_kobj_attrs */
static struct attribute *mydrv_kobj_attrs[] = {
&version_attr.attr,
NULL,
};
ATTRIBUTE_GROUPS(mydrv_kobj); /* creates mydrv_kobj_group and mydrv_kobj_groups[] */
/*
 * mydrv_init() - create /sys/kernel/mydrv/ and populate it with the
 * attribute group.
 *
 * Return: 0 on success, -ENOMEM if the kobject cannot be created, or the
 * error from sysfs_create_group().
 */
static int __init mydrv_init(void)
{
	int ret;

	/* Create /sys/kernel/mydrv/ */
	mydrv_kobj = kobject_create_and_add("mydrv", kernel_kobj);
	if (!mydrv_kobj)
		return -ENOMEM;

	/* ATTRIBUTE_GROUPS(mydrv_kobj) names the group mydrv_kobj_group */
	ret = sysfs_create_group(mydrv_kobj, &mydrv_kobj_group);
	if (ret) {
		/* drop the reference taken by kobject_create_and_add() */
		kobject_put(mydrv_kobj);
		return ret;
	}

	return 0;
}
/*
 * mydrv_exit() - tear down what mydrv_init() created: remove the attribute
 * group, then drop the reference on the kobject.
 */
static void __exit mydrv_exit(void)
{
	/* ATTRIBUTE_GROUPS(mydrv_kobj) names the group mydrv_kobj_group */
	sysfs_remove_group(mydrv_kobj, &mydrv_kobj_group);
	kobject_put(mydrv_kobj); /* decrement reference count, free if zero */
}
Sending a uevent from sysfs
/* Notify userspace of an event (e.g., threshold exceeded) */
/* envp is a NULL-terminated list of extra KEY=value strings sent with the uevent */
char *envp[] = { "MYDRV_EVENT=TEMP_ALERT", NULL };
kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
udev and systemd receive the uevent via netlink and can run rules or scripts in response.
procfs with seq_file {:.gc-adv}
Advanced
/proc predates sysfs and is generally used for kernel diagnostics and process/system information. For driver debugging, prefer sysfs or debugfs. However, /proc/driver/ is the conventional location for driver-specific info.
Simple single-value /proc file
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
/*
 * mydrv_proc_show() - produce the whole contents of the /proc entry.
 * @m: seq_file to print into; m->private carries the driver state
 * @v: unused
 *
 * Return: always 0.
 */
static int mydrv_proc_show(struct seq_file *m, void *v)
{
	struct mydrv_priv *drvdata = m->private;

	/* identification */
	seq_printf(m, "Driver: mydrv v1.0\n");

	/* resources */
	seq_printf(m, "IRQ: %d\n", drvdata->irq);
	seq_printf(m, "Base: %p\n", drvdata->base);

	/* traffic counters */
	seq_printf(m, "RX pkts: %lu\n", drvdata->rx_count);
	seq_printf(m, "TX pkts: %lu\n", drvdata->tx_count);

	return 0;
}
/* proc_create_single: creates /proc/driver/mydrv with a single-shot show fn */
static struct proc_dir_entry *mydrv_proc;
static int mydrv_probe(struct platform_device *pdev)
{
/* ... */
mydrv_proc = proc_create_single_data("driver/mydrv", 0444,
NULL, mydrv_proc_show, priv);
return 0;
}
/* mydrv_remove() - remove the /proc entry registered by mydrv_probe() */
static void mydrv_remove(struct platform_device *pdev)
{
	struct proc_dir_entry *entry = mydrv_proc;

	proc_remove(entry);
}
Multi-line /proc file with seq_file iterator
static void *mydrv_seq_start(struct seq_file *m, loff_t *pos)
{
struct mydrv_priv *priv = m->private;
if (*pos >= priv->num_channels)
return NULL;
return &priv->channels[*pos];
}
static void *mydrv_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
struct mydrv_priv *priv = m->private;
++(*pos);
if (*pos >= priv->num_channels)
return NULL;
return &priv->channels[*pos];
}
static void mydrv_seq_stop(struct seq_file *m, void *v) {}
static int mydrv_seq_show(struct seq_file *m, void *v)
{
struct mydrv_channel *ch = v;
seq_printf(m, "CH%d: val=%d status=0x%04x\n",
ch->index, ch->value, ch->status);
return 0;
}
static const struct seq_operations mydrv_seq_ops = {
.start = mydrv_seq_start,
.next = mydrv_seq_next,
.stop = mydrv_seq_stop,
.show = mydrv_seq_show,
};
/* proc_create_seq_private handles open/read/release automatically */
proc_create_seq_private("driver/mydrv_channels", 0444,
NULL, &mydrv_seq_ops,
sizeof(struct mydrv_priv), priv);
Advanced: debugfs {:.gc-adv}
Advanced
debugfs is an in-memory filesystem for debug-only information. It has no ABI stability guarantee — perfect for development and testing, but entries should not be relied on by production userspace.
#include <linux/debugfs.h>
struct mydrv_priv {
struct dentry *debugfs_dir;
u32 reg_dump_addr;
bool trace_enabled;
};
/*
 * mydrv_reg_dump_show() - print the first 16 32-bit registers.
 * @m: seq_file to print into; m->private carries the driver state
 * @v: unused
 *
 * Return: always 0.
 */
static int mydrv_reg_dump_show(struct seq_file *m, void *v)
{
	struct mydrv_priv *drvdata = m->private;
	int off;

	seq_printf(m, "Register dump (base=%p):\n", drvdata->base);

	/* walk byte offsets 0x00..0x3c in 4-byte steps */
	for (off = 0; off < 16 * 4; off += 4)
		seq_printf(m, " [0x%02x] = 0x%08x\n",
			   off, readl(drvdata->base + off));

	return 0;
}
/* Generates mydrv_reg_dump_fops around mydrv_reg_dump_show() */
DEFINE_SHOW_ATTRIBUTE(mydrv_reg_dump);
/*
 * mydrv_probe() - create the driver's debugfs directory and its files.
 *
 * Return: always 0; the debugfs calls are not checked.
 */
static int mydrv_probe(struct platform_device *pdev)
{
	struct mydrv_priv *priv;
	struct dentry *dir;

	/* ... */

	/* Create /sys/kernel/debug/mydrv/ */
	dir = debugfs_create_dir("mydrv", NULL);
	priv->debugfs_dir = dir;

	/* Typed value files — read/write directly */
	debugfs_create_u32("reg_dump_addr", 0644, dir, &priv->reg_dump_addr);
	debugfs_create_bool("trace_enabled", 0644, dir, &priv->trace_enabled);

	/* Custom seq_file-based file */
	debugfs_create_file("reg_dump", 0444, dir, priv, &mydrv_reg_dump_fops);

	return 0;
}
/* mydrv_remove() - remove the debugfs directory and everything beneath it */
static void mydrv_remove(struct platform_device *pdev)
{
	struct mydrv_priv *drvdata = platform_get_drvdata(pdev);
	struct dentry *dir = drvdata->debugfs_dir;

	debugfs_remove_recursive(dir);
}
# Mount debugfs if not already mounted
mount -t debugfs none /sys/kernel/debug
# Access driver debug info: dump the registers
cat /sys/kernel/debug/mydrv/reg_dump
# Write 1 to the trace_enabled file
echo 1 > /sys/kernel/debug/mydrv/trace_enabled
When to use which interface
| Interface | Use for | ABI stable? |
|---|---|---|
| sysfs (/sys) | Driver configuration, sensor readings, device state | Yes — treat as ABI |
| procfs (/proc) | System and process information, legacy driver info | Partial |
| debugfs (/sys/kernel/debug) | Debug dumps, tracing, development testing | No — kernel can change anytime |
Interview Q&A {:.gc-iq}
Interview Q&A
Q1: What is the fundamental purpose difference between sysfs and procfs?
/proc was originally intended for process information and has grown to include miscellaneous kernel information, but has no strict format requirements and mixes binary and text files. /sys is a formal representation of the kernel device model — every directory is a kobject, every file is a typed attribute. The kernel documentation mandates that sysfs files contain one value per file. Drivers should use sysfs for device state and configuration and procfs only for legacy compatibility.
Q2: Why does kobject reference counting matter?
A kobject may be referenced by userspace (open sysfs file), by other kernel objects, and by the device model itself. kobject_get increments the count; kobject_put decrements it and frees the object when it reaches zero. If a module unloads while a sysfs file is still open (without proper reference counting), accessing the freed kobject causes a kernel oops. device_create_file / device_remove_file handle this correctly for struct device kobjects.
Q3: What are the rules for the sysfs show callback — can it sleep? How large can the buffer be?
The show callback is called in process context and can sleep. The buf argument is exactly PAGE_SIZE bytes (4096 bytes on most architectures). You must not write more than PAGE_SIZE bytes. Use sysfs_emit(buf, ...) instead of sprintf(buf, ...) — sysfs_emit never writes past the PAGE_SIZE boundary (over-long output is truncated, and the number of bytes actually written is returned) and it warns if buf is not the page-aligned buffer that sysfs passed in, preventing buffer overflows.
Q4: What is the DEVICE_ATTR naming convention?
DEVICE_ATTR(name, mode, show, store) creates a struct device_attribute named dev_attr_<name>. The corresponding sysfs filename is exactly <name>. By convention: use lowercase, underscore-separated names, avoid abbreviations, and use units-as-suffix (e.g., temp_millicelsius, timeout_ms). Every new attribute must also be documented under Documentation/ABI/; the naming convention itself is upheld by code review rather than by an automated checker.
Q5: How does kobject_uevent trigger a udev rule?
kobject_uevent(kobj, action) sends a netlink message to userspace with the kobject’s path and the action (KOBJ_ADD, KOBJ_REMOVE, KOBJ_CHANGE, etc.); kobject_uevent_env() additionally attaches caller-supplied environment strings. udevd receives this message and matches it against its rules (for example those in /etc/udev/rules.d/). This is how device node creation (KOBJ_ADD), removal (KOBJ_REMOVE), and custom events like threshold alerts (KOBJ_CHANGE) are handled.
Q6: When should you prefer debugfs over sysfs for driver debug information?
Use debugfs when: the information is only useful during development or debugging, the format may change between kernel versions, or the data is too verbose for normal operation (e.g., full register dumps, DMA descriptor ring state, firmware trace logs). Because debugfs has no ABI stability guarantee, you are free to restructure it without breaking userspace. Sysfs attributes, once exported, become kernel ABI and cannot be renamed or removed without a deprecation period.
References {:.gc-ref}
References
| Resource | Link |
|---|---|
| kernel.org: sysfs rules | https://www.kernel.org/doc/html/latest/filesystems/sysfs.html |
| kernel.org: kobject documentation | https://www.kernel.org/doc/html/latest/core-api/kobject.html |
| kernel.org: sysfs ABI documentation | https://www.kernel.org/doc/html/latest/ABI/stable/ |
| kernel.org: debugfs documentation | https://www.kernel.org/doc/html/latest/filesystems/debugfs.html |
| seq_file interface documentation | https://www.kernel.org/doc/html/latest/filesystems/seq_file.html |
| Device model and sysfs (LWN) | https://lwn.net/Articles/646617/ |
| Writing sysfs attributes correctly | https://www.kernel.org/doc/html/latest/driver-api/driver-model/index.html |