File Descriptors and Basic I/O
{:.gc-basic}
Basic
Every open file, socket, pipe, or device in Linux is represented by an integer file descriptor (fd). The kernel maintains a per-process table of open file descriptions.
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
// Standard file descriptors
// STDIN_FILENO = 0
// STDOUT_FILENO = 1
// STDERR_FILENO = 2
// open() — returns fd or -1 on error (check errno)
int fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY | O_NDELAY);
if (fd == -1) {
perror("open");
exit(EXIT_FAILURE);
}
// read() — returns bytes read, 0 on EOF, -1 on error
uint8_t buf[256];
ssize_t n = read(fd, buf, sizeof(buf));
// write() — returns bytes written, -1 on error
const char* msg = "Hello\r\n";
ssize_t w = write(fd, msg, strlen(msg));
// close() — always close when done!
close(fd);
Important open() Flags
| Flag | Meaning |
|---|---|
O_RDONLY |
Read only |
O_WRONLY |
Write only |
O_RDWR |
Read + write |
O_CREAT |
Create file if it doesn’t exist |
O_TRUNC |
Truncate file to zero length |
O_APPEND |
All writes go to end of file |
O_NONBLOCK |
Non-blocking mode |
O_SYNC |
Sync writes to disk before returning |
O_CLOEXEC |
Close fd on exec() (avoid fd leaks) |
// Create or overwrite a file with mode 0644
int fd = open("output.bin", O_WRONLY | O_CREAT | O_TRUNC, 0644);
Robust Read/Write Loops
// read() may return fewer bytes than requested (partial read)!
// Always loop until all bytes are read.
ssize_t read_all(int fd, void* buf, size_t count) {
size_t total = 0;
uint8_t* p = (uint8_t*)buf;
while (total < count) {
ssize_t n = read(fd, p + total, count - total);
if (n == 0) return total; // EOF
if (n == -1) {
if (errno == EINTR) continue; // interrupted by signal
return -1; // real error
}
total += n;
}
return total;
}
ssize_t write_all(int fd, const void* buf, size_t count) {
size_t total = 0;
const uint8_t* p = (const uint8_t*)buf;
while (total < count) {
ssize_t n = write(fd, p + total, count - total);
if (n == -1) {
if (errno == EINTR) continue;
return -1;
}
total += n;
}
return total;
}
lseek, File Positions, and ioctl
{:.gc-mid}
Intermediate
#include <sys/ioctl.h>
// lseek — reposition file offset
off_t pos = lseek(fd, 0, SEEK_SET); // go to start
off_t cur = lseek(fd, 0, SEEK_CUR); // current position
off_t end = lseek(fd, 0, SEEK_END); // file size
lseek(fd, 100, SEEK_SET); // go to byte 100
lseek(fd, -4, SEEK_CUR); // go back 4 bytes from current
// ioctl — device-specific control
#include <linux/serial.h>
struct serial_struct ss;
ioctl(fd, TIOCGSERIAL, &ss); // get serial info
ss.flags |= ASYNC_LOW_LATENCY;
ioctl(fd, TIOCSSERIAL, &ss); // set serial info
// Terminal I/O (tty)
#include <termios.h>
struct termios tty;
tcgetattr(fd, &tty);
cfsetispeed(&tty, B115200);
cfsetospeed(&tty, B115200);
tty.c_cflag &= ~PARENB; // no parity
tty.c_cflag &= ~CSTOPB; // 1 stop bit
tty.c_cflag |= CS8; // 8 data bits
tty.c_cflag |= CREAD | CLOCAL;
tcsetattr(fd, TCSANOW, &tty);
// Get number of bytes waiting in serial receive buffer
int bytes_waiting;
ioctl(fd, FIONREAD, &bytes_waiting);
Memory-Mapped I/O (mmap)
{:.gc-adv}
Advanced
mmap maps a file (or device register space) directly into the process’s virtual address space. Reads and writes go to/from the kernel’s page cache — no read()/write() syscalls needed.
#include <sys/mman.h>
// Map a file into memory (read-only)
int fd = open("large_data.bin", O_RDONLY);
struct stat sb;
fstat(fd, &sb);
uint8_t* data = mmap(NULL, sb.st_size,
PROT_READ,
MAP_PRIVATE, fd, 0);
if (data == MAP_FAILED) { perror("mmap"); exit(1); }
close(fd); // fd can be closed after mmap
// Access file contents as array — zero copies!
process_data(data, sb.st_size);
munmap(data, sb.st_size);
mmap for Hardware Registers (Embedded Linux)
#include <sys/mman.h>
#define GPIO_BASE 0x3F200000 // Raspberry Pi GPIO base
#define BLOCK_SIZE 4096
int mem_fd = open("/dev/mem", O_RDWR | O_SYNC);
volatile uint32_t* gpio = mmap(NULL, BLOCK_SIZE,
PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, GPIO_BASE);
if (gpio == MAP_FAILED) {
perror("mmap"); exit(1);
}
// Direct register access — no syscall overhead!
gpio[0] = (gpio[0] & ~(7 << 21)) | (1 << 21); // GPIO 17 output
gpio[7] = (1 << 17); // GPIO_SET: pin 17 high
gpio[10] = (1 << 17); // GPIO_CLR: pin 17 low
mmap Flags
| Flag | Meaning |
|---|---|
MAP_PRIVATE |
Copy-on-write; changes not visible to other processes |
MAP_SHARED |
Changes visible to other processes sharing the mapping |
MAP_ANONYMOUS |
Not backed by a file (use for shared memory between fork’d processes) |
PROT_READ |
Pages can be read |
PROT_WRITE |
Pages can be written |
PROT_EXEC |
Pages can be executed |
I/O Multiplexing: select, poll, epoll
{:.gc-adv}
Watch multiple file descriptors simultaneously — without blocking on any single one.
poll — Simpler than select
#include <poll.h>
int fd1 = open("/dev/ttyS0", O_RDWR | O_NONBLOCK);
int fd2 = open("/dev/ttyS1", O_RDWR | O_NONBLOCK);
struct pollfd fds[2] = {
{ .fd = fd1, .events = POLLIN },
{ .fd = fd2, .events = POLLIN },
};
while (1) {
int ret = poll(fds, 2, 1000); // timeout: 1000ms
if (ret == -1) { perror("poll"); break; }
if (ret == 0) { puts("timeout"); continue; }
if (fds[0].revents & POLLIN) {
uint8_t buf[64];
ssize_t n = read(fd1, buf, sizeof(buf));
handle_data(buf, n);
}
if (fds[1].revents & POLLIN) {
// read fd2
}
}
epoll — Scalable for Many File Descriptors
#include <sys/epoll.h>
int epfd = epoll_create1(0); // create epoll instance
// Add file descriptors to watch
struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd1 };
epoll_ctl(epfd, EPOLL_CTL_ADD, fd1, &ev);
ev.data.fd = fd2;
epoll_ctl(epfd, EPOLL_CTL_ADD, fd2, &ev);
struct epoll_event events[10];
while (1) {
int n = epoll_wait(epfd, events, 10, -1); // -1 = wait forever
for (int i = 0; i < n; i++) {
if (events[i].events & EPOLLIN) {
int ready_fd = events[i].data.fd;
uint8_t buf[256];
read(ready_fd, buf, sizeof(buf));
// process...
}
}
}
close(epfd);
poll vs epoll:
poll |
epoll |
|
|---|---|---|
| FD limit | System limit | Thousands |
| Scan per call | O(n) — scans all FDs | O(ready) — only ready FDs |
| Use case | Few FDs (< ~100) | Many FDs (servers, daemons) |
| API | Simple | More complex (create/ctl/wait) |
Interview Q&A
{:.gc-iq}
Interview Q&A
Q1 — Basic: What is a file descriptor and what does O_CLOEXEC do?
A file descriptor is a small non-negative integer that is an index into the kernel’s per-process open file table.
O_CLOEXECmarks the fd to be automatically closed whenexec()is called. Without it, open fds are inherited byexec()’d child processes — a security risk and resource leak. Best practice: always useO_CLOEXECwhen opening fds you don’t intend to pass to child processes.
Q2 — Basic: Why must read() and write() be called in a loop?
POSIX allows
read()andwrite()to transfer fewer bytes than requested — a partial read/write. This can happen due to signals (EINTR), kernel buffer limits, or available data. Aread()of 1000 bytes on a socket may return 400. If you don’t loop until all bytes are transferred, your application will silently process incomplete data. Always wrap I/O in a loop that checks the return value and accounts for bothEINTRand partial transfers.
Q3 — Intermediate: When would you use mmap instead of read()/write()?
mmapis ideal when: (a) processing large files where you want random access without seeking, (b) sharing memory between processes (MAP_SHARED), (c) mapping hardware register space in embedded Linux drivers (/dev/mem), or (d) zero-copy file-to-network transfers withsendfile.read()/write()is better for sequential streaming I/O, small amounts of data, and when you need explicit control over buffering.mmapavoids double-copying (kernel buffer → user buffer) but has overhead from page faults on first access.
Q4 — Advanced: Explain the difference between poll and epoll, and when you’d choose epoll.
Both monitor file descriptors for events.
polltakes the entire list of watched FDs on every call — it runs in O(n) where n is the number of FDs, regardless of how many are ready.epollmaintains a kernel-side interest list; you add/remove FDs withepoll_ctl.epoll_waitonly returns the ready FDs — O(ready events), not O(all FDs). For a server with 10,000 connections but only 10 active at a time,pollscans all 10,000 whileepollreturns the 10. Chooseepollfor high-connection-count servers;pollfor simple programs with a handful of FDs.
References
{:.gc-ref}
References
| Resource | Link |
|---|---|
man 2 open |
POSIX open manual |
man 2 mmap |
mmap manual |
man 7 epoll |
epoll API overview |
| The Linux Programming Interface (TLPI) | M. Kerrisk — definitive POSIX reference |
man 7 signal |
Signal handling reference |