Question 1: argc
Consider the following simple C program:
simple.c
// Question 1 exhibit, reproduced exactly as posed.
// main() makes a single printf call and has no explicit return statement.
int
main(int argc, char *argv[])
{
// The format string contains one %d conversion followed by a newline.
printf("argc = %d\n");
}
If we compile and run this program like this:
$ simple 1 2 3
What is the output?
Solution: The program has a bug: the format string contains a %d conversion, but printf is not given a corresponding argument to print.
This is undefined behavior in C. In practice, printf reads whatever value happens to be in the register or stack slot where the missing argument would have been passed, so the program typically prints "argc = " followed by an unpredictable (garbage) number.
If the printf statement were written correctly as printf("argc = %d\n", argc); the output would be:
argc = 4
The program name (argv[0]) counts as the first argument, in addition to 1, 2 and 3.
Question 2: cat.c
Consider the original implementation of cat.c found in xv6, shown below.
Modify this version of cat to accept a command-line option -m cols
that truncates each line to at most cols columns.
That is, cols is the maximum line length: characters beyond column cols are dropped.
Make neat code changes/additions and explain your solution.
cat.c
#include "kernel/types.h"
#include "kernel/fcntl.h"
#include "user/user.h"
// Buffer shared by cat(); each read() below fills at most sizeof(buf) bytes of it.
char buf[512];
// Copy data from fd to file descriptor 1 until read() stops returning a
// positive count. If a write() does not return n, or read() returns a
// negative value, print a message with fprintf(2, ...) and exit(1).
void
cat(int fd)
{
int n;
// Each pass writes exactly the n bytes that the preceding read() delivered.
while((n = read(fd, buf, sizeof(buf))) > 0) {
if (write(1, buf, n) != n) {
fprintf(2, "cat: write error\n");
exit(1);
}
}
// The loop ends with n == 0 or n < 0; only the negative case is reported.
if(n < 0){
fprintf(2, "cat: read error\n");
exit(1);
}
}
// With no command-line arguments, run cat() on descriptor 0 and exit(0).
// Otherwise open each argv[i] read-only, in order, pass it to cat(), and
// close it. A failed open() prints "cat: cannot open <name>" with
// fprintf(2, ...) and exits with status 1 without processing later names.
int
main(int argc, char *argv[])
{
int fd, i;
// argc <= 1 means only the program name was supplied.
if(argc <= 1){
cat(0);
exit(0);
}
// Index 0 is skipped; the loop starts at the first argument after it.
for(i = 1; i < argc; i++){
if((fd = open(argv[i], O_RDONLY)) < 0){
fprintf(2, "cat: cannot open %s\n", argv[i]);
exit(1);
}
cat(fd);
close(fd);
}
exit(0);
}
Solution: Here’s the modified version of cat.c that implements the -m cols
option:
#include "kernel/types.h"
#include "kernel/fcntl.h"
#include "user/user.h"
// I/O buffer shared by cat(); also used in place to build the truncated output.
char buf[512];
// Maximum number of characters printed per line; 0 means unlimited.
// main() sets it when the -m cols option is given.
int max_cols = 0;

// Copy everything readable from fd to standard output (descriptor 1).
//
// When max_cols > 0, each input line is truncated: only its first max_cols
// characters are printed and the rest of the line is dropped. The newline
// itself is always kept, so the number of lines is unchanged.
//
// Exits with status 1 after printing a message to descriptor 2 if a write
// is short/fails or a read fails.
void
cat(int fd)
{
  int n;
  int i;
  int keep;     // number of bytes at the front of buf to write for this chunk
  int col = 0;  // characters already printed on the current line; it lives
                // outside the read loop because a line may span several reads

  while((n = read(fd, buf, sizeof(buf))) > 0) {
    keep = n;
    if(max_cols > 0) {
      // Compact the chunk in place, keeping only the bytes that survive
      // truncation. keep never overtakes i, so no unread byte is overwritten.
      keep = 0;
      for(i = 0; i < n; i++) {
        if(buf[i] == '\n') {
          col = 0;            // a newline always passes and starts a new line
        } else if(col < max_cols) {
          col++;              // still within the limit
        } else {
          continue;           // past the limit: drop until the next newline
        }
        buf[keep++] = buf[i];
      }
    }
    // One write per chunk; skipped when truncation removed every byte.
    if(keep > 0 && write(1, buf, keep) != keep) {
      fprintf(2, "cat: write error\n");
      exit(1);
    }
  }
  if(n < 0){
    fprintf(2, "cat: read error\n");
    exit(1);
  }
}
// Usage: cat [-m cols] [file ...]
//
// If the first argument is -m, the next argument is the maximum line length
// and is stored in max_cols; it must be a positive integer. The remaining
// arguments are files to print in order; with none, standard input is used.
// Exits with status 1 on a bad option or a file that cannot be opened.
int
main(int argc, char *argv[])
{
  int fd, i;
  int start_index = 1;  // index of the first file name in argv

  if(argc > 1 && strcmp(argv[1], "-m") == 0) {
    // "-m" given without a value would otherwise be treated as a file name.
    if(argc < 3) {
      fprintf(2, "usage: cat [-m cols] [file ...]\n");
      exit(1);
    }
    max_cols = atoi(argv[2]);
    // Rejects 0 and negatives, and with them any text atoi turns into 0.
    if(max_cols <= 0) {
      fprintf(2, "cat: invalid column value\n");
      exit(1);
    }
    start_index = 3;  // skip the option and its value
  }

  // No file names left: read from standard input.
  if(start_index >= argc) {
    cat(0);
    exit(0);
  }

  for(i = start_index; i < argc; i++) {
    if((fd = open(argv[i], O_RDONLY)) < 0) {
      fprintf(2, "cat: cannot open %s\n", argv[i]);
      exit(1);
    }
    cat(fd);
    close(fd);
  }
  exit(0);
}
Explanation of changes:
- Added a global max_cols variable that defaults to 0 (unlimited)
- Modified the cat() function to examine the buffer character by character
- Added column position tracking that resets on newline characters
- Only writes characters if they're within the max column limit
- Ensures that no output line is longer than cols columns
- Added command-line option parsing for -m cols in the main function
Question 3: buggy
What is a problem with this code? Assume foo.txt
looks like this:
foo.txt
$ cat foo.txt
This-is-a-test-file.
bad.c
// Question 3 exhibit, reproduced exactly as posed.
// Opens foo.txt read-only, issues one read() into buf, prints buf with %s,
// and closes the descriptor. Exits with -1 if open() fails.
int
main(int argc, char *argv[])
{
int fd, n;
// Local array; the code does not initialize it before the read() below.
char buf[128];
fd = open("foo.txt", O_RDONLY);
if (fd < 0)
exit(-1);
// Requests at most 10 bytes; read()'s return value is stored in n.
n = read(fd, buf, 10);
printf("buf = %s\n", buf);
close(fd);
}
Solution: The primary issue with this code is that it doesn't null-terminate the buffer after reading from the file.
read(fd, buf, 10) copies up to 10 bytes from the file into buf, but it does not append a null terminator (\0).
The %s conversion in printf("buf = %s\n", buf) expects a null-terminated string.
Because buf is an uninitialized local array, nothing guarantees a zero byte after the data that was read,
so printf keeps reading past those bytes (possibly past the end of buf) until it happens to encounter a zero byte.
This is undefined behavior: the program may print garbage after "This-is-a-" or crash.
The return value n is also never checked or used, even though it says how many bytes were actually read.
The fix would be to add a line after the read call to null-terminate the buffer:
n = read(fd, buf, 10);
buf[n] = '\0'; // Add null terminator
printf("buf = %s\n", buf);
Question 4: wc
Explain what the following code snippet from the wc
command does:
// The string lists space, carriage return, tab, newline and vertical tab;
// the test is true when strchr() finds buf[i] among them.
if(strchr(" \r\t\n\v", buf[i]))
inword = 0;
// Reached only when the test above was false; w is incremented only if
// inword was 0, and inword is then set so later characters do not count again.
else if(!inword){
w++;
inword = 1;
}
Solution: This code snippet implements the word-counting logic in the wc (word count) utility. Let's break it down:
- strchr(" \r\t\n\v", buf[i]) checks whether the current character buf[i] is one of the whitespace characters (space, carriage return, tab, newline, or vertical tab).
- If the current character is a whitespace character, inword is set to 0, indicating that we are not currently inside a word.
- else if(!inword) checks whether we're not currently inside a word (i.e., inword is 0) AND the current character is not whitespace (because we're in the else clause).
- If both conditions are true (we're not in a word and the current character is not whitespace), then:
  - w++ increments the word count, as we've just entered a new word
  - inword = 1 sets the flag to indicate we are now inside a word
This algorithm counts words by detecting transitions from whitespace to non-whitespace characters. Each such transition is counted as the start of a new word. It’s an efficient way to count words in a stream of characters without having to store the entire text in memory.
Question 5: simdiff.c
Write a program called simdiff.c
that just determines if two files are the same or not. It works like this:
$ simdiff foo.txt bar.txt
SAME
$ simdiff foo.txt baz.txt
DIFFERENT
Note that two files are different if any corresponding characters in both files are different or if the files are different sizes.
Solution: Here’s an implementation of simdiff.c:
#include "kernel/types.h"
#include "kernel/fcntl.h"
#include "user/user.h"
#define BUFSIZE 512

// Compare the remaining contents of two open descriptors byte by byte.
//
// Returns 1 if both streams deliver exactly the same bytes and reach end of
// file together, 0 if any byte differs or one stream ends before the other.
// A failed read prints a message to descriptor 2 and exits with status 1.
//
// Each descriptor has its own buffer and position, so the comparison stays
// correct even if the two read() calls return different byte counts.
int
same_files(int fd1, int fd2)
{
  char buf1[BUFSIZE];
  char buf2[BUFSIZE];
  // Both buffers start empty (position == count == 0), so the first pass
  // through the loop refills each of them before anything is compared.
  int n1 = 0, n2 = 0;  // bytes currently held in buf1 / buf2
  int i1 = 0, i2 = 0;  // next unread position in buf1 / buf2

  while(1) {
    // Refill buffer 1 once it has been fully consumed.
    if(i1 >= n1) {
      n1 = read(fd1, buf1, BUFSIZE);
      if(n1 < 0) {
        fprintf(2, "simdiff: read error on file 1\n");
        exit(1);
      }
      i1 = 0;
    }
    // Refill buffer 2 once it has been fully consumed.
    if(i2 >= n2) {
      n2 = read(fd2, buf2, BUFSIZE);
      if(n2 < 0) {
        fprintf(2, "simdiff: read error on file 2\n");
        exit(1);
      }
      i2 = 0;
    }
    // A count of 0 can only come from a refill that hit end of file.
    // The files match only if both ran out at the same point.
    if(n1 == 0 || n2 == 0) {
      return (n1 == 0 && n2 == 0);
    }
    if(buf1[i1++] != buf2[i2++]) {
      return 0;  // corresponding bytes differ
    }
  }
}
// Usage: simdiff file1 file2
//
// Opens both named files read-only, compares them with same_files(), and
// prints SAME or DIFFERENT on its own line. Exits with status 1 on a wrong
// argument count or a file that cannot be opened, and with 0 otherwise.
int
main(int argc, char *argv[])
{
  int left, right;
  int identical;

  if(argc != 3) {
    fprintf(2, "Usage: simdiff file1 file2\n");
    exit(1);
  }

  left = open(argv[1], O_RDONLY);
  if(left < 0) {
    fprintf(2, "simdiff: cannot open %s\n", argv[1]);
    exit(1);
  }

  right = open(argv[2], O_RDONLY);
  if(right < 0) {
    fprintf(2, "simdiff: cannot open %s\n", argv[2]);
    close(left);  // the first file is already open at this point
    exit(1);
  }

  identical = same_files(left, right);
  if(identical) {
    printf("SAME\n");
  } else {
    printf("DIFFERENT\n");
  }

  close(left);
  close(right);
  exit(0);
}
This program:
- Opens both files and directly compares them character by character
- Uses an efficient buffered reading approach for better performance
- Detects differences when one file is longer than the other
- Prints DIFFERENT if any corresponding characters differ or if one file has more content than the other
- Prints SAME only if all characters match and both files reach EOF at the same time
- Properly handles errors and resources (closing file descriptors)