Lab02 Problems Sol

Question 1: argc

Consider the following simple C program:

simple.c

// Question 1 program, reproduced as given: prints the "argc = ..." line.
// The printf call below supplies no argument for its %d conversion.
int
main(int argc, char *argv[])
{
    printf("argc = %d\n");
}

If we compile and run this program like this:

$ simple 1 2 3

What is the output?

Solution: The program has a bug: the printf format string contains a %d conversion, but no corresponding argument is passed. This is undefined behavior in C. In practice, printf reads whatever value happens to be in the register or stack slot where the missing argument would have been, so a garbage value is typically printed (and it may differ between runs, compilers, or machines).

If the printf statement was correctly written as printf("argc = %d\n", argc);, the output would be argc = 4 (the program name counts as the first argument).

Question 2: cat.c

Consider the original implementation of cat.c found in xv6 shown below. Modify this version of cat to accept a command line option -m cols that will truncate each line to cols columns. That is, cols is the maximum line length. Make neat code changes/additions and explain your solution.

cat.c

#include "kernel/types.h"
#include "kernel/fcntl.h"
#include "user/user.h"

// Buffer that cat() fills with read() and then passes to write().
char buf[512];

// Copy everything that can be read from fd to file descriptor 1.
// Prints a message to file descriptor 2 and exits with status 1 if a
// write does not transfer the full chunk or if read() reports an error.
void
cat(int fd)
{
  int n;

  // Keep going while read() delivers data; n <= 0 ends the loop.
  while((n = read(fd, buf, sizeof(buf))) > 0) {
    // A write that transfers anything other than n bytes is treated as an error.
    if (write(1, buf, n) != n) {
      fprintf(2, "cat: write error\n");
      exit(1);
    }
  }
  // A negative count means the loop ended because the read failed.
  if(n < 0){
    fprintf(2, "cat: read error\n");
    exit(1);
  }
}

// Entry point of the original cat: with no arguments it copies file
// descriptor 0; otherwise it opens each argument in turn, copies it with
// cat(), and closes it. Exits with status 1 on the first file that
// cannot be opened, and with status 0 after all inputs are copied.
int
main(int argc, char *argv[])
{
  int fd, i;

  // No file arguments: copy from file descriptor 0.
  if(argc <= 1){
    cat(0);
    exit(0);
  }

  // argv[0] is skipped; every remaining argument is treated as a file name.
  for(i = 1; i < argc; i++){
    if((fd = open(argv[i], O_RDONLY)) < 0){
      fprintf(2, "cat: cannot open %s\n", argv[i]);
      exit(1);
    }
    cat(fd);
    close(fd);
  }
  exit(0);
}

Solution: Here’s the modified version of cat.c that implements the -m cols option:

#include "kernel/types.h"
#include "kernel/fcntl.h"
#include "user/user.h"

// Buffer that cat() fills with read() and writes out from.
char buf[512];
// Maximum number of columns per output line, set by main() from the
// -m option; 0 (the default) means no limit.
int max_cols = 0;  // Default is unlimited

// Copy fd to standard output, truncating each line to max_cols columns.
//
// When max_cols is 0 (no -m option) the input is copied through unchanged.
// Otherwise at most max_cols characters of every line are written; the rest
// of the line is discarded up to, but not including, its newline, so the
// original line boundaries are preserved (lines are truncated, not wrapped).
//
// Exits with status 1 on a read or write error.
void
cat(int fd)
{
  int n;
  int col = 0;  // Characters already written on the current line
  int i;

  // col is kept across iterations because one line may span several reads.
  while((n = read(fd, buf, sizeof(buf))) > 0) {
    // No limit: pass the whole chunk through, exactly as the original cat.
    if(max_cols <= 0) {
      if(write(1, buf, n) != n) {
        fprintf(2, "cat: write error\n");
        exit(1);
      }
      continue;
    }

    for(i = 0; i < n; i++) {
      if(buf[i] == '\n') {
        col = 0;    // Newline is always written and starts a fresh line
      } else if(col >= max_cols) {
        continue;   // Past the limit: drop the character (truncate)
      } else {
        col++;
      }
      if(write(1, &buf[i], 1) != 1) {
        fprintf(2, "cat: write error\n");
        exit(1);
      }
    }
  }
  if(n < 0){
    fprintf(2, "cat: read error\n");
    exit(1);
  }
}

// Entry point: cat [-m cols] [file ...]
//
// An optional leading "-m cols" sets max_cols, the maximum number of
// columns written per line. The remaining arguments are files to copy in
// order; with no files, standard input is copied instead.
// Exits with status 1 if -m has no value, if the value is not a positive
// number, or if a file cannot be opened; exits with status 0 otherwise.
int
main(int argc, char *argv[])
{
  int fd, i;
  int start_index = 1;  // Index of the first file name in argv

  // Check for -m option
  if(argc > 1 && strcmp(argv[1], "-m") == 0) {
    // "-m" given without a value: report it rather than treating "-m"
    // as a file name.
    if(argc < 3) {
      fprintf(2, "cat: option -m requires a column count\n");
      exit(1);
    }
    max_cols = atoi(argv[2]);
    if(max_cols <= 0) {
      fprintf(2, "cat: invalid column value\n");
      exit(1);
    }
    start_index = 3;  // Skip the option and its value
  }

  // If no files specified, read from stdin
  if(start_index >= argc) {
    cat(0);
    exit(0);
  }

  // Process each file
  for(i = start_index; i < argc; i++) {
    if((fd = open(argv[i], O_RDONLY)) < 0) {
      fprintf(2, "cat: cannot open %s\n", argv[i]);
      exit(1);
    }
    cat(fd);
    close(fd);
  }
  exit(0);
}

Explanation of changes:

Added a global max_cols variable that defaults to 0 (unlimited)
Modified the cat() function to process the buffer character by character instead of in chunks
Added column position tracking that resets on newline characters
Only writes characters if they’re within the max column limit
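Characters beyond the column limit must be dropped until the next newline resets the column counter: the task asks for truncation, so over-long lines should not be wrapped onto a new line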
Added command-line option parsing for -m cols in the main function

Question 3: buggy

What is a problem with this code? Assume foo.txt looks like this:

foo.txt

$ cat foo.txt
This-is-a-test-file.

bad.c

// Question 3 program, reproduced as given: opens foo.txt, reads up to
// 10 bytes into buf, and prints buf as a string.
int
main(int argc, char *argv[])
{
  int fd, n;
  char buf[128];  // local array; not initialized before the read

  fd = open("foo.txt", O_RDONLY);
  if (fd < 0)
    exit(-1);
  n = read(fd, buf, 10);      // n is not examined afterwards
  printf("buf = %s\n", buf);  // %s prints up to the first '\0' found
  close(fd);
}

Solution: The primary issue with this code is that it doesn’t null-terminate the buffer after reading from the file.

When read(fd, buf, 10) is called, it reads up to 10 bytes from the file into the buffer, but it does not add a null terminator (\0) after them. Because buf is an uninitialized local array, the bytes after the ones just read are indeterminate. When printf("buf = %s\n", buf) is then called, the %s format specifier expects a null-terminated string, so printf keeps reading past the bytes that were read, through the rest of buf and possibly beyond it, until it happens to encounter a null byte. This is undefined behavior.

The fix would be to add a line after the read call to null-terminate the buffer:

n = read(fd, buf, 10);
buf[n] = '\0';  // Add null terminator
printf("buf = %s\n", buf);

Question 4: wc

Explain what the following code snippet from the wc command does:

// A character found in the whitespace set ends any word in progress.
if(strchr(" \r\t\n\v", buf[i]))
  inword = 0;
// Otherwise the character is part of a word; count the word only on its
// first character, i.e. when we were not already inside one.
else if(!inword){
  w++;
  inword = 1;
}

Solution: This code snippet implements word counting logic in the wc (word count) utility. Let’s break it down:

strchr(" \r\t\n\v", buf[i]) checks if the current character buf[i] is one of the whitespace characters (space, carriage return, tab, newline, or vertical tab).
If the current character is a whitespace character, inword is set to 0, indicating that we are not currently inside a word.
else if(!inword) checks if we’re not currently inside a word (i.e., inword is 0) AND the current character is not a whitespace (because we’re in the else clause).
If both conditions are true (we’re not in a word and the current character is not whitespace), then:
- w++ increments the word count, as we’ve just entered a new word
- inword = 1 sets the flag to indicate we are now inside a word

This algorithm counts words by detecting transitions from whitespace to non-whitespace characters. Each such transition is counted as the start of a new word. It’s an efficient way to count words in a stream of characters without having to store the entire text in memory.

Question 5: simdiff.c

Write a program called simdiff.c that just determines if two files are the same or not. It works like this:

$ simdiff foo.txt bar.txt
SAME
$ simdiff foo.txt baz.txt
DIFFERENT

Note that two files are different if any corresponding characters in both files are different or if the files are different sizes.

Solution: Here’s an implementation of simdiff.c:

#include "kernel/types.h"
#include "kernel/fcntl.h"
#include "user/user.h"

// Size in bytes of each of the two read buffers in same_files().
#define BUFSIZE 512

// Compare the remaining contents of two open file descriptors.
//
// Returns 1 if both descriptors yield exactly the same bytes and reach
// end of file together, 0 if any corresponding byte differs or one input
// ends before the other. Prints a message and exits with status 1 if a
// read fails.
//
// Each input has its own buffer, fill count and read position, so the
// comparison stays correct even when the two read() calls return
// different numbers of bytes.
int
same_files(int fd1, int fd2)
{
  char buf1[BUFSIZE];
  char buf2[BUFSIZE];
  // Both buffers start empty (position == count == 0) so that the first
  // loop iteration refills them; the counts must not be read uninitialized.
  int n1 = 0, n2 = 0;  // Valid bytes currently held in each buffer
  int i1 = 0, i2 = 0;  // Next unread position in each buffer

  while(1) {
    // Refill buffer 1 once all of its bytes have been consumed
    if(i1 >= n1) {
      n1 = read(fd1, buf1, BUFSIZE);
      if(n1 < 0) {
        fprintf(2, "simdiff: read error on file 1\n");
        exit(1);
      }
      i1 = 0;
    }

    // Refill buffer 2 once all of its bytes have been consumed
    if(i2 >= n2) {
      n2 = read(fd2, buf2, BUFSIZE);
      if(n2 < 0) {
        fprintf(2, "simdiff: read error on file 2\n");
        exit(1);
      }
      i2 = 0;
    }

    // A count of 0 can only come from a refill that hit end of file.
    // The files match only if both ended at the same point.
    if(n1 == 0 || n2 == 0) {
      return (n1 == 0 && n2 == 0);
    }

    // Compare one byte from each input and advance both positions
    if(buf1[i1++] != buf2[i2++]) {
      return 0;  // Characters differ
    }
  }
}

// Entry point: simdiff file1 file2
//
// Opens both files read-only, compares them with same_files(), and prints
// SAME or DIFFERENT. Exits with status 1 on a usage error or if either
// file cannot be opened; exits with status 0 after printing the verdict.
int
main(int argc, char *argv[])
{
  int left, right;
  int identical;

  // Exactly two file names are required
  if(argc != 3) {
    fprintf(2, "Usage: simdiff file1 file2\n");
    exit(1);
  }

  left = open(argv[1], O_RDONLY);
  if(left < 0) {
    fprintf(2, "simdiff: cannot open %s\n", argv[1]);
    exit(1);
  }

  right = open(argv[2], O_RDONLY);
  if(right < 0) {
    fprintf(2, "simdiff: cannot open %s\n", argv[2]);
    close(left);  // Release the first file before bailing out
    exit(1);
  }

  // Report the verdict before the descriptors are released
  identical = same_files(left, right);
  printf("%s\n", identical ? "SAME" : "DIFFERENT");

  close(left);
  close(right);
  exit(0);
}

This program:

Opens both files and directly compares them character by character
Uses an efficient buffered reading approach for better performance
Detects differences when one file is longer than the other
Prints “DIFFERENT” if any corresponding characters differ or if one file has more content than the other
Prints “SAME” only if all characters match and both files reach EOF at the same time
Properly handles errors and resources (closing file descriptors)