Monday, August 8, 2011

Interview question from an OS vendor

This question was given to me by a friend that was interviewing with an OS vendor. I couldn't resist a friendly competition, so I took a shot. This was only one hour of work, and it didn't get the exact answer as I didn't handle the word wraps correctly (waistcoat-pocket was counted as waistcoatpocket). Test file and test output.







/*
* Problem
* =======
*
* Write an _efficient_ program that reads an ascii file in, and prints
* out all the words in the file in frequency order (most frequent
* first). Each output line should be of the following form:
*
* > <word> <count>
*
* You have 1 hour.
*
*
* Comments
* ========
*
* An example input file 'alice-in-wonderland.txt' has been provided
* along with 10 lines of example output ('alice-in-wonderland.output').
*
* The problem is deliberately vague; do not ask for clarifications.
* Instead do something sensible.
*
* This will be marked by reading your code, as well as running it on
* some test cases.
*/



#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

struct element
{
int count;
char *word;
struct element *next;
};


// modified this from http://lernc.blogspot.com/2008/12/bubble-sort-linked-list.html
/* preform a bubble sort on the linked list */
struct element *bubble_sort(struct element *head) {
struct element *a = NULL;
struct element *b = NULL;
struct element *c = NULL;
struct element *e = NULL;
struct element *tmp = NULL;

/*
// the `c' node precedes the `a' and `e' node
// pointing up the node to which the comparisons
// are being made.
*/

while(e != head->next)
{
c = a = head;
b = a->next;
while(a != e)
{
if(a->count < b->count)
{
if(a == head)
{
tmp = b -> next;
b->next = a;
a->next = tmp;
head = b;
c = b;
} else {
tmp = b->next;
b->next = a;
a->next = tmp;
c->next = b;
c = b;
}
} else
{
c = a;
a = a->next;
}
b = a->next;
if(b == e)
e = a;
}
}

return head;
}

struct element *make_new_node(const char *word)
{
struct element *new_node = (struct element *)malloc(sizeof(struct element));

new_node->word = (char *)malloc(strlen(word)+1);
strcpy(new_node->word, word);

new_node->next = NULL;
new_node->count = 0;

return new_node;
}

struct element *initialize_list()
{
struct element *element;
struct element *head;
char c;

element = make_new_node("a");
if(element == NULL)
{
return NULL;
}

head = element;

for(c = 'b'; c<='z'; ++c)
{
char word[2];
sprintf(word, "%c", c);
element->next = make_new_node(word);
if(element->next == NULL)
{
return NULL;
}

element = element->next;
}

return head;
}

int error()
{
printf("out of memory\n");
return -1;
}

void print_list(struct element *list)
{
while(list)
{
printf("%s %d\n", list->word, list->count);
list = list->next;
}
}

void initialize_index(struct element **index, struct element *list)
{
int i = 0;

while(list)
{
index[i] = list;
++i;
list = list->next;
}

if(i != 26)
{
printf("problems\n");
}
}

void clean_up(char* str)
{
char ch;
int i;
int ii = strlen(str);
int write_index = 0;
char word[ii+1];
for (i=0; i <ii;i++)
{
strncpy(&ch,str+i,1);
if(isalpha(ch))
{
if (ch>='A' && ch<='Z')
{
ch = tolower(ch);
}
memcpy(word+write_index,&ch,1);
write_index++;
word[write_index]='\0';
}
}

strcpy(str, word);
}


struct element *get_indexed_element(struct element **index, const char *word)
{
return index[word[0]-'a'];
}

int main()
{
struct element *index[26];
struct element *head = initialize_list();

if(head == NULL)
{
return error();
}

initialize_index(index, head);

// read book and insert elements
char word[256];
FILE *f;

f=fopen("alice.txt","r");
while (!feof(f))
{
fscanf(f,"%s",word);

// make it all lowercase
// remove whitespace and punctuation
clean_up(word);

// add it to the running total
struct element *elm = get_indexed_element(index, word);

while(elm->next && (strcmp(elm->next->word, word) != 1))
{
elm = elm->next;
}

if(strcmp(elm->word, word) == 0)
{
elm->count++;
}
else
{
struct element *new_node = make_new_node(word);
if(new_node == NULL)
{
return error();
}
new_node->next = elm->next;

new_node->count++;
elm->next = new_node;
}

}
// sort list
head = bubble_sort(head);

print_list(head);

// delete memory

while(head)
{
struct element *elm = head;
head=head->next;

free(elm->word);
free(elm);
}

fclose(f);

return 0;
}




No comments:

Post a Comment