/*
* Problem
* =======
*
* Write an _efficient_ program that reads an ascii file in, and prints
* out all the words in the file in frequency order (most frequent
* first). Each output line should be of the following form:
*
* > <word> <count>
*
* You have 1 hour.
*
*
* Comments
* ========
*
* An example input file 'alice-in-wonderland.txt' has been provided
* along with 10 lines of example output ('alice-in-wonderland.output').
*
* The problem is deliberately vague; do not ask for clarifications.
* Instead do something sensible.
*
* This will be marked by reading your code, as well as running it on
* some test cases.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
struct element
{
int count;
char *word;
struct element *next;
};
// modified this from http://lernc.blogspot.com/2008/12/bubble-sort-linked-list.html
/* preform a bubble sort on the linked list */
struct element *bubble_sort(struct element *head) {
struct element *a = NULL;
struct element *b = NULL;
struct element *c = NULL;
struct element *e = NULL;
struct element *tmp = NULL;
/*
// the `c' node precedes the `a' and `e' node
// pointing up the node to which the comparisons
// are being made.
*/
while(e != head->next)
{
c = a = head;
b = a->next;
while(a != e)
{
if(a->count < b->count)
{
if(a == head)
{
tmp = b -> next;
b->next = a;
a->next = tmp;
head = b;
c = b;
} else {
tmp = b->next;
b->next = a;
a->next = tmp;
c->next = b;
c = b;
}
} else
{
c = a;
a = a->next;
}
b = a->next;
if(b == e)
e = a;
}
}
return head;
}
struct element *make_new_node(const char *word)
{
struct element *new_node = (struct element *)malloc(sizeof(struct element));
new_node->word = (char *)malloc(strlen(word)+1);
strcpy(new_node->word, word);
new_node->next = NULL;
new_node->count = 0;
return new_node;
}
struct element *initialize_list()
{
struct element *element;
struct element *head;
char c;
element = make_new_node("a");
if(element == NULL)
{
return NULL;
}
head = element;
for(c = 'b'; c<='z'; ++c)
{
char word[2];
sprintf(word, "%c", c);
element->next = make_new_node(word);
if(element->next == NULL)
{
return NULL;
}
element = element->next;
}
return head;
}
int error()
{
printf("out of memory\n");
return -1;
}
void print_list(struct element *list)
{
while(list)
{
printf("%s %d\n", list->word, list->count);
list = list->next;
}
}
void initialize_index(struct element **index, struct element *list)
{
int i = 0;
while(list)
{
index[i] = list;
++i;
list = list->next;
}
if(i != 26)
{
printf("problems\n");
}
}
void clean_up(char* str)
{
char ch;
int i;
int ii = strlen(str);
int write_index = 0;
char word[ii+1];
for (i=0; i <ii;i++)
{
strncpy(&ch,str+i,1);
if(isalpha(ch))
{
if (ch>='A' && ch<='Z')
{
ch = tolower(ch);
}
memcpy(word+write_index,&ch,1);
write_index++;
word[write_index]='\0';
}
}
strcpy(str, word);
}
struct element *get_indexed_element(struct element **index, const char *word)
{
return index[word[0]-'a'];
}
int main()
{
struct element *index[26];
struct element *head = initialize_list();
if(head == NULL)
{
return error();
}
initialize_index(index, head);
// read book and insert elements
char word[256];
FILE *f;
f=fopen("alice.txt","r");
while (!feof(f))
{
fscanf(f,"%s",word);
// make it all lowercase
// remove whitespace and punctuation
clean_up(word);
// add it to the running total
struct element *elm = get_indexed_element(index, word);
while(elm->next && (strcmp(elm->next->word, word) != 1))
{
elm = elm->next;
}
if(strcmp(elm->word, word) == 0)
{
elm->count++;
}
else
{
struct element *new_node = make_new_node(word);
if(new_node == NULL)
{
return error();
}
new_node->next = elm->next;
new_node->count++;
elm->next = new_node;
}
}
// sort list
head = bubble_sort(head);
print_list(head);
// delete memory
while(head)
{
struct element *elm = head;
head=head->next;
free(elm->word);
free(elm);
}
fclose(f);
return 0;
}
This blog is set up as a portfolio of the projects that I've worked on over the past 10 years. A lot of them I no longer have the dates when I originally worked on them, but I included them anyway. It's mostly so I can share them with others, so if there's a better way to share it, let me know.
Monday, August 8, 2011
Interview question from an OS vendor
This question was given to me by a friend that was interviewing with an OS vendor. I couldn't resist a friendly competition, so I took a shot. This was only one hour of work, and it didn't get the exact answer as I didn't handle the word wraps correctly (waistcoat-pocket was counted as waistcoatpocket). Test file and test output.