
/*
 *
 * Probe's multiple link eliminator for Matt's free for all links page
 * Version 1.0
 * Usama Wazeer (usamaw@cs.utexas.edu)
 * URL: http://www.cs.utexas.edu/users/usamaw
 *
 * #  Define the input and output files below.
 *    DON'T use the same file for both: doing so will
 *    delete your file and you will lose all data.
 * #  Compile this using your favorite C compiler..
 *    ex: gcc -o urlchk urlchk.c
 * #  and then just run: urlchk
 *
 * The program will display the line number and entry for each
 * link that is repeated and create the output file.
 *
 * Feel free to copy or change this program in any way,
 * as long as you give me credit. :)
 *
 */

/******** DEFINE THESE TWO VARIABLES ********/

#define INPUT_FILE "/u/usamaw/www/links.html"
#define OUTPUT_FILE "/u/usamaw/www/links.html.out"

/********************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/*
 * One node of the singly linked list of URLs that have been recorded.
 */
struct URLType
{
  char url[150];          /* NUL-terminated URL text */
  struct URLType *next;   /* following node; NULL ends the list */
};

typedef struct URLType URLStruct;

/* Head of the singly linked list of recorded URLs; NULL while the list is empty. */
URLStruct *URLList;

/*
 * Fallback definition of NULL for environments whose headers do not
 * provide one.  It must be an object-like macro with no trailing
 * semicolon so that plain uses such as "return NULL;" and "p != NULL"
 * expand to a valid null pointer constant.
 */
#ifndef NULL
#define NULL ((void *)0)
#endif

/*
 * Produce the initial (empty) URL list.
 *
 * An empty list is represented by a NULL head pointer, so nothing needs
 * to be allocated.  The previous version allocated a block and then
 * immediately overwrote the only pointer to it, leaking it on every call.
 *
 * Returns: NULL, the empty list.
 */
URLStruct *init_urllist( void )
{
  return NULL;
}

/*
 * Look up a URL in the global URLList, ignoring letter case.
 *
 * userhost: URL text to search for; may be NULL.
 *
 * Returns the first node whose url matches, or NULL when userhost is
 * NULL or no node matches.
 */
URLStruct *find_url(char *userhost)
{
  URLStruct *node = URLList;

  if (userhost == NULL)
    return NULL;

  while (node != NULL)
    {
      if (strcasecmp(node->url, userhost) == 0)
        return node;
      node = node->next;
    }

  return NULL;
}

/*
 * Read the next line that does not start with '#' from stream into lin.
 *
 * stream: open input stream.
 * lin:    destination buffer; up to 1000 bytes (terminator included)
 *         are stored per read, so it must be at least that large.
 *
 * The stored line is cut at the first '\n' or '\r', whichever comes
 * first.  Returns 1 when a line was stored, 0 when fgets() reports end
 * of file or an error.
 */
int  readln_from_a_file( FILE *stream, char *lin)
{
        /* Skip over lines whose first character marks them as comments. */
        for (;;)
        {
                if (fgets(lin, 1000, stream) == NULL)
                        return 0;
                if (lin[0] != '#')
                        break;
        }

        /* Terminate the string at the earliest line-ending character. */
        lin[strcspn(lin, "\r\n")] = '\0';
        return 1;
}

/*
 * Return the stored text of a URL if it is already in the list.
 *
 * userhost: URL text to look up (passed straight to find_url()).
 *
 * Returns a pointer to the url field of the matching node, or NULL when
 * find_url() finds nothing.
 */
char *furl( char *userhost )
{
  URLStruct *hit = find_url(userhost);

  return hit ? hit->url : NULL;
}

/*
 * Record a URL at the head of the global URLList.
 *
 * url: NUL-terminated URL text; may be NULL.
 *
 * Returns 1 when a new node was added.  Returns 0 when url is NULL, when
 * it does not fit in the node's url field, when find_url() reports it is
 * already present, or when memory cannot be allocated.
 *
 * Over-long URLs are rejected rather than truncated: a truncated copy
 * could later compare equal to a different URL sharing the same prefix.
 */
int add_to_urllist( char *url)
{
  URLStruct *New_user;

  if( url == NULL )
    return 0;

  /* sizeof does not evaluate its operand, so New_user need not be set yet. */
  if( strlen(url) >= sizeof(New_user->url) )
    return 0;

  if( find_url(url) != NULL )
    return 0;

  if( (New_user = malloc (sizeof(*New_user))) == NULL)
    return 0;

  /* Length was checked above, so this copy cannot overrun the field. */
  strcpy(New_user->url, url);
  New_user->next = URLList;
  URLList = New_user;
  return 1;
}

/*
 * Decide whether a link's URL is new, and remember it if so.
 *
 * url2: writable buffer holding the text that followed "href=".  It is
 *       modified in place: strtok() cuts it at the first '>' that
 *       follows the URL text.
 *
 * Returns 0 when the URL is already in the list (a duplicate), 1
 * otherwise.  A new URL is handed to add_to_urllist().
 *
 * strtok() yields NULL when the buffer is empty or holds only '>'
 * characters; there is then nothing to compare or store, so the entry
 * is reported as not a duplicate without touching the list.
 */
int checkurl(char *url2)
{
    char  *url;

    url = strtok(url2, ">");
    if(url == NULL)
      return 1;

    if(furl(url))
      return 0;

    add_to_urllist(url);
    return 1;
}
/*
 * Copy filename to filename2, leaving out link lines whose URL has
 * already appeared earlier in the input.
 *
 * filename:  path of the file to read.
 * filename2: path of the file to create or overwrite.
 *
 * Every line returned by readln_from_a_file() is scanned for
 * "<li><a href=URL".  Lines without a URL are copied unchanged.  Lines
 * with a URL are copied only when checkurl() reports the URL as new;
 * otherwise the line number and text are printed to stdout.  The global
 * URLList is emptied before reading and refilled through checkurl().
 *
 * Returns 1 on success.  Returns 0 when a path is NULL, when both paths
 * are the same string, when either file cannot be opened, or when
 * closing the output file fails.
 */
int  read_urllist( char *filename, char *filename2 )
{
  FILE  *fp;
  FILE  *list_file;
  int i = 0;
  char  lin[2000];
  char  url2[200];
  URLStruct *dummy;
  URLStruct *next;

  if( !filename || !filename2 )
    return 0;

  /* Opening the output truncates it, so never let it be the input. */
  if( strcmp( filename, filename2 ) == 0 )
    return 0;

  if( ( fp = fopen( filename, "r" ) ) == NULL )
    return 0;

  if( ( list_file = fopen( filename2, "w" ) ) == NULL )
    {
      fclose( fp );   /* do not leak the input stream on this path */
      return 0;
    }

  /* Fetch the successor before freeing; a freed node must not be read. */
  for( dummy = URLList; dummy; dummy = next )
    {
      next = dummy->next;
      free(dummy);
    }

  URLList = init_urllist();

  while( readln_from_a_file( fp, lin) )
    {
      i++;
      url2[0] = '\0';
      /*
       * Only the URL token is needed.  The field width keeps an over-long
       * token from overrunning url2; the call's result is not needed
       * because url2 stays empty whenever nothing matched.
       */
      sscanf(lin, "<li><a href=%199s", url2);
      if(!*url2)
          fprintf( list_file, "%s\n", lin);
      else
        {
          if(checkurl(url2))
            fprintf( list_file, "%s\n", lin);
          else
            printf("%-4i Entry: %s \n", i, lin);
        }
    }

  fclose( fp );
  /* fclose() flushes buffered output; a failure means the copy is incomplete. */
  if( fclose( list_file ) != 0 )
    return 0;
  return( 1 );
}

/*
 * Program entry point: filter INPUT_FILE into OUTPUT_FILE.
 *
 * Returns 0 when read_urllist() succeeds.  When it fails, a message is
 * written to stderr and 1 is returned, instead of announcing success.
 */
int main(void)
{
  printf("Here we go.... \n\n");
  if( !read_urllist(INPUT_FILE, OUTPUT_FILE) )
    {
      fprintf(stderr, "Could not process %s into %s\n", INPUT_FILE, OUTPUT_FILE);
      return 1;
    }
  printf("\n\nAll done!!!!!\n");
  return 0;
}



