/*
 *========================================================================
 * $Id: get_proc_pids.c 163 2005-09-14 22:54:20Z rgb $
 *
 * See copyright in copyright.h and the accompanying file COPYING
 *========================================================================
 */

#include "xmlsysd.h"
#include <string.h>

/*
 * This routine zips through the files under /proc/(pid)/ and extracts things of
 * interest.  It is derived from code in fs/proc/array.c in the
 * kernel source, from code in procps (top.c, vmstat.c, proc/readproc.c,
 * proc/status.c) and from serious experimentation.
 *
 * get_proc_pids() is controlled by a number of parameters in the daemon
 * control struct, dctl.  dctl.pids determines whether or not it is
 * called at all (one level up).  dctl.running determines whether
 * only running processes (instead of all processes that match the
 * other control parameters) are extracted.  dctl.root controls whether
 * or not it displays root processes.  Using just these controls one can
 * easily display all running, non-root-owned processes (one group of
 * interest).
 *
 * For finer control, one must use dctl.userlist and/or dctl.tasklist.
 * These are linked lists of usernames and tasknames.  If they are empty,
 * only the controls above are used.  If they are non-empty, dctl.pids
 * and dctl.running still work, but only tasks that >>match<< a username
 * and/or taskname in the lists will be displayed.  This supersedes the
 * use of the dctl.root flag -- if root is in the userlist it will be
 * displayed even if root display is turned off by the flag.  Note that
 * user and task list presence is processed in the most restrictive way,
 * with an implicit "and" operator.  Only tasks that belong to a listed
 * user AND have a listed taskname will be displayed, if both lists are
 * non-empty.
 *
 * The expected use of these is to permit the monitoring of a selected
 * group of users (ones with permission to run cluster tasks on a node),
 * a selected group of tasks (tasks being run in a distributed way on 
 * a user workstation, for example, where local user tasks should be
 * ignored) or both.  If they prove to be inadequate for task monitoring
 * and control, additional controls can easily be added.
 */

/*
 * Read at most bufsize-1 bytes from the file at path into buf with a
 * single read() call.  buf is zeroed first, so it is always a valid,
 * NUL-terminated (possibly empty) string on return.  If st is non-NULL
 * it is filled in with fstat() on the open descriptor, which avoids a
 * second path lookup just to learn the owner.
 *
 * Returns the number of bytes read (0 for an empty file), or -1 if the
 * file could not be opened, stat-ed or read -- typically because the
 * process exited between readdir() and open().
 */
static int pids_read_file(const char *path, char *buf, size_t bufsize,
                          struct stat *st)
{
 int fd;
 ssize_t nread;

 memset(buf, 0, bufsize);
 fd = open(path, O_RDONLY);
 if (fd < 0) {
   return -1;
 }
 if (st != NULL && fstat(fd, st) != 0) {
   close(fd);
   return -1;
 }
 nread = read(fd, buf, bufsize - 1);
 close(fd);
 if (nread < 0) {
   return -1;
 }
 return (int) nread;
}

/*
 * Cut the string s at its first line feed, if it contains one.
 */
static void pids_strip_newline(char *s)
{
 char *lf = strchr(s, '\n');

 if (lf != NULL) {
   *lf = '\0';
 }
}

/*
 * Pull the few "interesting" fields out of one /proc/PID/stat line.
 *
 * The task name is taken as everything between the first '(' and the
 * LAST ')' on the line, so names that contain blanks or parentheses are
 * handled; it is truncated to fit name[32].  The remaining fields are
 * parsed from the text following that ')'.  state[] receives the state
 * token (at most 31 characters).
 *
 * Returns 0 if every requested field was parsed, -1 otherwise (in which
 * case the output arguments must not be trusted).
 */
static int pids_parse_stat(const char *line, char name[32], char state[32],
                           long *utime, long *stime, int *nice,
                           unsigned long *vsize, unsigned long *rss)
{
 const char *lparen = strchr(line, '(');
 const char *rparen = strrchr(line, ')');
 size_t len;

 if (lparen == NULL || rparen == NULL || rparen < lparen) {
   return -1;
 }

 len = (size_t) (rparen - lparen - 1);
 if (len > 31) {
   len = 31;
 }
 memcpy(name, lparen + 1, len);
 name[len] = '\0';

 /*
  * Fields after the name: state, five ints (ppid..tpgid), five
  * unsigned longs (flags..cmajflt), utime, stime, cutime, cstime,
  * priority, nice, two unsigned longs, starttime, vsize, rss.
  * Everything past rss is ignored.
  */
 if (sscanf(rparen + 1,
            " %31s %*d %*d %*d %*d %*d %*lu %*lu %*lu %*lu %*lu"
            " %ld %ld %*ld %*ld %*d %d %*lu %*lu %*ld %lu %lu",
            state, utime, stime, nice, vsize, rss) != 6) {
   return -1;
 }
 return 0;
}

/*
 * Scan /proc for process directories and append one <pid> child to the
 * XML node pids for every process that passes the filters in dctl.
 *
 * Selection, in the order applied:
 *   - a process whose owner uid appears in dctl.uidlist is selected;
 *   - a process whose task name appears in dctl.tasklist is selected;
 *   - if dctl.userlist and dctl.tasklist are both empty, every process
 *     is selected, except root-owned ones when dctl.root is 0;
 *   - if dctl.running is set, processes not in state 'R' are dropped;
 *   - processes with less than dctl.min_runtime seconds of accumulated
 *     user+system cpu time are dropped.
 *
 * NOTE(review): a uid match and a task name match are combined with OR
 * here, not AND -- confirm which is intended.  Also note that the uid
 * walk uses dctl.uidlist while the emptiness test uses dctl.userlist.
 *
 * Each <pid> element carries a "pid" attribute and the children
 * taskname, state, user (with a "uid" attribute), uid, time, nice,
 * vsize and rss.  The raw stat/statm lines are added when dctl.pidstats
 * is set and the command line when dctl.cmdline is set.
 *
 * A process that disappears while it is being examined (its stat file
 * cannot be opened, read or parsed) is silently skipped.  The function
 * exits the program if /proc itself cannot be opened.  It uses static
 * scratch buffers and the global outbuf, so it is not reentrant.
 */
void get_proc_pids(xmlNodePtr pids)
{

 int i, do_pid, running, cpu_secs, debug;
 int nice;
 long int utime, stime;
 unsigned long vsize, rss;
 struct passwd *pw;
 struct stat pidstat;
 struct dirent *ent;
 DIR *procdir;
 ListElement *element;
 xmlNodePtr pid_node, user_node;

 /* "/proc/" + directory entry name + longest suffix ("/cmdline") */
 char path[sizeof("/proc//cmdline") + 256];
 char name[32];
 char state[32];

 /* Kept static so that three K-sized buffers do not land on the stack */
 static char cmdline[K];
 static char stat_buf[K];
 static char statm_buf[K];

 debug = (verbose == D_ALL) || (verbose == D_PIDS);

 if(debug){
   printf("D_PIDS: Starting get_proc_pids().  Use -v %d to focus.\n",D_PIDS);
 }

 /*
  * We have to search all the directories in /proc for ones that
  * are pids, so first we open /proc as a directory.
  */
 if ((procdir = opendir("/proc")) == NULL) {
   fprintf(stderr,"Error: directory /proc cannot be opened\n");
   exit(1);
 }

 while ((ent = readdir(procdir)) != NULL) {

   /* Only entries that start with a digit are presumed to be pids. */
   if (ent->d_name[0] < '0' || ent->d_name[0] > '9') {
     continue;
   }

   /*
    * Read the one-line summary stat file.  The owner is taken from
    * fstat() on the same descriptor.  We presume that read() on a proc
    * file returns the whole line in one call.  If anything fails the
    * process has most likely exited; skip it rather than report it
    * with leftover data.
    */
   snprintf(path, sizeof path, "/proc/%s/stat", ent->d_name);
   if (pids_read_file(path, stat_buf, sizeof stat_buf, &pidstat) <= 0) {
     if(debug){
       printf("D_PIDS: Cannot read %s, skipping\n",path);
     }
     continue;
   }
   if(debug){
     printf("D_PIDS: Checking process %s\n",ent->d_name);
     printf("D_PIDS: Owner uid = %d, owner gid = %d\n",(int) pidstat.st_uid,(int) pidstat.st_gid);
     printf("D_PIDS: Parsing stat:\n%s\n",stat_buf);
   }

   if (pids_parse_stat(stat_buf, name, state, &utime, &stime, &nice,
                       &vsize, &rss) != 0) {
     if(debug){
       printf("D_PIDS: Cannot parse %s, skipping\n",path);
     }
     continue;
   }
   running = (state[0] == 'R');

   do_pid = 0;	/* do NOT send until some rule says otherwise */

   /* Walk the uid list, stopping at the first match on the owner. */
   for (element = dctl.uidlist->head; element != NULL; element = element->next) {
     if (pidstat.st_uid == (uid_t) atoi(element->data)) {
       if(debug){
         printf("D_PIDS: Matched task uid! set do_pid for pid = %s\n",ent->d_name);
       }
       do_pid = 1;
       break;
     }
   }

   /* Walk the task list, stopping at the first match on the name. */
   for (element = dctl.tasklist->head; element != NULL; element = element->next) {
     if (strcmp(element->data,name) == 0) {
       if(debug){
         printf("D_PIDS: Matched task name! Setting do_pid for pid = %s\n",ent->d_name);
       }
       do_pid = 1;
       break;
     }
   }

   /*
    * With both lists empty we track everything, except root-owned
    * processes when dctl.root is off.
    */
   if (dctl.userlist->size == 0 && dctl.tasklist->size == 0) {
     do_pid = 1;
     if (dctl.root == 0 && pidstat.st_uid == 0) {
       do_pid = 0;
     }
     if(debug){
       printf("D_PIDS: No list tasks. pid = %s has do_pids = %d\n",ent->d_name,do_pid);
     }
   }

   /*
    * If the running flag is set, a task that is not running is never
    * displayed, no matter what the earlier decisions were.
    */
   if (!running && dctl.running) {
     do_pid = 0;
     if(debug){
       printf("D_PIDS: Task not running. Will not display pid = %s\n",ent->d_name);
     }
   }

   /*
    * Ignore processes that have accumulated less than dctl.min_runtime
    * seconds of cpu.  The division is done before narrowing to int so
    * that large tick counts are not truncated first.
    */
   cpu_secs = (int) ((utime + stime) / HZ);
   if (cpu_secs < dctl.min_runtime) {
     do_pid = 0;
     if(debug){
       printf("D_PIDS: Task has time of only %d, needs %d. Will not display pid = %s\n",cpu_secs,dctl.min_runtime,ent->d_name);
     }
   }

   if(debug){
     printf("D_PIDS: Final decision for pid = %s -- do_pid = %d\n",ent->d_name,do_pid);
   }
   if (!do_pid) {
     continue;
   }

   if(debug){
     printf("D_PIDS: So, we should now be sending pid = %s\n",ent->d_name);
   }

   /* This is a hopefully valid process, so start a tag. */
   pid_node = xmlNewChild(pids,NULL,(xmlChar*) "pid",NULL);
   xmlSetProp(pid_node,(xmlChar*) "pid",(xmlChar*) ent->d_name);

   /* The raw stat and statm lines are sent only if dctl.pidstats is set. */
   if (dctl.pidstats) {
     /* stat is still in stat_buf; drop its trailing LF if present. */
     pids_strip_newline(stat_buf);
     xmlNewChild(pid_node,NULL,(xmlChar*) "stat",(xmlChar*) stat_buf);

     /* statm is a single LF-terminated line too; omit it if unreadable. */
     snprintf(path, sizeof path, "/proc/%s/statm", ent->d_name);
     if (pids_read_file(path, statm_buf, sizeof statm_buf, NULL) > 0) {
       pids_strip_newline(statm_buf);
       xmlNewChild(pid_node,NULL,(xmlChar*) "statm",(xmlChar*) statm_buf);
     }
   }

   /* The command line is sent only if dctl.cmdline is set. */
   if (dctl.cmdline) {
     /*
      * cmdline is a vector of NUL-terminated strings with no other
      * separator.  Replace each single NUL with a blank and stop at
      * the first double NUL.  An unreadable or empty file yields an
      * empty <cmdline> element because the buffer is zeroed first.
      */
     snprintf(path, sizeof path, "/proc/%s/cmdline", ent->d_name);
     if(debug){
       printf("D_PIDS: Getting cmdline from %s\n",path);
     }
     (void) pids_read_file(path, cmdline, sizeof cmdline, NULL);
     if(debug){
       printf("D_PIDS: Converting nulls in cmdline starting with %s\n",cmdline);
     }
     for (i = 0; i < K-2; i++) {
       if (cmdline[i] == '\0') {
         if (cmdline[i+1] == '\0') {
           break;
         }
         cmdline[i] = ' ';
       }
     }
     if(debug){
       printf("D_PIDS: Done.  Wrapping %s in <cmdline> tag\n",cmdline);
     }
     xmlNewChild(pid_node,NULL,(xmlChar*) "cmdline",(xmlChar*) cmdline);
   }

   /*
    * The following are ALWAYS sent; in most cases this is all that
    * is needed.
    */
   xmlNewChild(pid_node,NULL,(xmlChar*) "taskname",(xmlChar*) name);
   xmlNewChild(pid_node,NULL,(xmlChar*) "state",(xmlChar*) state);

   /* A uid with no passwd entry is reported by its number instead. */
   pw = getpwuid(pidstat.st_uid);
   if (pw != NULL && pw->pw_name != NULL) {
     snprintf(outbuf,K,"%s",pw->pw_name);
   } else {
     snprintf(outbuf,K,"%d",(int) pidstat.st_uid);
   }
   user_node = xmlNewChild(pid_node,NULL,(xmlChar*) "user",(xmlChar*) outbuf);
   snprintf(outbuf,K,"%d",(int) pidstat.st_uid);
   xmlSetProp(user_node,(xmlChar*) "uid",(xmlChar*) outbuf);
   xmlNewChild(pid_node,NULL,(xmlChar*) "uid",(xmlChar*) outbuf);

   snprintf(outbuf,K,"%s",scale_time(cpu_secs,6));
   xmlNewChild(pid_node,NULL,(xmlChar*) "time",(xmlChar*) outbuf);
   snprintf(outbuf,K,"%d",nice);
   xmlNewChild(pid_node,NULL,(xmlChar*) "nice",(xmlChar*) outbuf);
   snprintf(outbuf,K,"%s",scale_k(vsize,5,0));
   xmlNewChild(pid_node,NULL,(xmlChar*) "vsize",(xmlChar*) outbuf);
   /* NOTE(review): rss is in pages; 4096 assumes a 4 KiB page size. */
   snprintf(outbuf,K,"%s",scale_k(rss*4096,5,0));
   xmlNewChild(pid_node,NULL,(xmlChar*) "rss",(xmlChar*) outbuf);
 }
 closedir(procdir);

}

