From: David Woodhouse <dwmw2@infradead.org>

The POSIX standard provides a portable way for applications to atomically wait
for _either_ a signal or an event on a file descriptor, without race
conditions.  The Linux kernel does not; glibc is reduced to trying to emulate
it, badly.  This is a bad thing.

The man page says it best...

   The idea of pselect is that if one wants to wait for an event, either a
   signal or something on a file descriptor, an atomic test is needed to
   prevent race conditions.  (Suppose the signal handler sets a global flag
   and returns.  Then a test of this global flag followed by a call of
   select() could hang indefinitely if the signal arrived just after the test
   but just before the call.  On the other hand, pselect allows one to first
   block signals, handle the signals that have come in, then call pselect()
   with the desired sigmask, avoiding the race.) Since Linux today does not
   have a pselect() system call, the current glibc2 routine still contains
   this race.



This patch implements the POSIX pselect() system call, which atomically
installs a new signal mask and enters select(), thus allowing userspace to
safely and portably wait for either signals or events on file descriptors,
without the fairly disgusting hacks which would otherwise be necessary for
glibc to provide this standard function.  

The patch also fixes the fact that select() on a 32-bit architecture with
1000 HZ timer ticks would silently do the wrong thing if asked for a
timeout greater than 24 days (LONG_MAX jiffies).  It would give an infinite
timeout instead.

It also implements ppoll().

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Cc: <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 arch/i386/kernel/syscall_table.S |    2 
 arch/ppc/kernel/misc.S           |    2 
 arch/ppc64/kernel/misc.S         |    4 
 arch/ppc64/kernel/signal32.c     |   11 -
 fs/compat.c                      |  207 ++++++++++++++++++++++++++------
 fs/select.c                      |  249 +++++++++++++++++++++++++++++++--------
 include/asm-i386/unistd.h        |    4 
 include/asm-ppc/unistd.h         |    4 
 include/asm-ppc64/unistd.h       |    4 
 include/linux/compat.h           |    1 
 include/linux/poll.h             |    1 
 11 files changed, 392 insertions(+), 97 deletions(-)

diff -puN arch/i386/kernel/syscall_table.S~pselect-ppoll-system-calls arch/i386/kernel/syscall_table.S
--- devel/arch/i386/kernel/syscall_table.S~pselect-ppoll-system-calls	2005-08-30 18:41:49.000000000 -0700
+++ devel-akpm/arch/i386/kernel/syscall_table.S	2005-08-30 18:41:49.000000000 -0700
@@ -294,3 +294,5 @@ ENTRY(sys_call_table)
 	.long sys_inotify_init
 	.long sys_inotify_add_watch
 	.long sys_inotify_rm_watch
+	.long sys_pselect6
+	.long sys_ppoll			/* 295 */
diff -puN arch/ppc64/kernel/misc.S~pselect-ppoll-system-calls arch/ppc64/kernel/misc.S
--- devel/arch/ppc64/kernel/misc.S~pselect-ppoll-system-calls	2005-08-30 18:41:49.000000000 -0700
+++ devel-akpm/arch/ppc64/kernel/misc.S	2005-08-30 18:41:49.000000000 -0700
@@ -1230,6 +1230,8 @@ _GLOBAL(sys_call_table32)
 	.llong .sys_inotify_init	/* 275 */
 	.llong .sys_inotify_add_watch
 	.llong .sys_inotify_rm_watch
+	.llong .compat_sys_pselect6
+	.llong .compat_sys_ppoll
 
 	.balign 8
 _GLOBAL(sys_call_table)
@@ -1511,3 +1513,5 @@ _GLOBAL(sys_call_table)
 	.llong .sys_inotify_init	/* 275 */
 	.llong .sys_inotify_add_watch
 	.llong .sys_inotify_rm_watch
+	.llong .sys_pselect6
+	.llong .sys_ppoll
diff -puN arch/ppc64/kernel/signal32.c~pselect-ppoll-system-calls arch/ppc64/kernel/signal32.c
--- devel/arch/ppc64/kernel/signal32.c~pselect-ppoll-system-calls	2005-08-30 18:41:49.000000000 -0700
+++ devel-akpm/arch/ppc64/kernel/signal32.c	2005-08-30 18:41:49.000000000 -0700
@@ -112,17 +112,6 @@ static inline void compat_from_sigset(co
 	}
 }
 
-static inline void sigset_from_compat(sigset_t *set, compat_sigset_t *compat)
-{
-	switch (_NSIG_WORDS) {
-	case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32);
-	case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32);
-	case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32);
-	case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32);
-	}
-}
-
-
 /*
  * Save the current user registers on the user stack.
  * We only save the altivec registers if the process has used
diff -puN arch/ppc/kernel/misc.S~pselect-ppoll-system-calls arch/ppc/kernel/misc.S
--- devel/arch/ppc/kernel/misc.S~pselect-ppoll-system-calls	2005-08-30 18:41:49.000000000 -0700
+++ devel-akpm/arch/ppc/kernel/misc.S	2005-08-30 18:41:49.000000000 -0700
@@ -1455,3 +1455,5 @@ _GLOBAL(sys_call_table)
 	.long sys_inotify_init		/* 275 */
 	.long sys_inotify_add_watch
 	.long sys_inotify_rm_watch
+	.long sys_pselect6
+	.long sys_ppoll
diff -puN fs/compat.c~pselect-ppoll-system-calls fs/compat.c
--- devel/fs/compat.c~pselect-ppoll-system-calls	2005-08-30 18:41:49.000000000 -0700
+++ devel-akpm/fs/compat.c	2005-08-30 18:41:49.000000000 -0700
@@ -1691,35 +1691,13 @@ static void select_bits_free(void *bits,
 #define MAX_SELECT_SECONDS \
 	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
 
-asmlinkage long
-compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
-		compat_ulong_t __user *exp, struct compat_timeval __user *tvp)
+int compat_core_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
+			   compat_ulong_t __user *exp, long *timeout)
 {
 	fd_set_bits fds;
 	char *bits;
-	long timeout;
 	int size, max_fdset, ret = -EINVAL;
 
-	timeout = MAX_SCHEDULE_TIMEOUT;
-	if (tvp) {
-		time_t sec, usec;
-
-		if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
-		    || __get_user(sec, &tvp->tv_sec)
-		    || __get_user(usec, &tvp->tv_usec)) {
-			ret = -EFAULT;
-			goto out_nofds;
-		}
-
-		if (sec < 0 || usec < 0)
-			goto out_nofds;
-
-		if ((unsigned long) sec < MAX_SELECT_SECONDS) {
-			timeout = ROUND_UP(usec, 1000000/HZ);
-			timeout += sec * (unsigned long) HZ;
-		}
-	}
-
 	if (n < 0)
 		goto out_nofds;
 
@@ -1753,19 +1731,7 @@ compat_sys_select(int n, compat_ulong_t 
 	zero_fd_set(n, fds.res_out);
 	zero_fd_set(n, fds.res_ex);
 
-	ret = do_select(n, &fds, &timeout);
-
-	if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
-		time_t sec = 0, usec = 0;
-		if (timeout) {
-			sec = timeout / HZ;
-			usec = timeout % HZ;
-			usec *= (1000000/HZ);
-		}
-		if (put_user(sec, &tvp->tv_sec) ||
-		    put_user(usec, &tvp->tv_usec))
-			ret = -EFAULT;
-	}
+	ret = do_select(n, &fds, timeout);
 
 	if (ret < 0)
 		goto out;
@@ -1786,6 +1752,173 @@ out_nofds:
 	return ret;
 }
 
+asmlinkage long
+compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
+		compat_ulong_t __user *exp, struct compat_timeval __user *tvp)
+{
+	long timeout = MAX_SCHEDULE_TIMEOUT;	/* jiffies; keeps this value when tvp is NULL */
+	struct compat_timeval tv;		/* kernel copy of *tvp: time not yet handed to the core */
+	int ret;
+
+	if (tvp) {
+		if (copy_from_user(&tv, tvp, sizeof(tv)))
+			return -EFAULT;
+
+		if (tv.tv_sec < 0 || tv.tv_usec < 0)
+			return -EINVAL;
+	}
+	/* Wait in slices of at most MAX_SELECT_SECONDS; whatever does not fit in one slice stays in tv. */
+	do {
+		if (tvp) {
+			if ((unsigned long) tv.tv_sec < MAX_SELECT_SECONDS) {
+				timeout = ROUND_UP(tv.tv_usec, 1000000/HZ);
+				timeout += tv.tv_sec * (unsigned long) HZ;
+				tv.tv_sec = 0;
+				tv.tv_usec = 0;
+			} else {
+				tv.tv_sec -= MAX_SELECT_SECONDS;
+				timeout = MAX_SELECT_SECONDS * HZ;
+			}
+		}
+
+		ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
+		/* Go round again only when ret is 0, timeout came back as 0 and tv still holds time. */
+	} while (!ret && !timeout && tvp && (tv.tv_sec || tv.tv_usec));
+	/* Unless STICKY_TIMEOUTS is set, hand tv plus the leftover jiffies back through *tvp. */
+	if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
+		tv.tv_sec += timeout / HZ;
+		tv.tv_usec += (timeout % HZ) * 1000000/HZ;
+		if (tv.tv_usec >= 1000000) {	/* carry microseconds into seconds */
+			tv.tv_sec++;
+			tv.tv_usec -= 1000000;
+		}
+		(void)copy_to_user(tvp, &tv, sizeof(tv));	/* a failed write-back does not change ret */
+	}
+
+	return ret;
+}
+asmlinkage long
+compat_sys_pselect7(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
+		    compat_ulong_t __user *exp, struct compat_timespec __user *tsp,
+		    compat_sigset_t __user *sigmask, compat_size_t sigsetsize)
+{
+	compat_sigset_t s32;			/* caller's mask as copied from user space */
+	sigset_t ksigmask, sigsaved;		/* mask to install / previous mask to put back */
+	long timeout = MAX_SCHEDULE_TIMEOUT;	/* jiffies; keeps this value when tsp is NULL */
+	struct compat_timespec ts;		/* kernel copy of *tsp: time not yet handed to the core */
+	int ret;
+
+	if (tsp) {
+		if (copy_from_user(&ts, tsp, sizeof(ts)))
+			return -EFAULT;
+
+		if (ts.tv_sec < 0 || ts.tv_nsec < 0)
+			return -EINVAL;
+	}
+	/* Optional temporary signal mask; every error return happens before it is installed. */
+	if (sigmask) {
+		if (sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&s32, sigmask, sizeof(s32)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &s32);
+		/* SIGKILL and SIGSTOP are taken out of the requested mask before it is applied. */
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+	/* Wait in slices of at most MAX_SELECT_SECONDS; whatever does not fit in one slice stays in ts. */
+	do {
+		if (tsp) {
+			if ((unsigned long) ts.tv_sec < MAX_SELECT_SECONDS) {
+				timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ);
+				timeout += ts.tv_sec * (unsigned long) HZ;
+				ts.tv_sec = 0;
+				ts.tv_nsec = 0;
+			} else {
+				ts.tv_sec -= MAX_SELECT_SECONDS;
+				timeout = MAX_SELECT_SECONDS * HZ;
+			}
+		}
+
+		ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
+		/* Go round again only when ret is 0, timeout came back as 0 and ts still holds time. */
+	} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
+	/* Unless STICKY_TIMEOUTS is set, hand ts plus the leftover jiffies back through *tsp. */
+	if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
+		ts.tv_sec += timeout / HZ;
+		ts.tv_nsec += (timeout % HZ) * (1000000000/HZ);
+		if (ts.tv_nsec >= 1000000000) {	/* carry nanoseconds into seconds */
+			ts.tv_sec++;
+			ts.tv_nsec -= 1000000000;
+		}
+		(void)copy_to_user(tsp, &ts, sizeof(ts));	/* a failed write-back does not change ret */
+	}
+	/* Put back the mask that was in force on entry. */
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
+
+asmlinkage long
+compat_sys_pselect6(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
+		   compat_ulong_t __user *exp, struct compat_timespec __user *tsp,
+		    void __user *sig)
+{
+	compat_size_t sigsetsize = 0;	/* stays 0 when sig is NULL */
+	compat_uptr_t up = 0;		/* user address of the sigset; stays 0 when sig is NULL */
+	/* sig, when non-NULL, is read as a compat_uptr_t immediately followed by a compat_size_t. */
+	if (sig) {
+		if (!access_ok(VERIFY_READ, sig, sizeof(compat_uptr_t) + sizeof(compat_size_t))
+		    || __get_user(up, (compat_uptr_t __user *)sig)
+		    || __get_user(sigsetsize, (compat_size_t __user *)(sig+sizeof(up))))
+			return -EFAULT;
+	}
+	return compat_sys_pselect7(n, inp, outp, exp, tsp, compat_ptr(up), sigsetsize);	/* unpacked 7-argument form */
+}
+
+#define MAX_INT64_SECONDS (((int64_t)(~((uint64_t)0)>>1)/HZ)-1)
+/* ppoll() for 32-bit tasks: poll with a timespec timeout while a temporary signal mask is installed. */
+asmlinkage long
+compat_sys_ppoll(struct pollfd __user * ufds, unsigned int nfds, struct compat_timespec __user *tsp,
+		 const compat_sigset_t __user *sigmask, compat_size_t sigsetsize)
+{
+	compat_sigset_t s32;
+	sigset_t ksigmask, sigsaved;
+	struct compat_timespec ts;
+	int64_t timeout = -1;	/* jiffies; negative (tsp == NULL) makes do_sys_poll() use MAX_SCHEDULE_TIMEOUT */
+	int ret;
+
+	if (tsp) {
+		if (copy_from_user(&ts, tsp, sizeof(ts)))
+			return -EFAULT;
+
+		if (ts.tv_sec < MAX_INT64_SECONDS) {
+			timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ);	/* nanoseconds, rounded up to whole jiffies */
+			timeout += (int64_t)ts.tv_sec * HZ;	/* widen first: the product can exceed the compat type */
+		} else
+			timeout = MAX_SCHEDULE_TIMEOUT;
+	}
+	/* Optional temporary signal mask; -EINVAL/-EFAULT are returned before the mask is touched. */
+	if (sigmask) {
+		if (sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&s32, sigmask, sizeof(s32)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &s32);
+		/* SIGKILL and SIGSTOP are taken out of the requested mask before it is applied. */
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	ret = do_sys_poll(ufds, nfds, timeout);
+	/* Put back the mask that was in force on entry. */
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
+
 #if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
 /* Stuff for NFS server syscalls... */
 struct compat_nfsctl_svc {
diff -puN fs/select.c~pselect-ppoll-system-calls fs/select.c
--- devel/fs/select.c~pselect-ppoll-system-calls	2005-08-30 18:41:49.000000000 -0700
+++ devel-akpm/fs/select.c	2005-08-30 18:41:49.000000000 -0700
@@ -292,35 +292,13 @@ static void select_bits_free(void *bits,
 #define MAX_SELECT_SECONDS \
 	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
 
-asmlinkage long
-sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp)
+static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
+			   fd_set __user *exp, long *timeout)
 {
 	fd_set_bits fds;
 	char *bits;
-	long timeout;
 	int ret, size, max_fdset;
 
-	timeout = MAX_SCHEDULE_TIMEOUT;
-	if (tvp) {
-		time_t sec, usec;
-
-		if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
-		    || __get_user(sec, &tvp->tv_sec)
-		    || __get_user(usec, &tvp->tv_usec)) {
-			ret = -EFAULT;
-			goto out_nofds;
-		}
-
-		ret = -EINVAL;
-		if (sec < 0 || usec < 0)
-			goto out_nofds;
-
-		if ((unsigned long) sec < MAX_SELECT_SECONDS) {
-			timeout = ROUND_UP(usec, 1000000/HZ);
-			timeout += sec * (unsigned long) HZ;
-		}
-	}
-
 	ret = -EINVAL;
 	if (n < 0)
 		goto out_nofds;
@@ -355,18 +333,7 @@ sys_select(int n, fd_set __user *inp, fd
 	zero_fd_set(n, fds.res_out);
 	zero_fd_set(n, fds.res_ex);
 
-	ret = do_select(n, &fds, &timeout);
-
-	if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
-		time_t sec = 0, usec = 0;
-		if (timeout) {
-			sec = timeout / HZ;
-			usec = timeout % HZ;
-			usec *= (1000000/HZ);
-		}
-		put_user(sec, &tvp->tv_sec);
-		put_user(usec, &tvp->tv_usec);
-	}
+	ret = do_select(n, &fds, timeout);
 
 	if (ret < 0)
 		goto out;
@@ -388,6 +355,133 @@ out_nofds:
 	return ret;
 }
 
+asmlinkage long
+sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp)
+{
+	long timeout = MAX_SCHEDULE_TIMEOUT;	/* jiffies; keeps this value when tvp is NULL */
+	struct timeval tv;			/* kernel copy of *tvp: time not yet handed to the core */
+	int ret;
+
+	if (tvp) {
+		if (copy_from_user(&tv, tvp, sizeof(tv)))
+			return -EFAULT;
+
+		if (tv.tv_sec < 0 || tv.tv_usec < 0)
+			return -EINVAL;
+	}
+	/* Wait in slices of at most MAX_SELECT_SECONDS; whatever does not fit in one slice stays in tv. */
+	do {
+		if (tvp) {
+			if ((unsigned long) tv.tv_sec < MAX_SELECT_SECONDS) {
+				timeout = ROUND_UP(tv.tv_usec, 1000000/HZ);
+				timeout += tv.tv_sec * (unsigned long) HZ;
+				tv.tv_sec = 0;
+				tv.tv_usec = 0;
+			} else {
+				tv.tv_sec -= MAX_SELECT_SECONDS;
+				timeout = MAX_SELECT_SECONDS * HZ;
+			}
+		}
+
+		ret = core_sys_select(n, inp, outp, exp, &timeout);
+		/* Go round again only when ret is 0, timeout came back as 0 and tv still holds time. */
+	} while (!ret && !timeout && tvp && (tv.tv_sec || tv.tv_usec));
+	/* Unless STICKY_TIMEOUTS is set, hand tv plus the leftover jiffies back through *tvp. */
+	if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
+		tv.tv_sec += timeout / HZ;
+		tv.tv_usec += (timeout % HZ) * 1000000/HZ;
+		if (tv.tv_usec >= 1000000) {	/* carry microseconds into seconds */
+			tv.tv_sec++;
+			tv.tv_usec -= 1000000;
+		}
+		(void)copy_to_user(tvp, &tv, sizeof(tv));	/* a failed write-back does not change ret */
+	}
+
+	return ret;
+}
+
+asmlinkage long
+sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
+	     struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize)
+{
+	long timeout = MAX_SCHEDULE_TIMEOUT;	/* jiffies; keeps this value when tsp is NULL */
+	sigset_t ksigmask, sigsaved;		/* mask to install / previous mask to put back */
+	struct timespec ts;			/* kernel copy of *tsp: time not yet handed to the core */
+	int ret;
+
+	if (tsp) {
+		if (copy_from_user(&ts, tsp, sizeof(ts)))
+			return -EFAULT;
+
+		if (ts.tv_sec < 0 || ts.tv_nsec < 0)
+			return -EINVAL;
+	}
+	/* Optional temporary signal mask; every error return happens before it is installed. */
+	if (sigmask) {
+		/* XXX: Don't preclude handling different sized sigset_t's.  */
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+		/* SIGKILL and SIGSTOP are taken out of the requested mask before it is applied. */
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+	/* Wait in slices of at most MAX_SELECT_SECONDS; whatever does not fit in one slice stays in ts. */
+	do {
+		if (tsp) {
+			if ((unsigned long) ts.tv_sec < MAX_SELECT_SECONDS) {
+				timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ);
+				timeout += ts.tv_sec * (unsigned long) HZ;
+				ts.tv_sec = 0;
+				ts.tv_nsec = 0;
+			} else {
+				ts.tv_sec -= MAX_SELECT_SECONDS;
+				timeout = MAX_SELECT_SECONDS * HZ;
+			}
+		}
+
+		ret = core_sys_select(n, inp, outp, exp, &timeout);
+		/* Go round again only when ret is 0, timeout came back as 0 and ts still holds time. */
+	} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
+	/* Unless STICKY_TIMEOUTS is set, hand ts plus the leftover jiffies back through *tsp. */
+	if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
+		ts.tv_sec += timeout / HZ;
+		ts.tv_nsec += (timeout % HZ) * (1000000000/HZ);
+		if (ts.tv_nsec >= 1000000000) {	/* carry nanoseconds into seconds */
+			ts.tv_sec++;
+			ts.tv_nsec -= 1000000000;
+		}
+		(void)copy_to_user(tsp, &ts, sizeof(ts));	/* a failed write-back does not change ret */
+	}
+	/* Put back the mask that was in force on entry. */
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	return ret;
+}
+
+/* Most architectures can't handle 7-argument syscalls. So we provide
+   a 6-argument version where the sixth argument is a pointer to a
+   user-space structure holding a pointer to the sigset_t itself followed
+   by a size_t containing the sigset size.  A NULL sig means "no mask". */
+asmlinkage long
+sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
+	     struct timespec __user *tsp, void __user *sig)
+{
+	size_t sigsetsize = 0;
+	sigset_t __user *up = NULL;	/* stays NULL when sig is NULL, so sys_pselect7() skips the mask */
+
+	if (sig) {
+		if (!access_ok(VERIFY_READ, sig, sizeof(void *) + sizeof(size_t))
+		    || __get_user(up, (sigset_t __user * __user *)sig)	/* user pointer to a user pointer */
+		    || __get_user(sigsetsize, (size_t __user *)(sig+sizeof(void *))))
+			return -EFAULT;
+	}
+
+	return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
+}
+
+
 struct poll_list {
 	struct poll_list *next;
 	int len;
@@ -457,7 +551,7 @@ static int do_poll(unsigned int nfds,  s
 	return count;
 }
 
-asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout)
+int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, int64_t timeout)
 {
 	struct poll_wqueues table;
  	int fdcount, err;
@@ -469,14 +563,6 @@ asmlinkage long sys_poll(struct pollfd _
 	if (nfds > current->files->max_fdset && nfds > OPEN_MAX)
 		return -EINVAL;
 
-	if (timeout) {
-		/* Careful about overflow in the intermediate values */
-		if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
-			timeout = (unsigned long)(timeout*HZ+999)/1000+1;
-		else /* Negative or overflow */
-			timeout = MAX_SCHEDULE_TIMEOUT;
-	}
-
 	poll_initwait(&table);
 
 	head = NULL;
@@ -506,7 +592,25 @@ asmlinkage long sys_poll(struct pollfd _
 		}
 		i -= pp->len;
 	}
-	fdcount = do_poll(nfds, head, &table, timeout);
+
+	do {
+		long timeo;
+
+		if (unlikely(timeout >= (int64_t)MAX_SCHEDULE_TIMEOUT - 1)) {
+			timeo = MAX_SCHEDULE_TIMEOUT - 1;
+			timeout -= timeo;
+		} else {
+			if (timeout < 0)
+				timeo = MAX_SCHEDULE_TIMEOUT;
+			else
+				timeo = timeout;
+
+			timeout = 0;
+		}
+
+	    fdcount = do_poll(nfds, head, &table, timeo);
+
+	} while (!fdcount && !signal_pending(current) && timeout);
 
 	/* OK, now copy the revents fields back to user space. */
 	walk = head;
@@ -534,3 +638,56 @@ out_fds:
 	poll_freewait(&table);
 	return err;
 }
+
+asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout)
+{
+	if (timeout) {	/* zero is handed to do_sys_poll() untouched */
+		/* Careful about overflow in the intermediate values */
+		if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
+			timeout = (unsigned long)(timeout*HZ+999)/1000+1;	/* thousandths of a second -> jiffies, rounded up, plus one */
+		else /* Negative or overflow */
+			timeout = -1;	/* negative makes do_sys_poll() use MAX_SCHEDULE_TIMEOUT */
+	}
+	/* From here on the timeout is a jiffies count, widened to 64 bits for do_sys_poll(). */
+	return do_sys_poll(ufds, nfds, (int64_t)timeout);
+}
+
+#define MAX_INT64_SECONDS (((int64_t)(~((uint64_t)0)>>1)/HZ)-1)
+/* ppoll(): poll with a timespec timeout while a temporary signal mask is installed. */
+asmlinkage long sys_ppoll(struct pollfd __user * ufds, unsigned int nfds, struct timespec __user *tsp,
+			  const sigset_t __user *sigmask, size_t sigsetsize)
+{
+	sigset_t ksigmask, sigsaved;
+	struct timespec ts;
+	int64_t timeout = -1;	/* jiffies; negative (tsp == NULL) makes do_sys_poll() use MAX_SCHEDULE_TIMEOUT */
+	int ret;
+
+	if (tsp) {
+		if (copy_from_user(&ts, tsp, sizeof(ts)))
+			return -EFAULT;
+
+		if (ts.tv_sec < MAX_INT64_SECONDS) {
+			timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ);	/* nanoseconds, rounded up to whole jiffies */
+			timeout += (int64_t)ts.tv_sec * HZ;	/* widen first: tv_sec * HZ can overflow a 32-bit long */
+		} else
+			timeout = MAX_SCHEDULE_TIMEOUT;
+	}
+	/* Optional temporary signal mask; -EINVAL/-EFAULT are returned before the mask is touched. */
+	if (sigmask) {
+		/* XXX: Don't preclude handling different sized sigset_t's.  */
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+		/* SIGKILL and SIGSTOP are taken out of the requested mask before it is applied. */
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	ret = do_sys_poll(ufds, nfds, timeout);
+	/* Put back the mask that was in force on entry. */
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
diff -puN include/asm-i386/unistd.h~pselect-ppoll-system-calls include/asm-i386/unistd.h
--- devel/include/asm-i386/unistd.h~pselect-ppoll-system-calls	2005-08-30 18:41:49.000000000 -0700
+++ devel-akpm/include/asm-i386/unistd.h	2005-08-30 18:41:49.000000000 -0700
@@ -299,8 +299,10 @@
 #define __NR_inotify_init	291
 #define __NR_inotify_add_watch	292
 #define __NR_inotify_rm_watch	293
+#define __NR_pselect6		294
+#define __NR_ppoll		295
 
-#define NR_syscalls 294
+#define NR_syscalls 296
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
diff -puN include/asm-ppc64/unistd.h~pselect-ppoll-system-calls include/asm-ppc64/unistd.h
--- devel/include/asm-ppc64/unistd.h~pselect-ppoll-system-calls	2005-08-30 18:41:49.000000000 -0700
+++ devel-akpm/include/asm-ppc64/unistd.h	2005-08-30 18:41:49.000000000 -0700
@@ -288,8 +288,10 @@
 #define __NR_inotify_init	275
 #define __NR_inotify_add_watch	276
 #define __NR_inotify_rm_watch	277
+#define __NR_pselect6		278
+#define __NR_ppoll		279
 
-#define __NR_syscalls		278
+#define __NR_syscalls		280
 #ifdef __KERNEL__
 #define NR_syscalls	__NR_syscalls
 #endif
diff -puN include/asm-ppc/unistd.h~pselect-ppoll-system-calls include/asm-ppc/unistd.h
--- devel/include/asm-ppc/unistd.h~pselect-ppoll-system-calls	2005-08-30 18:41:49.000000000 -0700
+++ devel-akpm/include/asm-ppc/unistd.h	2005-08-30 18:41:49.000000000 -0700
@@ -282,8 +282,10 @@
 #define __NR_inotify_init	275
 #define __NR_inotify_add_watch	276
 #define __NR_inotify_rm_watch	277
+#define __NR_pselect6		278
+#define __NR_ppoll		279
 
-#define __NR_syscalls		278
+#define __NR_syscalls		280
 
 #define __NR(n)	#n
 
diff -puN include/linux/compat.h~pselect-ppoll-system-calls include/linux/compat.h
--- devel/include/linux/compat.h~pselect-ppoll-system-calls	2005-08-30 18:41:49.000000000 -0700
+++ devel-akpm/include/linux/compat.h	2005-08-30 18:41:49.000000000 -0700
@@ -48,6 +48,7 @@ typedef struct {
 	compat_sigset_word	sig[_COMPAT_NSIG_WORDS];
 } compat_sigset_t;
 
+extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat);
 extern int cp_compat_stat(struct kstat *, struct compat_stat __user *);
 extern int get_compat_timespec(struct timespec *, const struct compat_timespec __user *);
 extern int put_compat_timespec(const struct timespec *, struct compat_timespec __user *);
diff -puN include/linux/poll.h~pselect-ppoll-system-calls include/linux/poll.h
--- devel/include/linux/poll.h~pselect-ppoll-system-calls	2005-08-30 18:41:49.000000000 -0700
+++ devel-akpm/include/linux/poll.h	2005-08-30 18:41:49.000000000 -0700
@@ -93,6 +93,7 @@ void zero_fd_set(unsigned long nr, unsig
 }
 
 extern int do_select(int n, fd_set_bits *fds, long *timeout);
+extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, int64_t timeout);
 
 #endif /* KERNEL */
 
_