Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.43
diff -u -r1.43 uipc_socket.c
--- kern/uipc_socket.c	2002/06/11 05:07:43	1.43
+++ kern/uipc_socket.c	2002/07/21 23:03:05
@@ -36,6 +36,44 @@
  *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
  */
 
+/* 
+ * portions of this code are:
+ *
+ * Copyright (c) 2002 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe of Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the NetBSD
+ *      Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
@@ -52,6 +90,24 @@
 #include <sys/resourcevar.h>
 #include <sys/pool.h>
 
+#include <uvm/uvm.h>
+
+/* modified from NetBSD */
+
+/*
+ * Initializer for an event count structure.  The length fields are zero
+ * here; they are set, and the counter joins the evcnt list, at attach time.
+ */
+#define EVCNT_INITIALIZER(type, name)                                    \
+    {                                                                   \
+        0,                      /* ev_count */                          \
+        type,                   /* ev_type */                           \
+        0,                      /* ev_grouplen */                       \
+        0,                      /* ev_namelen */                        \
+        0,                      /* ev_pad1 */                           \
+        name,                   /* ev_name */                           \
+    }
+
 void 	filt_sordetach(struct knote *kn);
 int 	filt_soread(struct knote *kn, long hint);
 void 	filt_sowdetach(struct knote *kn);
@@ -75,13 +131,207 @@
 
 struct pool socket_pool;
 
+#ifdef SOSEND_COUNTERS
+#include <sys/device.h>
+/* Event counters for the socket send code; see SOSEND_COUNTER_INCR(). */
+struct evcnt sosend_loan_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, "loan big");
+struct evcnt sosend_copy_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, "copy big");
+struct evcnt sosend_copy_small = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, "copy small");
+struct evcnt sosend_kvalimit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, "kva limit");
+
+/* Bump a counter; expands to nothing when counters are compiled out. */
+#define        SOSEND_COUNTER_INCR(ev)         (ev)->ev_count++
+
+#else
+
+#define        SOSEND_COUNTER_INCR(ev)         /* nothing */
+
+#endif /* SOSEND_COUNTERS */
+
 void
 soinit(void)
 {

 	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL);
+#ifdef SOSEND_COUNTERS
+        evcnt_attach(&sosend_loan_big);         /* attach each send counter */
+        evcnt_attach(&sosend_copy_big);
+        evcnt_attach(&sosend_copy_small);
+        evcnt_attach(&sosend_kvalimit);
+#endif /* SOSEND_COUNTERS */
+}
+
+#ifdef SOSEND_LOAN
+
+struct mbuf *so_pendfree;       /* global list of loan mbufs awaiting free */
+
+int somaxkva = 16 * 1024 * 1024; /* byte limit on KVA reserved for loans */
+int socurkva;                   /* bytes of KVA currently reserved */
+int sokvawaiters;               /* sleepers waiting on socurkva */
+
+#define SOCK_LOAN_THRESH        4096    /* min iov_len/space to try a loan */
+#define SOCK_LOAN_CHUNK         65536   /* max bytes loaned per call */
+
+static void
+sodoloanfree(caddr_t buf, u_int size)
+{
+        struct vm_page **pgs;
+        vaddr_t va, sva, eva;
+        vsize_t len;
+        paddr_t pa;
+        int i, npgs;
+        /* Undo a loan: unmap buf's pages, unloan them, free the KVA. */
+        eva = round_page((vaddr_t) buf + size);
+        sva = trunc_page((vaddr_t) buf);
+        len = eva - sva;
+        npgs = len >> PAGE_SHIFT;
+
+        pgs = alloca(npgs * sizeof(*pgs));
+        /* Look up the page behind each kernel VA before unmapping. */
+        for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) {
+                if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
+                        panic("sodoloanfree: va 0x%lx not mapped", va);
+                pgs[i] = PHYS_TO_VM_PAGE(pa);
+        }
+        /* Drop the mapping, return the pages and KVA, wake KVA waiters. */
+        pmap_kremove(sva, len);
+        pmap_update(pmap_kernel());
+        uvm_unloanpage(pgs, npgs);
+        uvm_km_free(kernel_map, sva, len);
+        socurkva -= len;
+        if (sokvawaiters)
+                wakeup(&socurkva);
+}
+
+static size_t
+sodopendfree(struct socket *so)
+{
+        struct mbuf *m;
+        size_t rv = 0;
+        int s;
+        /* Release all pending loans, global then per-socket; returns bytes. */
+        s = splvm();
+        /* Drain the global pending list first. */
+        for (;;) {
+                m = so_pendfree;
+                if (m == NULL)
+                        break;
+                so_pendfree = m->m_next;
+                splx(s);
+                /* m is unlinked; tear the loan down with the spl restored. */
+                rv += m->m_ext.ext_size;
+                sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size);
+                s = splvm();
+                pool_cache_put(&mbpool_cache, m);
+        }
+        /* Then the list private to this socket. */
+        for (;;) {
+                m = so->so_pendfree;
+                if (m == NULL)
+                        break;
+                so->so_pendfree = m->m_next;
+                splx(s);
+
+                rv += m->m_ext.ext_size;
+                sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size);
+                s = splvm();
+                pool_cache_put(&mbpool_cache, m);
+        }
+
+        splx(s);
+        return (rv);
+}
+
+static void
+soloanfree(struct mbuf *m, caddr_t buf, u_int size, void *arg)
+{
+        struct socket *so = arg;
+        int s;
+        /* Free hook for loaned storage (passed to MEXTADD by sosend_loan). */
+        if (m == NULL) {
+                sodoloanfree(buf, size);
+                return;
+        }
+        /* Defer: queue m on the socket; sodopendfree() does the real free. */
+        s = splvm();
+        m->m_next = so->so_pendfree;
+        so->so_pendfree = m;
+        splx(s);
+        if (sokvawaiters)
+                wakeup(&socurkva);
+}
+
+static long
+sosend_loan(struct socket *so, struct uio *uio, struct mbuf *m, long space)
+{
+        struct iovec *iov = uio->uio_iov;
+        vaddr_t sva, eva;
+        vsize_t len;
+        struct vm_page **pgs;
+        vaddr_t lva, va;
+        int npgs, s, i, error;
+        /* Map user pages into m rather than copying; returns bytes or 0. */
+        if (uio->uio_segflg != UIO_USERSPACE)
+                return (0);
+
+        if (iov->iov_len < (size_t) space)
+                space = iov->iov_len;
+        if (space > SOCK_LOAN_CHUNK)
+                space = SOCK_LOAN_CHUNK;
+        /* Page-align the user range being loaned. */
+        eva = round_page((vaddr_t) iov->iov_base + space);
+        sva = trunc_page((vaddr_t) iov->iov_base);
+        len = eva - sva;
+        npgs = len >> PAGE_SHIFT;
+        /* Stay under somaxkva: reap pending frees, else sleep for a wakeup. */
+        while (socurkva + len > somaxkva) {
+                if (sodopendfree(so))
+                        continue;
+                SOSEND_COUNTER_INCR(&sosend_kvalimit);
+                s = splvm();
+                sokvawaiters++;
+                (void) tsleep(&socurkva, PVM, "sokva", 0);
+                sokvawaiters--;
+                splx(s);
+        }
+        /* Reserve kernel VA for the mapping and account for it. */
+        lva = uvm_km_valloc_wait(kernel_map, len);
+        if (lva == 0)
+                return (0);
+        socurkva += len;
+
+        pgs = alloca(npgs * sizeof(*pgs));
+        /* NOTE(review): failure below frees KVA with no wakeup -- confirm. */
+        error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len,
+            pgs, UVM_LOAN_TOPAGE);
+        if (error) {
+                uvm_km_free(kernel_map, lva, len);
+                socurkva -= len;
+                return (0);
+        }
+        /* Enter the loaned pages read-only at the reserved kernel VA. */
+        for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE)
+                pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pgs[i]), VM_PROT_READ);
+        pmap_update(pmap_kernel());
+        /* Carry over the buffer's offset within its first page. */
+        lva += (vaddr_t) iov->iov_base & PAGE_MASK;
+        /* Attach as external storage with soloanfree() as the free hook. */
+        MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so);
+        /* Consume 'space' bytes from the uio by hand (no uiomove here). */
+        uio->uio_resid -= space;
+        /* uio_offset not updated, not set/used for write(2) */
+        uio->uio_iov->iov_base = (caddr_t) uio->uio_iov->iov_base + space;
+        uio->uio_iov->iov_len -= space;
+        if (uio->uio_iov->iov_len == 0) {
+                uio->uio_iov++;
+                uio->uio_iovcnt--;
+        }
+
+        return (space);
 }
 
+#endif /* SOSEND_LOAN */
+
 /*
  * Socket operation routines.
  * These routines are called by the routines in
@@ -186,6 +436,10 @@
 void
 sofree(struct socket *so)
 {
+#ifdef SOSEND_LOAN
+        struct mbuf *m;
+#endif /* SOSEND_LOAN */
+
 	splassert(IPL_SOFTNET);
 
 	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
@@ -201,6 +455,13 @@
 	}
 	sbrelease(&so->so_snd);
 	sorflush(so);
+#ifdef SOSEND_LOAN
+        while ((m = so->so_pendfree) != NULL) {
+                so->so_pendfree = m->m_next;
+                m->m_next = so_pendfree;
+                so_pendfree = m;
+        }
+#endif /* SOSEND_LOAN */
 	pool_put(&socket_pool, so);
 }
 
@@ -357,6 +618,9 @@
 					   NULL);
 bad:
 	splx(s);
+#ifdef SOSEND_LOAN
+        sodopendfree(so);
+#endif /* SOSEND_LOAN */
 	return (error);
 }
 
@@ -387,6 +651,9 @@
 	struct mbuf *control;
 	int flags;
 {
+#ifdef SOSEND_LOAN
+        sodopendfree(so);
+#endif /* SOSEND_LOAN */
 	struct proc *p = curproc;		/* XXX */
 	struct mbuf **mp;
 	struct mbuf *m;
@@ -479,6 +746,16 @@
 					MGET(m, M_WAIT, MT_DATA);
 					mlen = MLEN;
 				}
+#ifdef SOSEND_LOAN
+                                if (uio->uio_iov->iov_len >= SOCK_LOAN_THRESH &&
+                                    space >= SOCK_LOAN_THRESH &&
+                                    (len = sosend_loan(so, uio, m,
+                                                       space)) != 0) {
+                                        SOSEND_COUNTER_INCR(&sosend_loan_big);
+                                        space -= len;
+                                        goto have_data;
+                                }
+#endif /* SOSEND_LOAN */
 				if (resid >= MINCLSIZE && space >= MCLBYTES) {
 					MCLGET(m, M_WAIT);
 					if ((m->m_flags & M_EXT) == 0)
@@ -492,6 +769,7 @@
 					space -= len;
 				} else {
 nopages:
+                                        SOSEND_COUNTER_INCR(&sosend_copy_small);
 					len = lmin(lmin(mlen, resid), space);
 					space -= len;
 					/*
@@ -503,6 +781,9 @@
 				}
 				error = uiomove(mtod(m, caddr_t), (int)len,
 				    uio);
+#ifdef SOSEND_LOAN
+ have_data:
+#endif /* SOSEND_LOAN */
 				resid = uio->uio_resid;
 				m->m_len = len;
 				*mp = m;
@@ -569,6 +850,9 @@
 	struct mbuf **controlp;
 	int *flagsp;
 {
+#ifdef SOSEND_LOAN
+        sodopendfree(so);
+#endif /* SOSEND_LOAN */
 	register struct mbuf *m, **mp;
 	register int flags, len, error, s, offset;
 	struct protosw *pr = so->so_proto;
 
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.28
diff -u -r1.28 socketvar.h
--- sys/socketvar.h	2002/07/03 21:19:08	1.28
+++ sys/socketvar.h	2002/07/21 23:03:08
@@ -107,6 +107,7 @@
 	caddr_t	so_upcallarg;		/* Arg for above */
 	uid_t	so_euid, so_ruid;	/* who opened the socket */
 	gid_t	so_egid, so_rgid;
+	struct mbuf     *so_pendfree;   /* loaned-page mbufs w/ frees pending */
 };
 
 /*
