/* * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This software is a component of "386BSD" developed by William F. Jolitz, TeleMuse. * 4. Neither the name of the developer nor the name "386BSD" * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT. * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT * NOT MAKE USE THIS WORK. * * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992. 
* * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: vfs__bio.c,v 1.7 1993/10/19 01:06:29 nate Exp $ */ #include "param.h" #include "systm.h" #include "proc.h" #include "vnode.h" #include "buf.h" #include "specdev.h" #include "mount.h" #include "malloc.h" #include "vm/vm.h" #include "resourcevar.h" static struct buf *getnewbuf(int); extern vm_map_t buffer_map; /* * Initialize buffer headers and related structures. */ void bufinit() { struct bufhd *bh; struct buf *bp; /* first, make a null hash table */ for(bh = bufhash; bh < bufhash + BUFHSZ; bh++) { bh->b_flags = 0; bh->b_forw = (struct buf *)bh; bh->b_back = (struct buf *)bh; } /* next, make a null set of free lists */ for(bp = bfreelist; bp < bfreelist + BQUEUES; bp++) { bp->b_flags = 0; bp->av_forw = bp; bp->av_back = bp; bp->b_forw = bp; bp->b_back = bp; } /* finally, initialize each buffer header and stick on empty q */ for(bp = buf; bp < buf + nbuf ; bp++) { bp->b_flags = B_HEAD | B_INVAL; /* we're just an empty header */ bp->b_dev = NODEV; bp->b_vp = 0; binstailfree(bp, bfreelist + BQ_EMPTY); binshash(bp, bfreelist + BQ_EMPTY); } } /* * Find the block in the buffer pool. * If the buffer is not present, allocate a new buffer and load * its contents according to the filesystem fill routine. 
*/
/*
 * bread:
 *	vp	vnode the block belongs to
 *	blkno	logical block number to look up
 *	size	size of the buffer, passed through to getblk()
 *	cred	credential recorded in b_rcred for the read; a reference
 *		is taken with crhold() unless it is NOCRED
 *	bpp	out: receives the busy buffer, even when an error is returned
 *
 * Returns 0 if the block was already cached, otherwise whatever
 * biowait() reports for the read that was started.
 */
int
bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred,
	struct buf **bpp)
{
	struct buf *bp;
	int rv = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);		/* 25 Apr 92*/
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		rv = biowait (bp);
	}
	*bpp = bp;

	return (rv);
}

/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block.  [See page 55 of Bach's Book]
 *
 *	rablkno	logical block number of the read-ahead block
 *	rabsize	size of the read-ahead buffer
 *
 * The read-ahead is issued B_ASYNC, so its buffer is released by
 * biodone() when the I/O finishes; if the read-ahead block is already
 * cached it is released again immediately.  Only the primary block is
 * waited for and handed back through bpp.
 *
 * A read-ahead request for the primary block itself is ignored: the
 * primary buffer is still busy and held by this caller, and getblk()
 * sleeps until a busy buffer is released, so asking for the same block
 * a second time would never return.
 */
int
breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, int rabsize,
	struct ucred *cred, struct buf **bpp)
{
	struct buf *bp, *rabp;
	int rv = 0, needwait = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);		/* 25 Apr 92*/
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		needwait++;
	}

	/* never read ahead the block we are already holding busy */
	if (rablkno != blkno) {
		rabp = getblk (vp, rablkno, rabsize);

		/* if not found in cache, do some I/O (overlapped with first) */
		if ((rabp->b_flags & B_CACHE) == 0
			|| (rabp->b_flags & B_INVAL) != 0) {
			if (curproc && curproc->p_stats)	/* count block I/O */
				curproc->p_stats->p_ru.ru_inblock++;
			rabp->b_flags |= B_READ | B_ASYNC;
			rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
			if (cred != NOCRED) crhold(cred);	/* 25 Apr 92*/
			rabp->b_rcred = cred;
			VOP_STRATEGY(rabp);
		} else
			brelse(rabp);
	}

	/* wait for original I/O */
	if (needwait)
		rv = biowait (bp);

	*bpp = bp;
	return (rv);
}

/*
 * Synchronous write.
 * Release buffer on completion.
*/ int bwrite(register struct buf *bp) { int rv; if(bp->b_flags & B_INVAL) { brelse(bp); return (0); } else { int wasdelayed; if(!(bp->b_flags & B_BUSY)) panic("bwrite: not busy"); wasdelayed = bp->b_flags & B_DELWRI; bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_ASYNC|B_DELWRI); if(wasdelayed) reassignbuf(bp, bp->b_vp); if (curproc && curproc->p_stats) /* count block I/O */ curproc->p_stats->p_ru.ru_oublock++; bp->b_flags |= B_DIRTY; bp->b_vp->v_numoutput++; VOP_STRATEGY(bp); rv = biowait(bp); brelse(bp); return (rv); } } /* * Delayed write. * * The buffer is marked dirty, but is not queued for I/O. * This routine should be used when the buffer is expected * to be modified again soon, typically a small write that * partially fills a buffer. * * NB: magnetic tapes cannot be delayed; they must be * written in the order that the writes are requested. */ void bdwrite(register struct buf *bp) { if(!(bp->b_flags & B_BUSY)) panic("bdwrite: not busy"); if(bp->b_flags & B_INVAL) { brelse(bp); return; } if(bp->b_flags & B_TAPE) { bwrite(bp); return; } bp->b_flags &= ~(B_READ|B_DONE); bp->b_flags |= B_DIRTY|B_DELWRI; reassignbuf(bp, bp->b_vp); brelse(bp); return; } /* * Asynchronous write. * Start I/O on a buffer, but do not wait for it to complete. * The buffer is released when the I/O completes. */ void bawrite(register struct buf *bp) { if(!(bp->b_flags & B_BUSY)) panic("bawrite: not busy"); if(bp->b_flags & B_INVAL) brelse(bp); else { int wasdelayed; wasdelayed = bp->b_flags & B_DELWRI; bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); if(wasdelayed) reassignbuf(bp, bp->b_vp); if (curproc && curproc->p_stats) /* count block I/O */ curproc->p_stats->p_ru.ru_oublock++; bp->b_flags |= B_DIRTY | B_ASYNC; bp->b_vp->v_numoutput++; VOP_STRATEGY(bp); } } /* * Release a buffer. * Even if the buffer is dirty, no I/O is started. */ void brelse(register struct buf *bp) { int x; /* anyone need a "free" block? 
*/ x=splbio(); if ((bfreelist + BQ_AGE)->b_flags & B_WANTED) { (bfreelist + BQ_AGE) ->b_flags &= ~B_WANTED; wakeup(bfreelist); } /* anyone need this very block? */ if (bp->b_flags & B_WANTED) { bp->b_flags &= ~B_WANTED; wakeup(bp); } if (bp->b_flags & (B_INVAL|B_ERROR)) { bp->b_flags |= B_INVAL; bp->b_flags &= ~(B_DELWRI|B_CACHE); if(bp->b_vp) brelvp(bp); } /* enqueue */ /* just an empty buffer head ... */ /*if(bp->b_flags & B_HEAD) binsheadfree(bp, bfreelist + BQ_EMPTY)*/ /* buffers with junk contents */ /*else*/ if(bp->b_flags & (B_ERROR|B_INVAL|B_NOCACHE)) binsheadfree(bp, bfreelist + BQ_AGE) /* buffers with stale but valid contents */ else if(bp->b_flags & B_AGE) binstailfree(bp, bfreelist + BQ_AGE) /* buffers with valid and quite potentially reuseable contents */ else binstailfree(bp, bfreelist + BQ_LRU) /* unlock */ bp->b_flags &= ~B_BUSY; splx(x); } int freebufspace; int allocbufspace; /* * Find a buffer which is available for use. * If free memory for buffer space and an empty header from the empty list, * use that. Otherwise, select something from a free list. * Preference is to AGE list, then LRU list. */ static struct buf * getnewbuf(int sz) { struct buf *bp; int x; x = splbio(); start: /* can we constitute a new buffer? 
*/ if (freebufspace > sz && bfreelist[BQ_EMPTY].av_forw != (struct buf *)bfreelist+BQ_EMPTY) { caddr_t addr; /*#define notyet*/ #ifndef notyet if ((addr = malloc (sz, M_TEMP, M_WAITOK)) == 0) goto tryfree; #else /* notyet */ /* get new memory buffer */ if (round_page(sz) == sz) addr = (caddr_t) kmem_alloc_wired_wait(buffer_map, sz); else addr = (caddr_t) malloc (sz, M_TEMP, M_WAITOK); /*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;*/ bzero(addr, sz); #endif /* notyet */ freebufspace -= sz; allocbufspace += sz; bp = bfreelist[BQ_EMPTY].av_forw; bp->b_flags = B_BUSY | B_INVAL; bremfree(bp); bp->b_un.b_addr = addr; bp->b_bufsize = sz; /* 20 Aug 92*/ goto fillin; } tryfree: if (bfreelist[BQ_AGE].av_forw != (struct buf *)bfreelist+BQ_AGE) { bp = bfreelist[BQ_AGE].av_forw; bremfree(bp); } else if (bfreelist[BQ_LRU].av_forw != (struct buf *)bfreelist+BQ_LRU) { bp = bfreelist[BQ_LRU].av_forw; bremfree(bp); } else { /* wait for a free buffer of any kind */ (bfreelist + BQ_AGE)->b_flags |= B_WANTED; sleep(bfreelist, PRIBIO); splx(x); return (0); } /* if we are a delayed write, convert to an async write! */ if (bp->b_flags & B_DELWRI) { bp->b_flags |= B_BUSY; bawrite (bp); goto start; } if(bp->b_vp) brelvp(bp); /* we are not free, nor do we contain interesting data */ if (bp->b_rcred != NOCRED) crfree(bp->b_rcred); /* 25 Apr 92*/ if (bp->b_wcred != NOCRED) crfree(bp->b_wcred); bp->b_flags = B_BUSY; fillin: bremhash(bp); splx(x); bp->b_dev = NODEV; bp->b_vp = NULL; bp->b_blkno = bp->b_lblkno = 0; bp->b_iodone = 0; bp->b_error = 0; bp->b_resid = 0; bp->b_wcred = bp->b_rcred = NOCRED; if (bp->b_bufsize != sz) allocbuf(bp, sz); bp->b_bcount = bp->b_bufsize = sz; bp->b_dirtyoff = bp->b_dirtyend = 0; return (bp); } /* * Check to see if a block is currently memory resident. 
*/ struct buf * incore(struct vnode *vp, daddr_t blkno) { struct buf *bh; struct buf *bp; bh = BUFHASH(vp, blkno); /* Search hash chain */ bp = bh->b_forw; while (bp != (struct buf *) bh) { /* hit */ if (bp->b_lblkno == blkno && bp->b_vp == vp && (bp->b_flags & B_INVAL) == 0) return (bp); bp = bp->b_forw; } return(0); } /* * Get a block of requested size that is associated with * a given vnode and block offset. If it is found in the * block cache, mark it as having been found, make it busy * and return it. Otherwise, return an empty block of the * correct size. It is up to the caller to insure that the * cached blocks be of the correct size. */ struct buf * getblk(register struct vnode *vp, daddr_t blkno, int size) { struct buf *bp, *bh; int x; for (;;) { if (bp = incore(vp, blkno)) { x = splbio(); if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; sleep (bp, PRIBIO); splx(x); continue; } bp->b_flags |= B_BUSY | B_CACHE; bremfree(bp); if (size > bp->b_bufsize) panic("now what do we do?"); /* if (bp->b_bufsize != size) allocbuf(bp, size); */ } else { if((bp = getnewbuf(size)) == 0) continue; bp->b_blkno = bp->b_lblkno = blkno; bgetvp(vp, bp); x = splbio(); bh = BUFHASH(vp, blkno); binshash(bp, bh); bp->b_flags = B_BUSY; } splx(x); return (bp); } } /* * Get an empty, disassociated buffer of given size. */ struct buf * geteblk(int size) { struct buf *bp; int x; while ((bp = getnewbuf(size)) == 0) ; x = splbio(); binshash(bp, bfreelist + BQ_AGE); splx(x); return (bp); } /* * Exchange a buffer's underlying buffer storage for one of different * size, taking care to maintain contents appropriately. When buffer * increases in size, caller is responsible for filling out additional * contents. When buffer shrinks in size, data is lost, so caller must * first return it to backing store before shrinking the buffer, as * no implied I/O will be done. * * Expanded buffer is returned as value. 
*/ void allocbuf(register struct buf *bp, int size) { caddr_t newcontents; /* get new memory buffer */ #ifndef notyet newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK); #else /* notyet */ if (round_page(size) == size) newcontents = (caddr_t) kmem_alloc_wired_wait(buffer_map, size); else newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK); #endif /* notyet */ /* copy the old into the new, up to the maximum that will fit */ bcopy (bp->b_un.b_addr, newcontents, min(bp->b_bufsize, size)); /* return old contents to free heap */ #ifndef notyet free (bp->b_un.b_addr, M_TEMP); #else /* notyet */ if (round_page(bp->b_bufsize) == bp->b_bufsize) kmem_free_wakeup(buffer_map, bp->b_un.b_addr, bp->b_bufsize); else free (bp->b_un.b_addr, M_TEMP); #endif /* notyet */ /* adjust buffer cache's idea of memory allocated to buffer contents */ freebufspace -= size - bp->b_bufsize; allocbufspace += size - bp->b_bufsize; /* update buffer header */ bp->b_un.b_addr = newcontents; bp->b_bcount = bp->b_bufsize = size; } /* * Patiently await operations to complete on this buffer. * When they do, extract error value and return it. * Extract and return any errors associated with the I/O. * If an invalid block, force it off the lookup hash chains. */ int biowait(register struct buf *bp) { int x; x = splbio(); while ((bp->b_flags & B_DONE) == 0) sleep((caddr_t)bp, PRIBIO); if((bp->b_flags & B_ERROR) || bp->b_error) { if ((bp->b_flags & B_INVAL) == 0) { bp->b_flags |= B_INVAL; bremhash(bp); binshash(bp, bfreelist + BQ_AGE); } if (!bp->b_error) bp->b_error = EIO; else bp->b_flags |= B_ERROR; splx(x); return (bp->b_error); } else { splx(x); return (0); } } /* * Finish up operations on a buffer, calling an optional * function (if requested), and releasing the buffer if * marked asynchronous. Then mark this buffer done so that * others biowait()'ing for it will notice when they are * woken up from sleep(). 
*/
/*
 * biodone:
 *	bp	buffer whose I/O has finished
 *
 * Runs with block I/O interrupts masked (splbio) throughout.  The
 * statement order is significant: the B_CALL hook runs first, a
 * completed write (B_DIRTY set, B_READ clear) is reported through
 * vwakeup(), an asynchronous buffer is released, and only then is
 * B_DONE set and the sleepers on bp woken.
 *
 * Always returns 0.  The function is declared int, so it returns an
 * explicit value instead of running off the end of its body.
 */
int
biodone(register struct buf *bp)
{
	int x;

	x = splbio();
	if (bp->b_flags & B_CALL) (*bp->b_iodone)(bp);
	bp->b_flags &= ~B_CALL;
	if ((bp->b_flags & (B_READ|B_DIRTY)) == B_DIRTY)  {
		bp->b_flags &= ~B_DIRTY;
		vwakeup(bp);
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	bp->b_flags &= ~B_ASYNC;
	bp->b_flags |= B_DONE;
	wakeup(bp);
	splx(x);
	return (0);
}