X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_log.c;h=afaee301b0ee1fa2e1c491f5bd2109ed277fabf1;hb=1bb7d6b5a82f1d9487fd44415484a368f7c87bed;hp=9bfb69e1e885d2fcf7891392c664b4110e4c9161;hpb=af7b83f9324a77ef9a9080044bf0461f444ca651;p=linux-2.6 diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 9bfb69e1e8..afaee301b0 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -41,6 +41,7 @@ #include "xfs_inode.h" #include "xfs_rw.h" +kmem_zone_t *xfs_log_ticket_zone; #define xlog_write_adv_cnt(ptr, len, off, bytes) \ { (ptr) += (bytes); \ @@ -73,8 +74,6 @@ STATIC int xlog_state_get_iclog_space(xlog_t *log, xlog_ticket_t *ticket, int *continued_write, int *logoffsetp); -STATIC void xlog_state_put_ticket(xlog_t *log, - xlog_ticket_t *tic); STATIC int xlog_state_release_iclog(xlog_t *log, xlog_in_core_t *iclog); STATIC void xlog_state_switch_iclogs(xlog_t *log, @@ -101,7 +100,6 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, /* local ticket functions */ -STATIC void xlog_state_ticket_alloc(xlog_t *log); STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, int unit_bytes, int count, @@ -252,6 +250,29 @@ xlog_grant_add_space(struct log *log, int bytes) xlog_grant_add_space_reserve(log, bytes); } +static void +xlog_tic_reset_res(xlog_ticket_t *tic) +{ + tic->t_res_num = 0; + tic->t_res_arr_sum = 0; + tic->t_res_num_ophdrs = 0; +} + +static void +xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type) +{ + if (tic->t_res_num == XLOG_TIC_LEN_MAX) { + /* add to overflow and start again */ + tic->t_res_o_flow += tic->t_res_arr_sum; + tic->t_res_num = 0; + tic->t_res_arr_sum = 0; + } + + tic->t_res_arr[tic->t_res_num].r_len = len; + tic->t_res_arr[tic->t_res_num].r_type = type; + tic->t_res_arr_sum += len; + tic->t_res_num++; +} /* * NOTES: @@ -307,7 +328,7 @@ xfs_log_done(xfs_mount_t *mp, */ xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); xlog_ungrant_log_space(log, ticket); - xlog_state_put_ticket(log, ticket); + xlog_ticket_put(log, ticket); } else { xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); xlog_regrant_reserve_log_space(log, ticket); @@ -361,7 +382,27 @@ _xfs_log_force( return xlog_state_sync_all(log, flags, log_flushed); else return xlog_state_sync(log, lsn, flags, log_flushed); -} /* xfs_log_force */ +} /* _xfs_log_force */ + +/* + * Wrapper for _xfs_log_force(), to be used when caller doesn't care + * about errors or whether the log was flushed or not. This is the normal + * interface to use when trying to unpin items or move the log forward. + */ +void +xfs_log_force( + xfs_mount_t *mp, + xfs_lsn_t lsn, + uint flags) +{ + int error; + error = _xfs_log_force(mp, lsn, flags, NULL); + if (error) { + xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " + "error %d returned.", error); + } +} + /* * Attaches a new iclog I/O completion callback routine during @@ -374,12 +415,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ void *iclog_hndl, /* iclog to hang callback off */ xfs_log_callback_t *cb) { - xlog_t *log = mp->m_log; xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; - int abortflg, spl; + int abortflg; - cb->cb_next = NULL; - spl = LOG_LOCK(log); + spin_lock(&iclog->ic_callback_lock); abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); if (!abortflg) { ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || @@ -388,7 +427,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ *(iclog->ic_callback_tail) = cb; iclog->ic_callback_tail = &(cb->cb_next); } - LOG_UNLOCK(log, spl); + spin_unlock(&iclog->ic_callback_lock); return abortflg; } /* xfs_log_notify */ @@ -448,6 +487,8 @@ xfs_log_reserve(xfs_mount_t *mp, /* may sleep if need to allocate more tickets */ internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, client, flags); + if (!internal_ticket) + return XFS_ERROR(ENOMEM); internal_ticket->t_trans_type = t_type; *ticket = internal_ticket; xlog_trace_loggrant(log, internal_ticket, @@ -475,41 +516,52 @@ xfs_log_reserve(xfs_mount_t *mp, * Return error or zero. */ int -xfs_log_mount(xfs_mount_t *mp, - xfs_buftarg_t *log_target, - xfs_daddr_t blk_offset, - int num_bblks) +xfs_log_mount( + xfs_mount_t *mp, + xfs_buftarg_t *log_target, + xfs_daddr_t blk_offset, + int num_bblks) { + int error; + if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); else { cmn_err(CE_NOTE, "!Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", mp->m_fsname); - ASSERT(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY); + ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); } mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); + /* + * Initialize the AIL now we have a log. + */ + spin_lock_init(&mp->m_ail_lock); + error = xfs_trans_ail_init(mp); + if (error) { + cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); + goto error; + } + /* * skip log recovery on a norecovery mount. pretend it all * just worked. */ if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { - bhv_vfs_t *vfsp = XFS_MTOVFS(mp); - int error, readonly = (vfsp->vfs_flag & VFS_RDONLY); + int readonly = (mp->m_flags & XFS_MOUNT_RDONLY); if (readonly) - vfsp->vfs_flag &= ~VFS_RDONLY; + mp->m_flags &= ~XFS_MOUNT_RDONLY; error = xlog_recover(mp->m_log); if (readonly) - vfsp->vfs_flag |= VFS_RDONLY; + mp->m_flags |= XFS_MOUNT_RDONLY; if (error) { cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); - xlog_dealloc_log(mp->m_log); - return error; + goto error; } } @@ -518,6 +570,9 @@ xfs_log_mount(xfs_mount_t *mp, /* End mounting message in xfs_log_mount_finish */ return 0; +error: + xfs_log_unmount_dealloc(mp); + return error; } /* xfs_log_mount */ /* @@ -537,7 +592,7 @@ xfs_log_mount_finish(xfs_mount_t *mp, int mfsi_flags) error = xlog_recover_finish(mp->m_log, mfsi_flags); else { error = 0; - ASSERT(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY); + ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); } return error; @@ -584,7 +639,6 @@ xfs_log_unmount_write(xfs_mount_t *mp) xfs_log_ticket_t tic = NULL; xfs_lsn_t lsn; int error; - SPLDECL(s); /* the data section must be 32 bit size aligned */ struct { @@ -597,10 +651,11 @@ xfs_log_unmount_write(xfs_mount_t *mp) * Don't write out unmount record on read-only mounts. * Or, if we are doing a forced umount (typically because of IO errors). */ - if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) + if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; - xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); + error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); + ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); #ifdef DEBUG first_iclog = iclog = log->l_iclog; @@ -637,29 +692,29 @@ xfs_log_unmount_write(xfs_mount_t *mp) } - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; - iclog->ic_refcnt++; - LOG_UNLOCK(log, s); + atomic_inc(&iclog->ic_refcnt); + spin_unlock(&log->l_icloglock); xlog_state_want_sync(log, iclog); - (void) xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (!(iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY)) { if (!XLOG_FORCED_SHUTDOWN(log)) { sv_wait(&iclog->ic_forcesema, PMEM, &log->l_icloglock, s); } else { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } } else { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } if (tic) { xlog_trace_loggrant(log, tic, "unmount rec"); xlog_ungrant_log_space(log, tic); - xlog_state_put_ticket(log, tic); + xlog_ticket_put(log, tic); } } else { /* @@ -675,15 +730,15 @@ xfs_log_unmount_write(xfs_mount_t *mp) * a file system that went into forced_shutdown as * the result of an unmount.. */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; - iclog->ic_refcnt++; - LOG_UNLOCK(log, s); + atomic_inc(&iclog->ic_refcnt); + spin_unlock(&log->l_icloglock); xlog_state_want_sync(log, iclog); - (void) xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if ( ! ( iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY @@ -692,19 +747,23 @@ xfs_log_unmount_write(xfs_mount_t *mp) sv_wait(&iclog->ic_forcesema, PMEM, &log->l_icloglock, s); } else { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } } - return 0; + return error; } /* xfs_log_unmount_write */ /* * Deallocate log structures for unmount/relocation. + * + * We need to stop the aild from running before we destroy + * and deallocate the log as the aild references the log. */ void xfs_log_unmount_dealloc(xfs_mount_t *mp) { + xfs_trans_ail_destroy(mp); xlog_dealloc_log(mp->m_log); } @@ -740,20 +799,18 @@ xfs_log_move_tail(xfs_mount_t *mp, xlog_ticket_t *tic; xlog_t *log = mp->m_log; int need_bytes, free_bytes, cycle, bytes; - SPLDECL(s); if (XLOG_FORCED_SHUTDOWN(log)) return; - ASSERT(!XFS_FORCED_SHUTDOWN(mp)); if (tail_lsn == 0) { /* needed since sync_lsn is 64 bits */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); tail_lsn = log->l_last_sync_lsn; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); /* Also an invalid lsn. 1 implies that we aren't passing in a valid * tail_lsn. @@ -802,7 +859,7 @@ xfs_log_move_tail(xfs_mount_t *mp, tic = tic->t_next; } while (tic != log->l_reserve_headq); } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); } /* xfs_log_move_tail */ /* @@ -814,14 +871,13 @@ xfs_log_move_tail(xfs_mount_t *mp, int xfs_log_need_covered(xfs_mount_t *mp) { - SPLDECL(s); int needed = 0, gen; xlog_t *log = mp->m_log; if (!xfs_fs_writable(mp)) return 0; - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || (log->l_covered_state == XLOG_STATE_COVER_NEED2)) && !xfs_trans_first_ail(mp, &gen) @@ -834,7 +890,7 @@ xfs_log_need_covered(xfs_mount_t *mp) } needed = 1; } - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return needed; } @@ -859,17 +915,16 @@ xfs_lsn_t xlog_assign_tail_lsn(xfs_mount_t *mp) { xfs_lsn_t tail_lsn; - SPLDECL(s); xlog_t *log = mp->m_log; tail_lsn = xfs_trans_tail_ail(mp); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); if (tail_lsn != 0) { log->l_tail_lsn = tail_lsn; } else { tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn; } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return tail_lsn; } /* xlog_assign_tail_lsn */ @@ -889,7 +944,7 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) * the tail. The details of this case are described below, but the end * result is that we return the size of the log as the amount of space left. */ -int +STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes) { int free_bytes; @@ -948,6 +1003,19 @@ xlog_iodone(xfs_buf_t *bp) */ l = iclog->ic_log; + /* + * If the ordered flag has been removed by a lower + * layer, it means the underlyin device no longer supports + * barrier I/O. Warn loudly and turn off barriers. + */ + if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) { + l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; + xfs_fs_cmn_err(CE_WARN, l->l_mp, + "xlog_iodone: Barriers are no longer supported" + " by device. Disabling barriers\n"); + xfs_buftrace("XLOG_IODONE BARRIERS OFF", bp); + } + /* * Race to shutdown the filesystem if we see an error. */ @@ -1012,10 +1080,7 @@ xlog_bdstrat_cb(struct xfs_buf *bp) /* * Return size of each in-core log record buffer. * - * Low memory machines only get 2 16KB buffers. We don't want to waste - * memory here. However, all other machines get at least 2 32KB buffers. - * The number is hard coded because we don't care about the minimum - * memory size, just 32MB systems. + * All machines get 8 x 32KB buffers by default, unless tuned otherwise. * * If the filesystem blocksize is too large, we may need to choose a * larger size since the directory code currently logs entire blocks. @@ -1028,17 +1093,10 @@ xlog_get_iclog_buffer_size(xfs_mount_t *mp, int size; int xhdrs; - if (mp->m_logbufs <= 0) { - if (xfs_physmem <= btoc(128*1024*1024)) { - log->l_iclog_bufs = XLOG_MIN_ICLOGS; - } else if (xfs_physmem <= btoc(400*1024*1024)) { - log->l_iclog_bufs = XLOG_MED_ICLOGS; - } else { /* 256K with 32K bufs */ - log->l_iclog_bufs = XLOG_MAX_ICLOGS; - } - } else { + if (mp->m_logbufs <= 0) + log->l_iclog_bufs = XLOG_MAX_ICLOGS; + else log->l_iclog_bufs = mp->m_logbufs; - } /* * Buffer size passed in from mount system call. @@ -1051,7 +1109,7 @@ xlog_get_iclog_buffer_size(xfs_mount_t *mp, size >>= 1; } - if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) { + if (xfs_sb_version_haslogv2(&mp->m_sb)) { /* # headers = size / 32K * one header holds cycles from 32K of data */ @@ -1069,18 +1127,9 @@ xlog_get_iclog_buffer_size(xfs_mount_t *mp, goto done; } - /* - * Special case machines that have less than 32MB of memory. - * All machines with more memory use 32KB buffers. - */ - if (xfs_physmem <= btoc(32*1024*1024)) { - /* Don't change; min configuration */ - log->l_iclog_size = XLOG_RECORD_BSIZE; /* 16k */ - log->l_iclog_size_log = XLOG_RECORD_BSHIFT; - } else { - log->l_iclog_size = XLOG_BIG_RECORD_BSIZE; /* 32k */ - log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT; - } + /* All machines use 32KB buffers by default. */ + log->l_iclog_size = XLOG_BIG_RECORD_BSIZE; + log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT; /* the default log size is 16k or 32k which is one header sector */ log->l_iclog_hsize = BBSIZE; @@ -1149,20 +1198,20 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_flags |= XLOG_ACTIVE_RECOVERY; log->l_prev_block = -1; - ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, 1, 0); + log->l_tail_lsn = xlog_assign_lsn(1, 0); /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ log->l_last_sync_lsn = log->l_tail_lsn; log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ log->l_grant_reserve_cycle = 1; log->l_grant_write_cycle = 1; - if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb)) { + if (xfs_sb_version_hassector(&mp->m_sb)) { log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; ASSERT(log->l_sectbb_log <= mp->m_sectbb_log); /* for larger sector sizes, must have v2 or external log */ ASSERT(log->l_sectbb_log == 0 || log->l_logBBstart == 0 || - XFS_SB_VERSION_HASLOGV2(&mp->m_sb)); + xfs_sb_version_haslogv2(&mp->m_sb)); ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT); } log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; @@ -1177,10 +1226,9 @@ xlog_alloc_log(xfs_mount_t *mp, ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); log->l_xbuf = bp; - spinlock_init(&log->l_icloglock, "iclog"); - spinlock_init(&log->l_grant_lock, "grhead_iclog"); + spin_lock_init(&log->l_icloglock); + spin_lock_init(&log->l_grant_lock); initnsema(&log->l_flushsema, 0, "ic-flush"); - xlog_state_ticket_alloc(log); /* wait until after icloglock inited */ /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); @@ -1210,23 +1258,24 @@ xlog_alloc_log(xfs_mount_t *mp, XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); iclog->ic_bp = bp; iclog->hic_data = bp->b_addr; - +#ifdef DEBUG log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); - +#endif head = &iclog->ic_header; memset(head, 0, sizeof(xlog_rec_header_t)); - INT_SET(head->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM); - INT_SET(head->h_version, ARCH_CONVERT, - XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1); - INT_SET(head->h_size, ARCH_CONVERT, log->l_iclog_size); + head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); + head->h_version = cpu_to_be32( + xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1); + head->h_size = cpu_to_be32(log->l_iclog_size); /* new fields */ - INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT); + head->h_fmt = cpu_to_be32(XLOG_FMT); memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); - iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; iclog->ic_state = XLOG_STATE_ACTIVE; iclog->ic_log = log; + atomic_set(&iclog->ic_refcnt, 0); + spin_lock_init(&iclog->ic_callback_lock); iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; @@ -1277,7 +1326,7 @@ xlog_commit_record(xfs_mount_t *mp, * pushes on an lsn which is further along in the log once we reach the high * water mark. In this manner, we would be creating a low water mark. */ -void +STATIC void xlog_grant_push_ail(xfs_mount_t *mp, int need_bytes) { @@ -1289,11 +1338,10 @@ xlog_grant_push_ail(xfs_mount_t *mp, int threshold_block; /* block in lsn we'd like to be at */ int threshold_cycle; /* lsn cycle we'd like to be at */ int free_threshold; - SPLDECL(s); ASSERT(BTOBB(need_bytes) < log->l_logBBsize); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, log->l_grant_reserve_bytes); @@ -1315,8 +1363,7 @@ xlog_grant_push_ail(xfs_mount_t *mp, threshold_block -= log->l_logBBsize; threshold_cycle += 1; } - ASSIGN_ANY_LSN_HOST(threshold_lsn, threshold_cycle, - threshold_block); + threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block); /* Don't pass in an lsn greater than the lsn of the last * log record known to be on disk. @@ -1324,7 +1371,7 @@ xlog_grant_push_ail(xfs_mount_t *mp, if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) threshold_lsn = log->l_last_sync_lsn; } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); /* * Get the transaction layer to kick the dirty buffers out to @@ -1362,23 +1409,22 @@ xlog_grant_push_ail(xfs_mount_t *mp, * is added immediately before calling bwrite(). */ -int +STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog) { xfs_caddr_t dptr; /* pointer to byte sized element */ xfs_buf_t *bp; - int i, ops; + int i; uint count; /* byte count of bwrite */ uint count_init; /* initial count before roundup */ int roundoff; /* roundoff to BB or stripe */ int split = 0; /* split write into two regions */ int error; - SPLDECL(s); - int v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb); + int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); XFS_STATS_INC(xs_log_writes); - ASSERT(iclog->ic_refcnt == 0); + ASSERT(atomic_read(&iclog->ic_refcnt) == 0); /* Add for LR header */ count_init = log->l_iclog_hsize + iclog->ic_offset; @@ -1399,30 +1445,26 @@ xlog_sync(xlog_t *log, roundoff < BBTOB(1))); /* move grant heads by roundoff in sync */ - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_grant_add_space(log, roundoff); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); /* put cycle number in every block */ xlog_pack_data(log, iclog, roundoff); /* real byte length */ if (v2) { - INT_SET(iclog->ic_header.h_len, - ARCH_CONVERT, - iclog->ic_offset + roundoff); + iclog->ic_header.h_len = + cpu_to_be32(iclog->ic_offset + roundoff); } else { - INT_SET(iclog->ic_header.h_len, ARCH_CONVERT, iclog->ic_offset); + iclog->ic_header.h_len = + cpu_to_be32(iclog->ic_offset); } - /* put ops count in correct order */ - ops = iclog->ic_header.h_num_logops; - INT_SET(iclog->ic_header.h_num_logops, ARCH_CONVERT, ops); - bp = iclog->ic_bp; ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); - XFS_BUF_SET_ADDR(bp, BLOCK_LSN(INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT))); + XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); @@ -1485,10 +1527,10 @@ xlog_sync(xlog_t *log, * a new cycle. Watch out for the header magic number * case, though. */ - for (i=0; il_iclog; for (i=0; il_iclog_bufs; i++) { sv_destroy(&iclog->ic_forcesema); @@ -1537,22 +1577,6 @@ xlog_dealloc_log(xlog_t *log) spinlock_destroy(&log->l_icloglock); spinlock_destroy(&log->l_grant_lock); - /* XXXsup take a look at this again. */ - if ((log->l_ticket_cnt != log->l_ticket_tcnt) && - !XLOG_FORCED_SHUTDOWN(log)) { - xfs_fs_cmn_err(CE_WARN, log->l_mp, - "xlog_dealloc_log: (cnt: %d, total: %d)", - log->l_ticket_cnt, log->l_ticket_tcnt); - /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */ - - } else { - tic = log->l_unmount_free; - while (tic) { - next_tic = tic->t_next; - kmem_free(tic, NBPP); - tic = next_tic; - } - } xfs_buf_free(log->l_xbuf); #ifdef XFS_LOG_TRACE if (log->l_trace != NULL) { @@ -1576,14 +1600,12 @@ xlog_state_finish_copy(xlog_t *log, int record_cnt, int copy_bytes) { - SPLDECL(s); - - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); - iclog->ic_header.h_num_logops += record_cnt; + be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt); iclog->ic_offset += copy_bytes; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } /* xlog_state_finish_copy */ @@ -1736,7 +1758,7 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) * we don't update ic_offset until the end when we know exactly how many * bytes have been written out. */ -int +STATIC int xlog_write(xfs_mount_t * mp, xfs_log_iovec_t reg[], int nentries, @@ -1771,14 +1793,14 @@ xlog_write(xfs_mount_t * mp, len = 0; if (ticket->t_flags & XLOG_TIC_INITED) { /* acct for start rec of xact */ len += sizeof(xlog_op_header_t); - XLOG_TIC_ADD_OPHDR(ticket); + ticket->t_res_num_ophdrs++; } for (index = 0; index < nentries; index++) { len += sizeof(xlog_op_header_t); /* each region gets >= 1 */ - XLOG_TIC_ADD_OPHDR(ticket); + ticket->t_res_num_ophdrs++; len += reg[index].i_len; - XLOG_TIC_ADD_REGION(ticket, reg[index].i_len, reg[index].i_type); + xlog_tic_add_region(ticket, reg[index].i_len, reg[index].i_type); } contwr = *start_lsn = 0; @@ -1807,7 +1829,7 @@ xlog_write(xfs_mount_t * mp, /* start_lsn is the first lsn written to. That's all we need. */ if (! *start_lsn) - *start_lsn = INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT); + *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn); /* This loop writes out as many regions as can fit in the amount * of space which was allocated by xlog_state_get_iclog_space(). @@ -1823,7 +1845,7 @@ xlog_write(xfs_mount_t * mp, */ if (ticket->t_flags & XLOG_TIC_INITED) { logop_head = (xlog_op_header_t *)ptr; - INT_SET(logop_head->oh_tid, ARCH_CONVERT, ticket->t_tid); + logop_head->oh_tid = cpu_to_be32(ticket->t_tid); logop_head->oh_clientid = ticket->t_clientid; logop_head->oh_len = 0; logop_head->oh_flags = XLOG_START_TRANS; @@ -1837,7 +1859,7 @@ xlog_write(xfs_mount_t * mp, /* Copy log operation header directly into data section */ logop_head = (xlog_op_header_t *)ptr; - INT_SET(logop_head->oh_tid, ARCH_CONVERT, ticket->t_tid); + logop_head->oh_tid = cpu_to_be32(ticket->t_tid); logop_head->oh_clientid = ticket->t_clientid; logop_head->oh_res2 = 0; @@ -1872,13 +1894,14 @@ xlog_write(xfs_mount_t * mp, copy_off = partial_copy_len; if (need_copy <= iclog->ic_size - log_offset) { /*complete write */ - INT_SET(logop_head->oh_len, ARCH_CONVERT, copy_len = need_copy); + copy_len = need_copy; + logop_head->oh_len = cpu_to_be32(copy_len); if (partial_copy) logop_head->oh_flags|= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS); partial_copy_len = partial_copy = 0; } else { /* partial write */ copy_len = iclog->ic_size - log_offset; - INT_SET(logop_head->oh_len, ARCH_CONVERT, copy_len); + logop_head->oh_len = cpu_to_be32(copy_len); logop_head->oh_flags |= XLOG_CONTINUE_TRANS; if (partial_copy) logop_head->oh_flags |= XLOG_WAS_CONT_TRANS; @@ -1887,7 +1910,7 @@ xlog_write(xfs_mount_t * mp, len += sizeof(xlog_op_header_t); /* from splitting of region */ /* account for new log op header */ ticket->t_curr_res -= sizeof(xlog_op_header_t); - XLOG_TIC_ADD_OPHDR(ticket); + ticket->t_res_num_ophdrs++; } xlog_verify_dest_ptr(log, ptr); @@ -1966,7 +1989,7 @@ xlog_state_clean_log(xlog_t *log) if (iclog->ic_state == XLOG_STATE_DIRTY) { iclog->ic_state = XLOG_STATE_ACTIVE; iclog->ic_offset = 0; - iclog->ic_callback = NULL; /* don't need to free */ + ASSERT(iclog->ic_callback == NULL); /* * If the number of ops in this iclog indicate it just * contains the dummy transaction, we can @@ -1976,7 +1999,8 @@ xlog_state_clean_log(xlog_t *log) * We don't need to cover the dummy. */ if (!changed && - (INT_GET(iclog->ic_header.h_num_logops, ARCH_CONVERT) == XLOG_COVER_OPS)) { + (be32_to_cpu(iclog->ic_header.h_num_logops) == + XLOG_COVER_OPS)) { changed = 1; } else { /* @@ -2044,7 +2068,7 @@ xlog_get_lowest_lsn( lowest_lsn = 0; do { if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) { - lsn = INT_GET(lsn_log->ic_header.h_lsn, ARCH_CONVERT); + lsn = be64_to_cpu(lsn_log->ic_header.h_lsn); if ((lsn && !lowest_lsn) || (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) { lowest_lsn = lsn; @@ -2073,9 +2097,8 @@ xlog_state_do_callback( int funcdidcallbacks; /* flag: function did callbacks */ int repeats; /* for issuing console warnings if * looping too many times */ - SPLDECL(s); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); first_iclog = iclog = log->l_iclog; ioerrors = 0; funcdidcallbacks = 0; @@ -2120,7 +2143,7 @@ xlog_state_do_callback( * to DO_CALLBACK, we will not process it when * we retry since a previous iclog is in the * CALLBACK and the state cannot change since - * we are holding the LOG_LOCK. + * we are holding the l_icloglock. */ if (!(iclog->ic_state & (XLOG_STATE_DONE_SYNC | @@ -2146,11 +2169,9 @@ xlog_state_do_callback( */ lowest_lsn = xlog_get_lowest_lsn(log); - if (lowest_lsn && ( - XFS_LSN_CMP( - lowest_lsn, - INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) - )<0)) { + if (lowest_lsn && + XFS_LSN_CMP(lowest_lsn, + be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { iclog = iclog->ic_next; continue; /* Leave this iclog for * another thread */ @@ -2158,51 +2179,53 @@ xlog_state_do_callback( iclog->ic_state = XLOG_STATE_CALLBACK; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* l_last_sync_lsn field protected by - * GRANT_LOCK. Don't worry about iclog's lsn. + * l_grant_lock. Don't worry about iclog's lsn. * No one else can be here except us. */ - s = GRANT_LOCK(log); - ASSERT(XFS_LSN_CMP( - log->l_last_sync_lsn, - INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) - )<=0); - log->l_last_sync_lsn = INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT); - GRANT_UNLOCK(log, s); + spin_lock(&log->l_grant_lock); + ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn, + be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); + log->l_last_sync_lsn = + be64_to_cpu(iclog->ic_header.h_lsn); + spin_unlock(&log->l_grant_lock); - /* - * Keep processing entries in the callback list - * until we come around and it is empty. We - * need to atomically see that the list is - * empty and change the state to DIRTY so that - * we don't miss any more callbacks being added. - */ - s = LOG_LOCK(log); } else { + spin_unlock(&log->l_icloglock); ioerrors++; } - cb = iclog->ic_callback; + /* + * Keep processing entries in the callback list until + * we come around and it is empty. We need to + * atomically see that the list is empty and change the + * state to DIRTY so that we don't miss any more + * callbacks being added. + */ + spin_lock(&iclog->ic_callback_lock); + cb = iclog->ic_callback; while (cb) { iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_callback = NULL; - LOG_UNLOCK(log, s); + spin_unlock(&iclog->ic_callback_lock); /* perform callbacks in the order given */ for (; cb; cb = cb_next) { cb_next = cb->cb_next; cb->cb_func(cb->cb_arg, aborted); } - s = LOG_LOCK(log); + spin_lock(&iclog->ic_callback_lock); cb = iclog->ic_callback; } loopdidcallbacks++; funcdidcallbacks++; + spin_lock(&log->l_icloglock); ASSERT(iclog->ic_callback == NULL); + spin_unlock(&iclog->ic_callback_lock); if (!(iclog->ic_state & XLOG_STATE_IOERROR)) iclog->ic_state = XLOG_STATE_DIRTY; @@ -2223,7 +2246,7 @@ xlog_state_do_callback( repeats = 0; xfs_fs_cmn_err(CE_WARN, log->l_mp, "%s: possible infinite loop (%d iterations)", - __FUNCTION__, flushcnt); + __func__, flushcnt); } } while (!ioerrors && loopdidcallbacks); @@ -2242,7 +2265,7 @@ xlog_state_do_callback( * * SYNCING - i/o completion will go through logs * DONE_SYNC - interrupt thread should be waiting for - * LOG_LOCK + * l_icloglock * IOERROR - give up hope all ye who enter here */ if (iclog->ic_state == XLOG_STATE_WANT_SYNC || @@ -2260,7 +2283,7 @@ xlog_state_do_callback( flushcnt = log->l_flushcnt; log->l_flushcnt = 0; } - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); while (flushcnt--) vsema(&log->l_flushsema); } /* xlog_state_do_callback */ @@ -2280,19 +2303,18 @@ xlog_state_do_callback( * global state machine log lock. Assume that the calls to cvsema won't * take a long time. At least we know it won't sleep. */ -void +STATIC void xlog_state_done_syncing( xlog_in_core_t *iclog, int aborted) { xlog_t *log = iclog->ic_log; - SPLDECL(s); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || iclog->ic_state == XLOG_STATE_IOERROR); - ASSERT(iclog->ic_refcnt == 0); + ASSERT(atomic_read(&iclog->ic_refcnt) == 0); ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); @@ -2304,7 +2326,7 @@ xlog_state_done_syncing( */ if (iclog->ic_state != XLOG_STATE_IOERROR) { if (--iclog->ic_bwritecnt == 1) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return; } iclog->ic_state = XLOG_STATE_DONE_SYNC; @@ -2316,7 +2338,7 @@ xlog_state_done_syncing( * I/O, the others get to wait for the result. */ sv_broadcast(&iclog->ic_writesema); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ } /* xlog_state_done_syncing */ @@ -2341,7 +2363,7 @@ xlog_state_done_syncing( * needs to be incremented, depending on the amount of data which * is copied. */ -int +STATIC int xlog_state_get_iclog_space(xlog_t *log, int len, xlog_in_core_t **iclogp, @@ -2349,23 +2371,22 @@ xlog_state_get_iclog_space(xlog_t *log, int *continued_write, int *logoffsetp) { - SPLDECL(s); int log_offset; xlog_rec_header_t *head; xlog_in_core_t *iclog; int error; restart: - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (XLOG_FORCED_SHUTDOWN(log)) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } iclog = log->l_iclog; if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) { log->l_flushcnt++; - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH); XFS_STATS_INC(xs_log_noiclogs); /* Ensure that log writes happen */ @@ -2375,7 +2396,7 @@ restart: ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); head = &iclog->ic_header; - iclog->ic_refcnt++; /* prevents sync */ + atomic_inc(&iclog->ic_refcnt); /* prevents sync */ log_offset = iclog->ic_offset; /* On the 1st write to an iclog, figure out lsn. This works @@ -2385,11 +2406,12 @@ restart: */ if (log_offset == 0) { ticket->t_curr_res -= log->l_iclog_hsize; - XLOG_TIC_ADD_REGION(ticket, + xlog_tic_add_region(ticket, log->l_iclog_hsize, XLOG_REG_TYPE_LRHEADER); - INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle); - ASSIGN_LSN(head->h_lsn, log); + head->h_cycle = cpu_to_be32(log->l_curr_cycle); + head->h_lsn = cpu_to_be64( + xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block)); ASSERT(log->l_curr_block >= 0); } @@ -2406,13 +2428,13 @@ restart: xlog_state_switch_iclogs(log, iclog, iclog->ic_size); /* If I'm the only one writing to this iclog, sync it to disk */ - if (iclog->ic_refcnt == 1) { - LOG_UNLOCK(log, s); + if (atomic_read(&iclog->ic_refcnt) == 1) { + spin_unlock(&log->l_icloglock); if ((error = xlog_state_release_iclog(log, iclog))) return error; } else { - iclog->ic_refcnt--; - LOG_UNLOCK(log, s); + atomic_dec(&iclog->ic_refcnt); + spin_unlock(&log->l_icloglock); } goto restart; } @@ -2433,7 +2455,7 @@ restart: *iclogp = iclog; ASSERT(iclog->ic_offset <= iclog->ic_size); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); *logoffsetp = log_offset; return 0; @@ -2451,7 +2473,6 @@ xlog_grant_log_space(xlog_t *log, { int free_bytes; int need_bytes; - SPLDECL(s); #ifdef DEBUG xfs_lsn_t tail_lsn; #endif @@ -2463,7 +2484,7 @@ xlog_grant_log_space(xlog_t *log, #endif /* Is there space or do we need to sleep? */ - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_trace_loggrant(log, tic, "xlog_grant_log_space: enter"); /* something is already sleeping; insert new transaction at end */ @@ -2486,7 +2507,7 @@ xlog_grant_log_space(xlog_t *log, */ xlog_trace_loggrant(log, tic, "xlog_grant_log_space: wake 1"); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); } if (tic->t_flags & XFS_LOG_PERM_RESERV) need_bytes = tic->t_unit_res*tic->t_ocnt; @@ -2508,14 +2529,14 @@ redo: sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); if (XLOG_FORCED_SHUTDOWN(log)) { - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto error_return; } xlog_trace_loggrant(log, tic, "xlog_grant_log_space: wake 2"); xlog_grant_push_ail(log->l_mp, need_bytes); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_reserve_headq, tic); @@ -2537,7 +2558,7 @@ redo: #endif xlog_trace_loggrant(log, tic, "xlog_grant_log_space: exit"); xlog_verify_grant_head(log, 1); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return 0; error_return: @@ -2551,7 +2572,7 @@ redo: */ tic->t_curr_res = 0; tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return XFS_ERROR(EIO); } /* xlog_grant_log_space */ @@ -2565,7 +2586,6 @@ STATIC int xlog_regrant_write_log_space(xlog_t *log, xlog_ticket_t *tic) { - SPLDECL(s); int free_bytes, need_bytes; xlog_ticket_t *ntic; #ifdef DEBUG @@ -2573,7 +2593,7 @@ xlog_regrant_write_log_space(xlog_t *log, #endif tic->t_curr_res = tic->t_unit_res; - XLOG_TIC_RESET_RES(tic); + xlog_tic_reset_res(tic); if (tic->t_cnt > 0) return 0; @@ -2583,7 +2603,7 @@ xlog_regrant_write_log_space(xlog_t *log, panic("regrant Recovery problem"); #endif - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: enter"); if (XLOG_FORCED_SHUTDOWN(log)) @@ -2622,14 +2642,14 @@ xlog_regrant_write_log_space(xlog_t *log, /* If we're shutting down, this tic is already * off the queue */ if (XLOG_FORCED_SHUTDOWN(log)) { - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto error_return; } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 1"); xlog_grant_push_ail(log->l_mp, tic->t_unit_res); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); } } @@ -2649,14 +2669,14 @@ redo: /* If we're shutting down, this tic is already off the queue */ if (XLOG_FORCED_SHUTDOWN(log)) { - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto error_return; } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 2"); xlog_grant_push_ail(log->l_mp, need_bytes); - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_write_headq, tic); @@ -2673,7 +2693,7 @@ redo: xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit"); xlog_verify_grant_head(log, 1); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return 0; @@ -2688,7 +2708,7 @@ redo: */ tic->t_curr_res = 0; tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return XFS_ERROR(EIO); } /* xlog_regrant_write_log_space */ @@ -2704,24 +2724,22 @@ STATIC void xlog_regrant_reserve_log_space(xlog_t *log, xlog_ticket_t *ticket) { - SPLDECL(s); - xlog_trace_loggrant(log, ticket, "xlog_regrant_reserve_log_space: enter"); if (ticket->t_cnt > 0) ticket->t_cnt--; - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_grant_sub_space(log, ticket->t_curr_res); ticket->t_curr_res = ticket->t_unit_res; - XLOG_TIC_RESET_RES(ticket); + xlog_tic_reset_res(ticket); xlog_trace_loggrant(log, ticket, "xlog_regrant_reserve_log_space: sub current res"); xlog_verify_grant_head(log, 1); /* just return if we still have some of the pre-reserved space */ if (ticket->t_cnt > 0) { - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); return; } @@ -2729,9 +2747,9 @@ xlog_regrant_reserve_log_space(xlog_t *log, xlog_trace_loggrant(log, ticket, "xlog_regrant_reserve_log_space: exit"); xlog_verify_grant_head(log, 0); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); ticket->t_curr_res = ticket->t_unit_res; - XLOG_TIC_RESET_RES(ticket); + xlog_tic_reset_res(ticket); } /* xlog_regrant_reserve_log_space */ @@ -2753,12 +2771,10 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, xlog_ticket_t *ticket) { - SPLDECL(s); - if (ticket->t_cnt > 0) ticket->t_cnt--; - s = GRANT_LOCK(log); + spin_lock(&log->l_grant_lock); xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter"); xlog_grant_sub_space(log, ticket->t_curr_res); @@ -2775,25 +2791,11 @@ xlog_ungrant_log_space(xlog_t *log, xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit"); xlog_verify_grant_head(log, 1); - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); xfs_log_move_tail(log->l_mp, 1); } /* xlog_ungrant_log_space */ -/* - * Atomically put back used ticket. - */ -void -xlog_state_put_ticket(xlog_t *log, - xlog_ticket_t *tic) -{ - unsigned long s; - - s = LOG_LOCK(log); - xlog_ticket_put(log, tic); - LOG_UNLOCK(log, s); -} /* xlog_state_put_ticket */ - /* * Flush iclog to disk if this is the last reference to the given iclog and * the WANT_SYNC bit is set. @@ -2803,36 +2805,37 @@ xlog_state_put_ticket(xlog_t *log, * * */ -int -xlog_state_release_iclog(xlog_t *log, - xlog_in_core_t *iclog) +STATIC int +xlog_state_release_iclog( + xlog_t *log, + xlog_in_core_t *iclog) { - SPLDECL(s); int sync = 0; /* do we sync? */ - xlog_assign_tail_lsn(log->l_mp); + if (iclog->ic_state & XLOG_STATE_IOERROR) + return XFS_ERROR(EIO); - s = LOG_LOCK(log); + ASSERT(atomic_read(&iclog->ic_refcnt) > 0); + if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) + return 0; if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } - - ASSERT(iclog->ic_refcnt > 0); ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_WANT_SYNC); - if (--iclog->ic_refcnt == 0 && - iclog->ic_state == XLOG_STATE_WANT_SYNC) { + if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { + /* update tail before writing to iclog */ + xlog_assign_tail_lsn(log->l_mp); sync++; iclog->ic_state = XLOG_STATE_SYNCING; - INT_SET(iclog->ic_header.h_tail_lsn, ARCH_CONVERT, log->l_tail_lsn); + iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); /* cycle incremented when incrementing curr_block */ } - - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* * We let the log lock go, so it's possible that we hit a log I/O @@ -2841,11 +2844,9 @@ xlog_state_release_iclog(xlog_t *log, * this iclog has consistent data, so we ignore IOERROR * flags after this point. */ - if (sync) { + if (sync) return xlog_sync(log, iclog); - } return 0; - } /* xlog_state_release_iclog */ @@ -2865,7 +2866,7 @@ xlog_state_switch_iclogs(xlog_t *log, if (!eventual_size) eventual_size = iclog->ic_offset; iclog->ic_state = XLOG_STATE_WANT_SYNC; - INT_SET(iclog->ic_header.h_prev_block, ARCH_CONVERT, log->l_prev_block); + iclog->ic_header.h_prev_block = cpu_to_be32(log->l_prev_block); log->l_prev_block = log->l_curr_block; log->l_prev_cycle = log->l_curr_cycle; @@ -2873,7 +2874,7 @@ xlog_state_switch_iclogs(xlog_t *log, log->l_curr_block += BTOBB(eventual_size)+BTOBB(log->l_iclog_hsize); /* Round up to next log-sunit */ - if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) && log->l_mp->m_sb.sb_logsunit > 1) { __uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit); log->l_curr_block = roundup(log->l_curr_block, sunit_bb); @@ -2923,13 +2924,12 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) { xlog_in_core_t *iclog; xfs_lsn_t lsn; - SPLDECL(s); - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } @@ -2946,7 +2946,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) * previous iclog and go to sleep. */ if (iclog->ic_state == XLOG_STATE_DIRTY || - (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) { + (atomic_read(&iclog->ic_refcnt) == 0 + && iclog->ic_offset == 0)) { iclog = iclog->ic_prev; if (iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY) @@ -2954,23 +2955,23 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) else goto maybe_sleep; } else { - if (iclog->ic_refcnt == 0) { + if (atomic_read(&iclog->ic_refcnt) == 0) { /* We are the only one with access to this * iclog. Flush it out now. There should * be a roundoff of zero to show that someone * has already taken care of the roundoff from * the previous sync. */ - iclog->ic_refcnt++; - lsn = INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT); + atomic_inc(&iclog->ic_refcnt); + lsn = be64_to_cpu(iclog->ic_header.h_lsn); xlog_state_switch_iclogs(log, iclog, 0); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) return XFS_ERROR(EIO); *log_flushed = 1; - s = LOG_LOCK(log); - if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) == lsn && + spin_lock(&log->l_icloglock); + if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && iclog->ic_state != XLOG_STATE_DIRTY) goto maybe_sleep; else @@ -2995,12 +2996,12 @@ maybe_sleep: if (flags & XFS_LOG_SYNC) { /* * We must check if we're shutting down here, before - * we wait, while we're holding the LOG_LOCK. + * we wait, while we're holding the l_icloglock. * Then we check again after waking up, in case our * sleep was disturbed by a bad news. */ if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); @@ -3017,7 +3018,7 @@ maybe_sleep: } else { no_sleep: - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } return 0; } /* xlog_state_sync_all */ @@ -3035,7 +3036,7 @@ no_sleep: * If filesystem activity goes to zero, the iclog will get flushed only by * bdflush(). */ -int +STATIC int xlog_state_sync(xlog_t *log, xfs_lsn_t lsn, uint flags, @@ -3043,26 +3044,24 @@ xlog_state_sync(xlog_t *log, { xlog_in_core_t *iclog; int already_slept = 0; - SPLDECL(s); - try_again: - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } do { - if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) != lsn) { - iclog = iclog->ic_next; - continue; + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { + iclog = iclog->ic_next; + continue; } if (iclog->ic_state == XLOG_STATE_DIRTY) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return 0; } @@ -3095,13 +3094,13 @@ try_again: already_slept = 1; goto try_again; } else { - iclog->ic_refcnt++; + atomic_inc(&iclog->ic_refcnt); xlog_state_switch_iclogs(log, iclog, 0); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) return XFS_ERROR(EIO); *log_flushed = 1; - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); } } @@ -3113,7 +3112,7 @@ try_again: * gotten a log write error. */ if (iclog->ic_state & XLOG_STATE_IOERROR) { - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); @@ -3127,13 +3126,13 @@ try_again: return XFS_ERROR(EIO); *log_flushed = 1; } else { /* just return */ - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } return 0; } while (iclog != log->l_iclog); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); return 0; } /* xlog_state_sync */ @@ -3142,12 +3141,10 @@ try_again: * Called when we want to mark the current iclog as being ready to sync to * disk. */ -void +STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) { - SPLDECL(s); - - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); if (iclog->ic_state == XLOG_STATE_ACTIVE) { xlog_state_switch_iclogs(log, iclog, 0); @@ -3156,7 +3153,7 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR)); } - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } /* xlog_state_want_sync */ @@ -3169,95 +3166,21 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) */ /* - * Algorithm doesn't take into account page size. ;-( - */ -STATIC void -xlog_state_ticket_alloc(xlog_t *log) -{ - xlog_ticket_t *t_list; - xlog_ticket_t *next; - xfs_caddr_t buf; - uint i = (NBPP / sizeof(xlog_ticket_t)) - 2; - SPLDECL(s); - - /* - * The kmem_zalloc may sleep, so we shouldn't be holding the - * global lock. XXXmiken: may want to use zone allocator. - */ - buf = (xfs_caddr_t) kmem_zalloc(NBPP, KM_SLEEP); - - s = LOG_LOCK(log); - - /* Attach 1st ticket to Q, so we can keep track of allocated memory */ - t_list = (xlog_ticket_t *)buf; - t_list->t_next = log->l_unmount_free; - log->l_unmount_free = t_list++; - log->l_ticket_cnt++; - log->l_ticket_tcnt++; - - /* Next ticket becomes first ticket attached to ticket free list */ - if (log->l_freelist != NULL) { - ASSERT(log->l_tail != NULL); - log->l_tail->t_next = t_list; - } else { - log->l_freelist = t_list; - } - log->l_ticket_cnt++; - log->l_ticket_tcnt++; - - /* Cycle through rest of alloc'ed memory, building up free Q */ - for ( ; i > 0; i--) { - next = t_list + 1; - t_list->t_next = next; - t_list = next; - log->l_ticket_cnt++; - log->l_ticket_tcnt++; - } - t_list->t_next = NULL; - log->l_tail = t_list; - LOG_UNLOCK(log, s); -} /* xlog_state_ticket_alloc */ - - -/* - * Put ticket into free list - * - * Assumption: log lock is held around this call. + * Free a used ticket. */ STATIC void xlog_ticket_put(xlog_t *log, xlog_ticket_t *ticket) { sv_destroy(&ticket->t_sema); - - /* - * Don't think caching will make that much difference. It's - * more important to make debug easier. - */ -#if 0 - /* real code will want to use LIFO for caching */ - ticket->t_next = log->l_freelist; - log->l_freelist = ticket; - /* no need to clear fields */ -#else - /* When we debug, it is easier if tickets are cycled */ - ticket->t_next = NULL; - if (log->l_tail) { - log->l_tail->t_next = ticket; - } else { - ASSERT(log->l_freelist == NULL); - log->l_freelist = ticket; - } - log->l_tail = ticket; -#endif /* DEBUG */ - log->l_ticket_cnt++; + kmem_zone_free(xfs_log_ticket_zone, ticket); } /* xlog_ticket_put */ /* - * Grab ticket off freelist or allocation some more + * Allocate and initialise a new log ticket. */ -xlog_ticket_t * +STATIC xlog_ticket_t * xlog_ticket_get(xlog_t *log, int unit_bytes, int cnt, @@ -3266,23 +3189,10 @@ xlog_ticket_get(xlog_t *log, { xlog_ticket_t *tic; uint num_headers; - SPLDECL(s); - alloc: - if (log->l_freelist == NULL) - xlog_state_ticket_alloc(log); /* potentially sleep */ - - s = LOG_LOCK(log); - if (log->l_freelist == NULL) { - LOG_UNLOCK(log, s); - goto alloc; - } - tic = log->l_freelist; - log->l_freelist = tic->t_next; - if (log->l_freelist == NULL) - log->l_tail = NULL; - log->l_ticket_cnt--; - LOG_UNLOCK(log, s); + tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL); + if (!tic) + return NULL; /* * Permanent reservations have up to 'cnt'-1 active log operations @@ -3333,7 +3243,7 @@ xlog_ticket_get(xlog_t *log, unit_bytes += sizeof(xlog_op_header_t) * num_headers; /* for roundoff padding for transaction data and one for commit record */ - if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) && log->l_mp->m_sb.sb_logsunit > 1) { /* log su roundoff */ unit_bytes += 2*log->l_mp->m_sb.sb_logsunit; @@ -3354,7 +3264,7 @@ xlog_ticket_get(xlog_t *log, tic->t_flags |= XLOG_TIC_PERM_RESERV; sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); - XLOG_TIC_RESET_RES(tic); + xlog_tic_reset_res(tic); return tic; } /* xlog_ticket_get */ @@ -3457,10 +3367,9 @@ xlog_verify_iclog(xlog_t *log, __uint8_t clientid; int len, i, j, k, op_len; int idx; - SPLDECL(s); /* check validity of iclog pointers */ - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); icptr = log->l_iclog; for (i=0; i < log->l_iclog_bufs; i++) { if (icptr == NULL) @@ -3469,21 +3378,21 @@ xlog_verify_iclog(xlog_t *log, } if (icptr != log->l_iclog) xlog_panic("xlog_verify_iclog: corrupt iclog ring"); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); /* check log magic numbers */ - ptr = (xfs_caddr_t) &(iclog->ic_header); - if (INT_GET(*(uint *)ptr, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) + if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) xlog_panic("xlog_verify_iclog: invalid magic num"); - for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&(iclog->ic_header))+count; + ptr = (xfs_caddr_t) &iclog->ic_header; + for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; ptr += BBSIZE) { - if (INT_GET(*(uint *)ptr, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) + if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) xlog_panic("xlog_verify_iclog: unexpected magic num"); } /* check fields */ - len = INT_GET(iclog->ic_header.h_num_logops, ARCH_CONVERT); + len = be32_to_cpu(iclog->ic_header.h_num_logops); ptr = iclog->ic_datap; base_ptr = ptr; ophead = (xlog_op_header_t *)ptr; @@ -3501,9 +3410,11 @@ xlog_verify_iclog(xlog_t *log, if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - clientid = GET_CLIENT_ID(xhdr[j].hic_xheader.xh_cycle_data[k], ARCH_CONVERT); + clientid = xlog_get_client_id( + xhdr[j].hic_xheader.xh_cycle_data[k]); } else { - clientid = GET_CLIENT_ID(iclog->ic_header.h_cycle_data[idx], ARCH_CONVERT); + clientid = xlog_get_client_id( + iclog->ic_header.h_cycle_data[idx]); } } if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) @@ -3515,16 +3426,16 @@ xlog_verify_iclog(xlog_t *log, field_offset = (__psint_t) ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); if (syncing == B_FALSE || (field_offset & 0x1ff)) { - op_len = INT_GET(ophead->oh_len, ARCH_CONVERT); + op_len = be32_to_cpu(ophead->oh_len); } else { idx = BTOBBT((__psint_t)&ophead->oh_len - (__psint_t)iclog->ic_datap); if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - op_len = INT_GET(xhdr[j].hic_xheader.xh_cycle_data[k], ARCH_CONVERT); + op_len = be32_to_cpu(xhdr[j].hic_xheader.xh_cycle_data[k]); } else { - op_len = INT_GET(iclog->ic_header.h_cycle_data[idx], ARCH_CONVERT); + op_len = be32_to_cpu(iclog->ic_header.h_cycle_data[idx]); } } ptr += sizeof(xlog_op_header_t) + op_len; @@ -3533,7 +3444,7 @@ xlog_verify_iclog(xlog_t *log, #endif /* - * Mark all iclogs IOERROR. LOG_LOCK is held by the caller. + * Mark all iclogs IOERROR. l_icloglock is held by the caller. */ STATIC int xlog_state_ioerror( @@ -3581,8 +3492,6 @@ xfs_log_force_umount( xlog_t *log; int retval; int dummy; - SPLDECL(s); - SPLDECL(s2); log = mp->m_log; @@ -3611,8 +3520,8 @@ xfs_log_force_umount( * before we mark the filesystem SHUTDOWN and wake * everybody up to tell the bad news. */ - s = GRANT_LOCK(log); - s2 = LOG_LOCK(log); + spin_lock(&log->l_icloglock); + spin_lock(&log->l_grant_lock); mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; XFS_BUF_DONE(mp->m_sb_bp); /* @@ -3628,7 +3537,7 @@ xfs_log_force_umount( */ if (logerror) retval = xlog_state_ioerror(log); - LOG_UNLOCK(log, s2); + spin_unlock(&log->l_icloglock); /* * We don't want anybody waiting for log reservations @@ -3651,7 +3560,7 @@ xfs_log_force_umount( tic = tic->t_next; } while (tic != log->l_write_headq); } - GRANT_UNLOCK(log, s); + spin_unlock(&log->l_grant_lock); if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { ASSERT(!logerror); @@ -3660,9 +3569,9 @@ xfs_log_force_umount( * log down completely. */ xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); - s2 = LOG_LOCK(log); + spin_lock(&log->l_icloglock); retval = xlog_state_ioerror(log); - LOG_UNLOCK(log, s2); + spin_unlock(&log->l_icloglock); } /* * Wake up everybody waiting on xfs_log_force. @@ -3675,13 +3584,13 @@ xfs_log_force_umount( { xlog_in_core_t *iclog; - s = LOG_LOCK(log); + spin_lock(&log->l_icloglock); iclog = log->l_iclog; do { ASSERT(iclog->ic_callback == 0); iclog = iclog->ic_next; } while (iclog != log->l_iclog); - LOG_UNLOCK(log, s); + spin_unlock(&log->l_icloglock); } #endif /* return non-zero if log IOERROR transition had already happened */