% tools-slides.tex -- Slides for talk on AFS management tools.
% $Id: tools-slides.tex 45 2005-06-14 02:39:18Z eagle $
%
% Written by Russ Allbery <rra@stanford.edu>

\documentclass[landscape,semhelv]{seminar}
\usepackage{url}
\usepackage{hyperref}

% Custom page style for every slide.  The first braced group after the
% style name is the header (institution, date, and page number spread
% across the line with \hfil) and the second is the footer (centered
% author name and e-mail address).  The trailing % on each line keeps the
% line breaks from inserting spaces between the arguments.
\newpagestyle{mypagestyle}%
  {Stanford University \hfil \thedate \hfil \thepage}%
  {\hfil Russ Allbery (rra@stanford.edu) \hfil}
% Activate the style defined above for the whole document.
\pagestyle{mypagestyle}

\begin{document}

\begin{slide}
  \begin{center}
    {\Large AFS Cell Management} \\
    {\large Tools and Techniques}

    \vspace{1in}
    
    Russ Allbery \\
    June 13, 2005
  \end{center}
\end{slide}

\begin{slide}
  \section*{Introduction}

  \begin{itemize}
  \item Stanford has 3.9TB of data in AFS, in 57,485 volumes (as of June
    1st).  (1.6TB user home directories, 660GB data, 180GB groups and
    departments, 550GB classes).

  \item Administration when no migrations are in progress takes a few
    hours a week, mostly creating unusual volumes, moving volumes around,
    and upgrading servers.
    
  \item Tools presented here developed by Neil Crellin.
    
  \item \url{http://www.eyrie.org/~eagle/software/}

  \item \url{http://www.eyrie.org/~eagle/notes/afs/}
  \end{itemize}
\end{slide}

\begin{slide}
  \section*{Contents}

  \begin{itemize}
  \item Volume creation and management
  \item Managing ACLs
  \item Analysis and reporting
  \item Replicated volumes
  \item Monitoring with Nagios
  \end{itemize}
\end{slide}

\begin{slide}
  \section*{Creating Volumes}

  \begin{itemize}
  \item \texttt{volcreate} wrapper to balance where volumes are placed
  \item Mapping volume types to servers
  \item Size policy (2--4GB max for ease of moving volumes)
  \item Automated log volume creation with \texttt{volcreate-logs}
  \item Wrapper scripts for volume types (\texttt{create-user}, etc.)
  \end{itemize}
\end{slide}

\begin{slide}
  \section*{Managing Volumes}

  \begin{itemize}
  \item \texttt{partinfo} wrapper for usage information
  \item \texttt{mvto} utility for all volume moving
  \item Generating volume lists with \texttt{vos listvol}
  \item Checking for unreleased volumes with \texttt{unreleased}
  \item Balancing: why or why not, and possible overkill solutions
  \item \texttt{volnuke} wrapper to delete volumes
  \item Delegated volume creation ability (\texttt{remctl} and
    \texttt{afs-backend})
  \end{itemize}
\end{slide}

\begin{slide}
  \section*{Managing ACLs}

  \begin{itemize}
  \item One PTS group per course, department, or group volume
  \item Help desk tools to change PTS group membership (and volume quota)
  \item \texttt{fsr} wrapper for users
  \item Be careful of IP-based ACLs: subnets work best, better to use
    \texttt{kstart} and machine srvtabs over IP ACLs
  \item Log volume ACLs (lik) and the potential problems
  \item Think about \texttt{fs cleanacl}
  \item Unix directory owners and their special ACLs
  \end{itemize}
\end{slide}

\begin{slide}
  \section*{Tracking Volumes}

  \begin{itemize}
  \item Hierarchical naming scheme for volumes
  \item Mount point database (\texttt{mtpt}, \texttt{loadmtpt},
    \texttt{cleanmtpts})
  \item Nightly load into an Oracle database
  \item Nightly reports from the Oracle database (released volumes, high
    accesses, volumes moved, unreleased changes, missing mount points)
  \item Monthly usage reports
  \end{itemize}
\end{slide}

\begin{slide}
  \section*{Replicated Volumes}

  \begin{itemize}
  \item Replication helps when a server is down, not when it's slow
  \item How many replicas do you want?  (2--4)
  \item \texttt{volcreate} and server geographic locations
  \item How RW and RO paths work: replicate the whole path
  \item Delegated volume release ability (\texttt{remctl} and
    \texttt{afs-backend})
  \item \texttt{frak} to find changes
  \item Restoring a RW from a RO with \texttt{vos dump} and \texttt{vos
      restore}
  \end{itemize}
\end{slide}

\begin{slide}
  \section*{Monitoring with Nagios}

  \begin{itemize}
  \item Basic tool: \texttt{bos status}
  \item Monitor VLDB servers with \texttt{udebug}: \break
    pt 7002, vl 7003, ka 7004
  \item Available disk with \texttt{vos partinfo}
  \item Connections waiting for thread (\texttt{rxdebug})
  \item AFS logs and \texttt{kill -TSTP}
  \item Nightly problem reports from Oracle database
  \end{itemize}
\end{slide}

\end{document}
