No Account Yet?

You are not logged in.

Add to: JBookmarks Add to: Facebook Add to: Windows Live Add to: Digg Add to: Del.icio.us Add to: Reddit Add to: StumbleUpon Add to: Slashdot Add to: Netscape Add to: Furl Add to: Yahoo Add to: Blogmarks Add to: Technorati Add to: Newsvine Add to: Google Information
idoit - Run commands based on input records E-mail
Programming HowTo's - Shell HowTo's
Written by Christopher Hahn   
Monday, 28 April 2008 08:32
#!/bin/bash
# Execute commands locally or remotely based on fields in records.

# Only supports single-character record separators, but field delimiters may be
# many characters (and that's probably a good idea).

# Author: Christopher Hahn, Apr 2008

# Print the usage summary on STDERR and terminate the script.
# Arguments:
#   $1 - exit status to terminate with (optional; defaults to 1)
# Outputs:
#   the usage text, on STDERR
# Does not return: it always calls exit.
function usage () {
  # Take the status from the caller, so that "usage 0" (the -h option)
  # really exits successfully instead of always exiting 1.
  local rc="${1:-}"
  echo "usage:
$(basename "$0") [-lnpHP] [<input options>] [<parallel options>] [<cmd options>] <cmd> ...

Run commands based on input records. Run me through perldoc for (lots) more
info.

<input options>
-d <delim> The field delimiter. Default is tab.
-k <hostfield> The field number to use as the hostname. Default is to use
fields named hostlan or host.
-s <sep> The record separator. Must be single-character. Default is
newline.
-H First record is NOT column names. Disallows \$colname
notation.

<parallel options>
-n <fanout> Maximum number of simultaneous processes. Default is 32
-p Don't fanout at all-- just run in serial.
-P Don't limit fanout. Same as -n 0

<cmd options>
-c <cmd2> Run <cmd2> before <cmd>.
-e <rcmd> Use <rcmd> to run commands remotely. Implies -r.
-f <filter> Run commands' output through <filter>
Default is to prepend \${hostlan:-\$host}:
-l Run commands locally. Default is remote.
-r Run commands remotely. This is the default.
Remote host is from the environment or from input records.
-u <user> Run commands as <user> (locally uses sudo)
" >&2
  exit "${rc:-1}"
}


# defaults...
# The record separator and field delimiter are written with ANSI-C quoting
# so that they hold a real newline and a real tab character.
sep=$'\n'
delim=$'\t'

# filter_prefix, preamble and postamble may be preset by the caller, even to
# the empty string; a default is only filled in when the variable is unset.
# The single quotes keep the ${...} references literal at this point.
[[ "${filter_prefix+set}" ]] \
  || filter_prefix='${hostlan:-$host}'
[[ "${preamble+set}" ]] \
  || preamble='******************* ${hostlan:-$host} *******************'
[[ "${postamble+set}" ]] \
  || postamble=''

have_headers=1
cmd=
filter_cmd=
user=
remote=1
in_parallel=1
verbose=
hostfield=
declare -i fanout=32

export filter_prefix
export cmd
# inputs...

# Parse the command-line options into the global settings variables.
# Arguments:
#   the script's own arguments ("$@")
# Globals written:
#   cmd delim rcmd remote filter_cmd hostfield fanout in_parallel sep user
#   verbose have_headers, plus OPTIND (left at the index of the first
#   non-option argument so that the caller can shift the options away)
function parse_options () {
  local opt
  OPTIND=1
  while getopts 'c:d:e:f:hk:ln:prs:u:vHP' opt; do
    case "$opt" in
      # each -c adds one more line to the command list
      c) cmd="$cmd"$'\n'"$OPTARG" ;;
      d) delim="$OPTARG" ;;
      e) rcmd="$OPTARG" remote=1 ;;
      f) filter_cmd="$OPTARG" ;;
      h) usage 0 ;;
      k) hostfield="$OPTARG" ;;
      l) remote= ;;
      n) fanout="$OPTARG" in_parallel=1 ;;
      p) in_parallel= ;;
      r) remote=1 ;;
      s) sep="$OPTARG" ;;
      u) user="$OPTARG" ;;
      v) verbose=1 ;;
      H) have_headers= ;;
      P) fanout=0 in_parallel=1 ;;
      # getopts has already printed its own diagnostic for a bad option
      *) usage 1 ;;
    esac
  done
}

parse_options "$@"
shift $((OPTIND-1))
OPTIND=1

# Whatever is left on the command line is the command itself; it goes on its
# own line after anything collected from -c.
if [[ -n "$*" ]]; then
  cmd="${cmd}"$'\n'"$*"
fi

# Work out the wrapper command: a remote shell when running remotely, or
# sudo when running locally as another user.  A preset rcmd is kept.
if [[ -z "$remote" ]]; then
  if [[ -n "$user" ]]; then
    [[ -n "$rcmd" ]] || rcmd=sudo
    rcmd="${rcmd} -u ${user}"
  fi
else
  [[ -n "$user" ]] || user=root
  [[ -n "$rcmd" ]] || rcmd='ssh -T -o BatchMode=yes'
  rcmd="${rcmd} -l ${user}"
fi

readonly cmd

# convince me to keep on living...
# There must be a command, a delimiter and a one-character separator, and a
# remote run needs a wrapper command.
if [[ -z "$cmd" || -z "$delim" || -z "$sep" ]] \
  || [[ ${#sep} -ne 1 ]] \
  || [[ -z "$rcmd" && -n "$remote" ]]; then
  usage 1
fi




# execs $cmd after performing a few per-process preliminaries
#
# The body is wrapped in parentheses, so it runs in its own subshell, and
# the whole if/else has STDIN redirected from /dev/null.
# Globals read: remote rcmd hostlan host vars fields cmd verbose
function run_cmd () (
# a target host is only picked when running remotely with a wrapper command
[[ "$remote" && "$rcmd" ]] && rhost="${hostlan:-$host}"
if [[ "$remote" || "$rcmd" ]]; then
# Build the script text handed to the wrapper command: export the per-record
# variables, set the positional parameters to the quoted field values, then
# run the requested commands.
todo="eval export $(fix_var vars)
set -- $(
for i in "${fields[@]}"; do
echo -n " $(fix_var i)"
done
)
$cmd
"
# quote the whole script text again, this time with a single quote as the
# quote character, before embedding it in the bash -c argument below
todo="$(fix_var todo \')"
[[ "$verbose" ]] && set -x
# $rcmd and $rhost are deliberately unquoted so that they are word-split
# into separate arguments; exec replaces this subshell.
# NOTE(review): when rcmd is the local sudo wrapper, rhost is unset and the
# entire "exec /bin/bash -c ..." string is passed as one argument -- confirm
# that this works as intended.
exec $rcmd $rhost "exec /bin/bash -c $todo"
else
# plain local run: export the per-record variables and eval the commands
eval "export $vars"
[[ "$verbose" ]] && set -x
cat < <(eval "$cmd")
fi </dev/null
)

# Interrupt handler: terminate everything in our process group, then quit.
killemall() {
  # Ignore TERM ourselves first, because signalling pid 0 reaches this
  # shell as well as its children.
  trap -- '' TERM
  kill -s TERM 0
  # put the default TERM disposition back before leaving
  trap - TERM
  exit
}

# Echo the value of a variable as a quoted string.
# Useful only for protecting an argument to an eval'd command while still
# performing substitutions into the arg: backslashes and the quote character
# are escaped, but $ and backticks are left alone.
# Arguments:
#   $1 - NAME of the variable to quote (its value is read indirectly)
#   $2 - quote character to use (optional; defaults to a double quote)
# Outputs:
#   the quoted value and a newline on STDOUT.  With a single quote as the
#   quote character the result is written as $'...' (ANSI-C quoting), the
#   single-quoted form in which the backslash escapes are honoured.
function fix_var () {
  local val="${!1}" q="${2:-\"}"
  val="${val//\\/\\\\}"
  val="${val//"$q"/\\$q}"
  [[ "$q" = "'" ]] && printf '%s' '$'
  # printf rather than echo: the value is arbitrary data
  printf '%s\n' "$q$val$q"
}

# Parallel ChrisDidit SHell
# Start run_cmd as a background job, first waiting until fewer than $fanout
# jobs are running (a fanout of 0 means no limit).
# Globals read: fanout filter_cmd
# Outputs: the job's STDOUT and STDERR, each passed through the filter and
#   each kept on its own stream.
function pcdsh () {
  if [[ $fanout -gt 0 ]]; then
    # -r counts only jobs that are still running; -p prints one PID per job,
    # so the line count is the job count
    until [[ $(jobs -pr | wc -l) -lt $fanout ]]; do
      sleep 1
    done
  fi

  # preserve STDERR, but filter it too
  # The STDERR filter needs its own >&2: otherwise its output would land in
  # the STDOUT pipe and be filtered a second time.
  # The filter is left unquoted on purpose so that it may carry arguments.
  run_cmd 2> >(${filter_cmd:-cat} >&2) | ${filter_cmd:-cat} &
}

# Fall back to the built-in filter unless the requested one can be run.
# Only the first word is looked up, so that a filter given together with
# arguments is not mistaken for a missing command.
if ! type -- "${filter_cmd%% *}" &>/dev/null; then
  # Output filtering for parallel operations
  # Prefixes every line of STDIN with "$filter_prefix:", or copies STDIN
  # through untouched when the prefix is empty.
  function filter () {
    if [[ "$filter_prefix" ]]; then
      # -E '^' matches every line; --label supplies the "file name" that
      # --with-filename prints in front of each one
      grep --with-filename --label="$filter_prefix" -E '^'
    else
      cat
    fi
  }
  export filter_cmd=filter
fi


# We like headers, because it means we can use $varname instead of
# $fieldnum

# Read the first record from STDIN and store its fields in the global array
# "headers", with every character that is not a letter, digit or underscore
# replaced by an underscore.
# Globals read:    sep delim
# Globals written: headers
function read_headers () {
  local line field
  headers=( )

  # gimme column names
  # IFS= and -r keep leading/trailing delimiters (a tab is whitespace to
  # read) and backslashes exactly as they were given
  IFS= read -r -d "$sep" line
  while [[ -n "$line" && "$line" == *"$delim"* ]]; do
    field="${line%%"$delim"*}"
    line="${line#*"$delim"}"
    # conform....
    headers[${#headers[*]}]="${field//[^_[:alnum:]]/_}"
  done
  # the remainder is the last column name; it is conformed like the others
  headers[${#headers[*]}]="${line//[^_[:alnum:]]/_}"
}

if [[ "$have_headers" ]]; then
  declare -ax headers=( )
  read_headers
  readonly -a headers
fi

# On the way out, wait for any background jobs that are still around; on an
# interrupt, hand over to killemall.
trap -- 'wait' EXIT
trap -- 'killemall' INT

# Now process input records

# Add the value held in $field to the record being assembled: append it to
# "fields", note it as the host when its number (counted from 0) equals
# $hostfield, and, when column names are known, add a NAME="value"
# assignment for it to $vars.
# Globals read:    field hostfield have_headers headers
# Globals written: c fields real_hostlan hostlan vars
function add_field () {
  local name
  fields[${#fields[*]}]="$field"

  [[ $((++c)) = "$hostfield" ]] \
    && real_hostlan="$field"

  if [[ "$have_headers" ]]; then
    name="${headers[$c]}"
    # a record with more fields than column names has no name to offer for
    # the extras; those stay reachable as positional parameters only
    if [[ "$name" ]]; then
      [[ "$name" = hostlan ]] && hostlan="$field"
      vars="$vars $name=$(fix_var field)"
    fi
  fi
}

# IFS= and -r keep empty leading/trailing fields and backslashes intact; the
# extra test lets a final record without a trailing separator through.
while IFS= read -r -d "$sep" line || [[ -n "$line" ]]; do
  # Nasty, but... Green Peace.
  # We need to protect the environment.
  declare -i c=-1
  declare -ax fields=( )
  export real_hostlan= field= vars= hostlan=

  # make our vars
  while [[ -n "$line" && "$line" == *"$delim"* ]]; do
    field="${line%%"$delim"*}"
    line="${line#*"$delim"}"
    add_field
  done

  # Whatever remains is the last field.  It goes through the same steps as
  # the others so that it gets its own column name and can be the host field.
  if [[ "$line" ]]; then
    field="$line"
    add_field
  fi

  # special treatment of hostlan
  if [[ "$hostfield" && "$real_hostlan" ]]; then
    vars="$vars hostlan=$(fix_var real_hostlan)"
    hostlan="$real_hostlan"
  fi

  vars="$vars filter_prefix=$(fix_var filter_prefix)"

  # and run the cmd
  # $vars is passed to eval inside quotes so that whitespace within field
  # values is not collapsed by word splitting.
  if [[ "$in_parallel" ]]; then
    eval "$vars pcdsh"
  else
    # The pre/postamble are expanded in a subshell after the record's
    # variables have been assigned there; as mere prefix assignments to echo
    # they would not be visible while its arguments are expanded.
    [[ "$preamble" ]] \
      && ( eval "$vars"; eval "echo $(fix_var preamble)" )

    eval "$vars run_cmd"

    [[ "$postamble" ]] \
      && ( eval "$vars"; eval "echo $(fix_var postamble)" )
  fi
done

# wait for them to finish...
wait

exit 0

cat <<EOF

=head1 NAME

B<idoit> - Run commands based on input records

=head1 SYNOPSIS

B<idoit>
S<B<[-lnpHP]>>
S<B<[>I<L<input options|/Input Options>>B<]>>
S<B<[>I<L<parallel options|/Parallel Options>>B<]>>
S<B<[>I<L<cmd options|/Cmd Options>>B<]>>
S<I<cmd ...>>

=head1 DESCRIPTION

B<idoit> accepts delimited records on F<STDIN>, parses the input into fields, and runs commands for each record either locally or remotely, providing the field values to the commands' environment.
For example, one could pipe in the output of the mysql(1) or C<sqlplus> commands, redirect input from a simple csv (comma-separated values) file, I<et cetera>.

Currently no provision is made for escaping field delimiters within fields.

The commands are executed within a bash(1) subshell, so normal shell quoting, escaping, variable naming, and substitution rules apply.
Field names will have all non-word (alpha-numeric, underscore) characters transliterated into underscores.

=head1 ENVIRONMENT AND INTERPOLATION

=over 4

=item Positional parameters

The positional parameters of the subshell (local or remote) are set to input field values.
This occurs immediately prior to execution, i.e. on the remote host when executing remotely.

=item File descriptors

F<STDIN> is closed (redirected from F</dev/null>).
Other file descriptors are not touched.

=item Field names

When field names are available, these fields become environment variables for the commands to be run.
Like the positional parameters, these variables are defined immediately prior to execution, i.e. on the remote host when executing remotely.
They are also defined locally, prior to executing I<rcmd>.

The special field C<hostlan> is defined as the value of the field indicated by I<hostfield>, even if a field by that name is given, but only if that field has a non-null value.
If the field indicated by I<hostfield> has a null value, then the field C<hostlan> is not overridden.

=item $preamble

=item $postamble

When executing serially, these local variables specify the preamble and postamble to output respectively prior to and after each execution of the requested commands.

=item $filter_prefix

When executing in parallel and using the default output filter, this local variable specifies the string with which to prefix the output.

=item $headers

A reserved, read-only local array variable containing the field names.
Fields with this name will not be defined locally, but will be defined remotely.

=back

=head1 OPTIONS

=over 4

=item B<-h>

Display a usage and brief help message.

=back

=head2 Input Options

Records accepted on F<STDIN> should be clearly separated by a single character (I<sep>), and fields in each record should be clearly delimited by a constant string (I<delim>).
Normally, the first record is taken as a list of field names, which will have all non-word characters translated into underscores.

=over 4

=item B<-d> I<delim>

Specifies the field delimiter. The default is C<\t> (tab).

=item B<-k> I<hostfield>

Indicates by number which field to use as the hostname. Fields are counted from 0.

=item B<-s> I<sep>

Specifies the record separator. The default is C<\n> (newline).

=item B<-H>

Indicates that the first input record does I<not>, in fact, give field names, but is just another input record.

=back

=head2 Cmd Options

Normally, the command is taken as the concatenation of all arguments remaining after options processing.
Command options then affect how the requested command is executed.

Naturally, commands may only be run remotely when a hostname is available.
Hostnames are taken from the input fields named either C<hostlan> (preferred) or C<host>, or from the field indicated by I<hostfield>.
It is an error to attempt remote commands without a destination hostname.

=over 4

=item B<-c> I<cmd2>

Run I<cmd2> prior to I<cmd>.

=item B<-e> I<rcmd>

Implies B<-r>.
Provides the command list as the last argument (singular) to I<rcmd>.
For instance:

idoit -u web -e rsh uptime

Causes the following command to be run:

rsh -l web $host "exec /bin/bash -c 'uptime'"

The default when running remotely is C<ssh -T -o BatchMode=yes>.
The default when running locally is nothing, unless a I<user> is specified, in which case the default is C<sudo -u>.

=item B<-f> I<filter>

Runs requested commands' output through the command I<filter>'s F<STDIN> prior to sending to our F<STDOUT>.
The default is an internal function called B<filter>, which prepends '${hostlan:-$host}:' to each line.

Set to C<cat> to disable output filtering.

=item B<-l>

Run commands locally.
This changes the default I<rcmd> to nothing unless a I<user> is specified, in which case the default I<rcmd> becomes C<sudo>.

=back

=head1 REQUIRES

bash(1) >= 3
grep(1) with extended regular expression support.

=head1 AUTHOR

Christopher Hahn <cjhahn, gmail>

=cut


EOF
Comments
Search RSS
chahn   |Administrator |2008-05-10 04:53:59
updated for better signal handling, order-of-magnitude startup performance boost
to local commands, prettier presentation on theDynasty.
Only registered users can write comments!

3.22 Copyright (C) 2007 Alain Georgette / Copyright (C) 2006 Frantisek Hliva. All rights reserved."

Last Updated ( Sunday, 25 May 2008 00:11 )