#!/bin/sh -x
# the "#!" sequence is often called a "she-bang" sequence.
# comments start with a '#' sign

# first line in a shell script of any language, starts with "#!" followed by
# the pathname to the shell processor program (/bin/sh, perl, python, etc.)

# Unix OS doesn't care about content of files: they are treated like generic
# blobs of bytes.  No format or interpretation is imposed.  Some exceptions
# include executable formats: read/write files doesn't matter, but executing
# does matter: the OS has to set up a process, various segments and page
# protections (e.g., TEXT, HEAP, STACK, etc.).  OS has to verify that the
# binary is of the "right format".  There are many binary formats produced
# by compilers: a.out (old one), ELF, COFF, etc.  Different formats describe
# where is the code, statics, shared or static libraries to link with
# executable, and more.  There's OS code to know how to set up a process for
# each recognized binary format.
#$ file /bin/ls
#/bin/ls: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/l, for GNU/Linux 3.2.0, BuildID[sha1]=9567f9a28e66f4d7ec4baf31cfbf68d0410f0ae6, stripped

# shell scripts are NOT binaries.  So how do they run?!
# when you execute a script like "./mytest.sh", and if "mytest.sh" is
# chmod'ed u+x, then
# 1. OS reads first two bytes of the file.
# 2. first 2 bytes are known as the unix "magic type" of the file.  See
#    magic(5), and sometimes the list of all known file types is in
#    /etc/magic.
# 3. if first two bytes are "#!", then read rest of line until \n (newline)
# 4. create a new process and execute in it the binary listed after "#!"
#    including any arguments if listed (delimited by spaces)
# 5. OS passes the name of the file you executed, "mytest.sh" as an argument
#    to the process the OS sets up.
# 6. Meaning: if your first line of mytest.sh is "#!/bin/sh -x", then the OS
#    will start a new process like this:
#        /bin/sh -x mytest.sh
# 7.
#    essentially no difference as if you ran the shell script yourself as
#    follows:
#        $ /bin/sh -x mytest.sh

# how to print stuff on screen (stdout by default)
echo hello world

# variables
i=1
MAX=99
Today=Monday
# refer to variables with '$' or ${var}
# REMINDER: In Makefiles, $FOO is not allowed, must use ${FOO} or $(FOO)
echo Today is $Today
echo max weight is ${MAX}lb
# any undefined variable has a "" value

# tests, use a unix program called test(1)
# if-then-fi, or if-then-else-fi
# condition to be tested follows the "if"
# the condition is considered TRUE if the program exits with a 0
# REMINDER: in C programs, a "condition" is true when it is non-zero
if test -f /usr/include/time.h
then
  echo I have /usr/include/time.h
else
  echo NO /usr/include/time.h found
fi
# the test(1) program is universal and versatile, used a lot in shell scripts.

# how to write stuff to files
# use "shell redirection" from stdout and stderr, using ">"
echo hi there                 # prints to stdout
echo hello world > file.txt   # create or truncate file.txt, send all stdout
                              # to it from program to left of ">" symbol
echo hello world 1> file.txt  # same b/c '1' is FD of stdout
# append to an existing file (or create a new file)
echo hello world >> file.txt

# header file tests with redirection
# (the first test uses ">" to start config.h, the second appends with ">>")
if test -f /usr/include/time.h
then
  echo /usr/include/time.h: found
  echo "#define HAVE_TIME_H 1" > config.h
else
  echo /usr/include/time.h: not found
fi
if test -f /usr/include/sys/time.h
then
  echo /usr/include/sys/time.h: found
  echo "#define HAVE_SYS_TIME_H 1" >> config.h
else
  echo /usr/include/sys/time.h: not found
fi

# redirection ("cmd" below is a placeholder for any program)
cmd > file        # redirects cmd's stdout to "file" (file is created/truncated)
cmd 2> file       # redirects cmd's stderr to "file" (file is created/truncated)
                  # '2' is FD of stderr
cmd 1> file       # same as "cmd > file"
cmd > f1 2> f2    # redirect stdout to f1, and stderr to f2
cmd > f1 2>&1     # redirect stdout to f1, and "bind" stderr to stdout (both go to f1)
cmd >> file       # append cmd's stdout
#                 to file (create file if doesn't exist)
cmd > /dev/null         # redirect stdout to /dev/null (a "bit bucket" device)
cmd < infile            # redirect stdin
cmd < infile > outfile  # redirect stdin from infile, and output to outfile

# internal variables
# $1, $2, $3, ...: same as argv[1], argv[2], ... in a C program
# $0: same as argv[0] (name of actual executable/program)
# $#: like argc, but counts only the args $1, $2, ... (not $0)
# $*: a space delimited list of all $1, $2, $3, args..
# $?: exit status of last program (a number)

# a "for" loop
LIST="one world 17 xyz"
# note, for separating args in a list, number of spaces don't matter
for i in $LIST
do
  echo $i
done
# print all args passed to the script
for j in $*
do
  echo $j
done

# a test to check if "gcc" works
# 1. not enough, only tests if gcc exists
if test -f /usr/bin/gcc
then
  echo I have a working C compiler, maybe
fi
# 2. better: actually compile a tiny program.  The program spells out
# "int main(void)" because newer compilers reject the old implicit-int
# form "main(){}", which would make a working gcc look broken.
echo "int main(void){return 0;}" > test.c
gcc test.c 2> /dev/null
# don't care for actual stdout/err output, so redirect it
# see test(1): allows you to test existence of files/dir/symlinks, compare
# timestamps of files, compare variables (if empty, greater than, etc.)
# NOTE: in shell scripts, and in test(1) a comparison is a SINGLE '='.
if test $? = 0
then
  echo GCC compiled ok
else
  echo GCC did not compile
fi
rm -f a.out test.c
# use -f to "force" deletion, and not complain even if file doesn't exist

# a shortcut: "if" can test the exit status of gcc directly
# (test.c was just deleted, so create it again first)
echo "int main(void){return 0;}" > test.c
if gcc test.c 2> /dev/null
then
  echo GCC compiled ok
else
  echo GCC did not compile
fi
rm -f a.out test.c

# test which python versions are installed and working
# create a dummy python2 (p2) and python3 programs (p3)
# (p2 and p3 are not created by this script; they must already exist in the
# current directory or both tests below fail)
# NOTE, ';' separates statements in shell scripts (otherwise \n ends stmt)
if python2 p2 ; then
  echo python2 exists and working
fi
if python3 p3 ; then
  echo python3 exists and working
fi

# test WHICH version of python I have?
# (p2 and p3 are the dummy python2/python3 programs; they must exist in the
# current directory)
if python p2 ; then
  echo python2 exists and working
fi
if python p3 ; then
  echo python3 exists and working
fi

# find out if there's a program available in default PATH
PATH=/bin:/usr/bin:/usr/local/bin:/sbin:/usr/sbin:$PATH
# use program which(1) that searches your path and gives you the full path
# to a program.  e.g., "which ls" or "which gcc" or "which python2.7"
for p in python python2 python3 python2.7 python3.6 ; do
  # the `cmd` syntax says "run cmd and put its output in variable";
  # some which(1) versions complain on stderr when nothing is found, so
  # silence that: only the captured stdout matters here
  tmp=`which $p 2> /dev/null`
  if test -z "$tmp" ; then      # -z says is empty
    echo NOT FOUND: $p
  else
    echo FOUND $p in $tmp
  fi
done

# ; separates "statements" in a shell program
# ("int main(void)" is spelled out because newer compilers reject the old
# implicit-int form "main(){}")
echo "int main(void){return 0;}" > test.c ; gcc test.c 2> /dev/null
if test $? = 0 ; then
  echo GCC compiled ok
else
  echo GCC did not compile
fi
rm -f a.out test.c      # clean up the files created for the compile test

# QUOTING RULES
# quotes are useful to have a single parameter that includes spaces and
# other special chars.  By default, spaces delimit arguments or elements of
# a string.
# "double quotes": include string in quotes as is, but expand $variables
# 'single quotes': include string in quotes verbatim
# note above is a single forward quote mark (or "forward tick mark")
MAX=99
echo "max is $MAX"      # print max is 99
echo 'max is $MAX'      # print max is $MAX
# quotes also tell a shell script to consider the quoted material as a
# single argument or item in a list.
for i in hello world "I am here" ; do
  echo $i
done
# NOTE: you can quote 'single quotes' with "double quotes",
# and you can quote "double quotes" with 'single quotes'

# backtick quote: single quote bending left: execute command and replace
# its stdout output in place, useful for variables to capture output of
# commands.
tmp=`which ls`          # capturing output (stdout) of programs, using `back tick`
files=`/bin/ls *.c`
# can always escape using backslash in front of special chars
echo "I have \$10"      # prints I have $10
echo "this is one backslash: \\"        # prints the text and a single backslash
# note: on some terminals/graphical systems, it's hard to tell the
# difference b/t:
#   ' and `
#   O and 0
#   l and 1

# environment variables (like "char **envp" in C programs)
echo $PATH
# some variables are exported from the parent shell to the shell script you
# are running.  If you want variable FOO from your env (parent or login
# shell) to be passed on to a shell script you are running, then type this
# in the PARENT (or login) shell:
export FOO
env     # print all exported variables to this shell script

######################################################################
# useful tools

# recall test(1) program above, useful for testing various conditions

# 1. GREP: searching for strings and regular expressions in files
# syntax: grep [opts] RE file1 file2 ...
# see man page for grep(1)
# grep will search the input files one by one, one line at a time, and try
# to match against the RE (regular expression).  if a match is found, print
# the matching line and continue.
# Regular Expressions (REs, useful for many programs, languages, and systems
# that support regexps)
# string: look for a "string" verbatim (any alphanumeric word)
# .: match any one character
# *: match 0 or more times of the last matched 'thing'
# .*: match any one char 0 or more times (i.e., match any string of any length)
# ?: match last thing 0 or 1 times exactly
# Note there's extra syntax such as {n} to match the last thing n times
# (with grep, use the -E option to get ? and {n})
# [list]: match any one of the chars in "list"
# ranges example: [a-z] or [A-Za-z0-9]
# [^list]: match anything OTHER than any one of the chars in "list"
# ^: match against start of line (if ^ shows up first)
# $: match against end of line (if $ shows up last)
# example, if RE is "^$", match against an empty line
# can escape any special char with \, including \\

# Useful options to grep
# -i: perform a case insensitive match (ignore case)
# -q: exit with 0 (match found) or non zero, without output to stdout
# -H: list the file name being matched in addition to the matched line
#     (useful when grep-ing against multiple files)
# -n: list the matching line number too
# -r: search directories recursively
# -v: reverse the RE match, show only lines that do NOT match the RE

# example: does time.h have a "struct timer"?
if grep -q "struct timer" /usr/include/time.h ; then
  echo "found struct timer in time.h"
fi

######################################################################
# 2. TR -- translate characters
# performs a translation of one character to another in an input stream
# tr(1) is a "UNIX filter" program: takes default input from stdin, and
# prints default output to stdout.  Useful when you want to combine
# input/output of several programs together.
# Note: tr works on an input stream, not one line at a time
# Usage: tr [flags] INPUT-PATTERN [OUTPUT-PATTERN]
# example: tr 'a' 'A'
# or even just: tr a A
# will convert all lowercase 'a' to uppercase 'A' in the input stream
# tr '[a-z]' '[A-Z]': capitalize every letter
# tr '[a-z]' '[n-za-m]': the "caesar cipher" or ROT13
# tr '[ -]' '__': convert all spaces and '-' to underscores
# tr -d: delete any matching chars
# example: tr -d '[0-9]'
# means: delete all digits from the input
# tr -c: complement the match, replace (or delete) anything that does NOT
# match the input pattern.  Example to delete all non-digits
# tr -dc '[0-9]'
# NOTE: to a POSIX tr the '[' and ']' above are ordinary characters that are
# part of the pattern (so tr -d '[0-9]' also deletes brackets); the range
# alone, e.g. tr -d '0-9', is enough.

######################################################################
# UNIX FILTERS

# cat(1): concatenate one or more input files
cat foo.txt bar.txt     # concatenate the two files and o/p on stdout
cat foo.txt bar.txt > both.txt

# sort(1): sort input one line at a time
# can sort numerically, alphabetically, case in/sensitive, or even by "keys"
# (sort using one or more keys in input, based in position in line)
sort names.txt          # read names.txt, sort, and o/p on stdout
sort < names.txt        # read names.txt from stdin, feed into sort program, o/p to stdout
# in above example, the SHELL feeds the file names.txt into sort's stdin, so
# sort does NOT know the file name it is reading.  The mechanisms for
# redirection are available in a UNIX OS, using dup(2), dup2(2), and
# pipe(2).
sort < names.txt > sorted-names.txt

# uniq(1): print unique lines in input
# will read stdin, o/p only "unique" lines, avoiding duplicates.
# uniq expects sorted input
# uniq -u: show only the lines that were unique
# uniq -c: also display number of times a duplicate line showed up
# uniq -n: will include a count of how many instances of the same line exist
#          (NOTE: counting is what -c above does; POSIX uniq defines no such
#          -n flag, so check uniq(1) on your system)

# wc(1): word-count
# wc -w: count "words" (whitespace delimited)
# wc -l: count number of lines
# wc -c: count number of characters (bytes; -m counts multi-byte characters)

# cmp(1): compare two files' contents
# You can also generate hash signatures of files using md5/md5sum,
# sha1/sha1sum, etc. and compare them, but hash programs consume lots of CPU
# cycles.

# diff(1): compare 2 input files
# diff -u: generates a "unified diff" useful as a patch to use the patch(1)
# program.  Often used to generate and submit source code patches.

# example: take 2 input files, concatenate them, ignore '#' comments, sort,
# then produce only unique lines, and then count how many lines we have
# (a): the long way
cat a.txt b.txt > c.txt
grep -v "^#" c.txt > c2.txt
# note: if I used > c.txt, the input file will be truncated before it gets
# read!  so has to use a temp file
# E.G., BAD, will destroy your input: grep -v "^#" c.txt > c.txt
sort < c2.txt > c3.txt  # again, sort and put o/p into another temp file
# better, reuse c.txt, instead of a 3rd o/p file
uniq < c3.txt > c.txt
wc -l c.txt
# done, do some cleanup
rm -f c.txt c2.txt c3.txt

# better: use pipes to pass stdout of one program into the next program's
# stdin (this is why it's useful to use UNIX "filters")
numlines=`cat a.txt b.txt | grep -v "^#" | sort | uniq | wc -l`
echo Number of lines is $numlines

######################################################################
# SED(1): stream editor
# read input lines, match against an input RE, and change any matches to
# something else.
# Syntax: sed "s/MATCH/REPLACE/flags" [infile]
# sed is also a UNIX filter
# "s/MATCH/REPLACE/flags" means:
# s: match and substitute (a single letter cmd to control how sed works)
# /: a single char delimiter to separate the MATCH and REPLACE parts
# MATCH: a RE to match each line against
# REPLACE: a string to replace any matching input string (it can refer back
#   to the matched text with & or \1 .. \9)
#   REPLACE can be empty, eg. "s/MATCH//g" and it'll effectively delete
#   any MATCH'd REs in the input line.
# flags: modifiers to control overall behavior of sed
#   examples:
#   g: replace all matches on a single line, not just first one (global)
#   i: case insensitive match