#!/bin/sh -x

# the "#!" sequence is often called a "she-bang" sequence.

# comments start with a '#' sign

# first line in a shell script of any language, starts with "#!" followed by
# the pathname to the shell processor program (/bin/sh, perl, python, etc.)

# Unix OS doesn't care about content of files: they are treated like generic
# blobs of bytes.  No format or interpretation is imposed.   Some exceptions
# include executable formats: read/write files doesn't matter, but executing
# does matter: the OS has to setup a process, various segments and page
# protections (e.g., TEXT, HEAP, STACK, etc.).  OS has to verify that the
# binary is of the "right format".  There are many binary formats produced
# by compiler: a.out (old one), ELF, COFF, etc.  Different formats describe
# where is the code, statics, shared or static libraries to link with
# executable, and more.  There's OS code to know how to setup a process for
# each recognized binary format.

#$ file /bin/ls
#/bin/ls: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/l, for GNU/Linux 3.2.0, BuildID[sha1]=9567f9a28e66f4d7ec4baf31cfbf68d0410f0ae6, stripped

# shell scripts are NOT binaries.  So how do they run?!
# when you execute a script like "./mytest.sh", and if "mytest.sh" is
# chmod'ed u+x, then
# 1. OS reads first two bytes of the file.
# 2. first 2 bytes are known as the unix "magic type" of the file.  See
#     magic(5), and sometimes the list of all known file types is in
#     /etc/magic.
# 3. if first two bytes are "#!", then read rest of line until \n (newline)
# 4. create a new process and execute in it the binary listed after "#!"
#    including any arguments if listed (delimited by spaces)
# 5. OS passes the name of the file you executed, "mytest.sh" as an argument
#    to the process the OS sets up.
# 6. Meaning: if your first line of mytest.sh is "#!/bin/sh -x", then the OS
#    will start a new process like this:
#	/bin/sh -x mytest.sh
# 7. essentially no difference as if you ran the shell script yourself as
#    follows:
#	 $ /bin/sh -x mytest.sh

# printing: echo writes its arguments to the screen (stdout by default)
echo "hello world"

# variables: assigned as name=value, with no spaces around the '='
Today=Monday
MAX=99
i=1

# a variable's value is read back with $var or ${var}; the braces are needed
# when the name is directly followed by other text, as in ${MAX}lb below
# REMINDER: In Makefiles, $FOO is not allowed, must use ${FOO} or $(FOO)
echo "Today is ${Today}"
echo "max weight is ${MAX}lb"
# a variable that was never defined expands to "" (the empty string)

# conditions are checked with a unix program called test(1)
# forms: if-then-fi, or if-then-else-fi
# whatever program follows the "if" is run, and the condition counts as TRUE
# when that program exits with a 0
# REMINDER: in C programs it is the other way around: a "condition" is true
# when it is non-zero
# here "!" negates the -f (is a regular file) check, so the "missing" case
# comes first
if test ! -f /usr/include/time.h
then
    echo "NO /usr/include/time.h found"
else
    echo "I have /usr/include/time.h"
fi
# test(1) is universal and versatile, and shows up all over shell scripts.

# writing to files: "shell redirection" sends a program's stdout (or stderr)
# to a file instead of the screen, using ">"
echo "hi there" # no redirection: prints to stdout
# ">" creates file.txt (or truncates it if it exists) and sends all stdout of
# the program to the left of the ">" symbol into it
echo "hello world" > file.txt
# "1>" is the same thing spelled out, b/c '1' is the FD of stdout
echo "hello world" 1> file.txt

# ">>" appends to an existing file (or creates a new file)
echo "hello world" >> file.txt

# header file tests with redirection: probe for system headers and record
# each one that exists as a "#define HAVE_..." line in config.h
# start from an empty config.h on every run; otherwise, when time.h is
# missing, the ">>" further down would append to a stale config.h left over
# from an earlier run
: > config.h # ':' is a command that does nothing; the '>' creates/empties config.h

if test -f /usr/include/time.h
then
    echo /usr/include/time.h: found
    echo "#define HAVE_TIME_H 1" >> config.h
else
    echo /usr/include/time.h: not found
fi

if test -f /usr/include/sys/time.h
then
    echo /usr/include/sys/time.h: found
    echo "#define HAVE_SYS_TIME_H 1" >> config.h
else
    echo /usr/include/sys/time.h: not found
fi

# redirection
# NOTE: "cmd", "file", "f1", "f2", "infile" and "outfile" below are
# placeholder names used to show the syntax; no "cmd" is defined in the
# visible part of this script -- substitute a real program and real file
# names to try these out
cmd > file # redirects cmd's stdout to "file" (file is created/truncated)
cmd 2> file # redirects cmd's stderr to "file" (file is created/truncated)
            # '2' is FD of stderr
cmd 1> file # same as "cmd > file"
cmd > f1 2> f2 # redirect stdout to f1, and stderr to f2
cmd > f1 2>&1 # redirect stdout to f1, and "bind" stderr to stdout (both go to f1)
              # the order matters: "2>&1" has to come AFTER the "> f1"
cmd >> file # append cmd's stdout to file (create file if doesn't exist)
cmd > /dev/null # redirect stdout to /dev/null (a "bit bucket" device)
cmd < infile # redirect stdin: cmd reads its input from infile
cmd < infile > outfile # redirect stdin from infile, and output to outfile

# internal variables
# $1, $2, $3, ...: same as argv[1], argv[2], ... in a C program
# $0: same as argv[0] (name of actual executable/program)
# $#: same as argc (number of args)
# $*: a space delimited list of all $1, $2, $3, args..
# $?: exit status of last program (a number)

# a "for" loop: runs the body once per word in the list, with $i set to the
# current word
LIST="one world     17 xyz"
# note, when separating args in a list, the number of spaces doesn't matter:
# $LIST is deliberately left unquoted so the shell splits it into 4 words
for i in $LIST ; do
    echo "$i"
done

# print all args passed to the script, one per line
# "$@" (inside double quotes) expands to each arg as its own word, so an arg
# that contains spaces or wildcard chars is printed intact; an unquoted $*
# would re-split such an arg into several words and expand any wildcards
for j in "$@"
do
    echo "$j"
done

# a test to check if "gcc" works
# 1. not enough, only tests if gcc exists
if test -f /usr/bin/gcc
then
     echo I have a working C compiler, maybe
fi

# 2. better: actually try to compile a minimal program
# main is declared with an explicit "int" return type: newer compilers (e.g.,
# GCC 14) reject the old implicit-int form "main(){}", which would make a
# perfectly good compiler look broken
echo "int main(void){return 0;}" > test.c
gcc test.c 2> /dev/null # don't care for actual stderr output, so redirect it
# see test(1): allows you to test existence of files/dir/symlinks, compare
# timestamps of files, compare variables (if empty, greater than, etc.)
# NOTE: in shell scripts, and in test(1) a comparison is a SINGLE '='.
# $? still holds gcc's exit status here because no other command has run
# since (comments don't count)
if test $? = 0
then
    echo GCC compiled ok
else
    echo GCC did not compile
fi
rm -f a.out # remove only the compiler's output; test.c is needed again below

# a shortcut: "if" can run the program directly and test its exit status
if gcc test.c 2> /dev/null
then
    echo GCC compiled ok
else
    echo GCC did not compile
fi
rm -f a.out test.c # use -f to "force" deletion, and not complain even if file doesn't exist
# test which python versions are installed and working
# create a dummy python2 (p2) and python3 programs (p3); each one uses syntax
# that only its own python version accepts, so it fails under the other one
echo 'print "hello"' > p2          # print as a statement: python2 only
echo 'print("hello", end="")' > p3 # print() with end= keyword: python3 only
# NOTE, ';' separates statements in shell scripts (otherwise \n ends stmt)
# the dummy programs' own output and error messages are of no interest, only
# the exit status is, so both stdout and stderr are sent to /dev/null
if python2 p2 > /dev/null 2>&1 ; then
    echo python2 exists and working
fi
if python3 p3 > /dev/null 2>&1 ; then
    echo python3 exists and working
fi

# test WHICH version of python I have?
# plain "python" can only run the dummy program that matches its own version
if python p2 > /dev/null 2>&1 ; then
    echo python2 exists and working
fi
if python p3 > /dev/null 2>&1 ; then
    echo python3 exists and working
fi
rm -f p2 p3 # done with the dummy programs, clean up

# look for programs in the default PATH
# first put the standard system directories at the front of the search path
PATH="/bin:/usr/bin:/usr/local/bin:/sbin:/usr/sbin:$PATH"
# which(1) searches your PATH and prints the full path to a program,
# e.g., "which ls" or "which gcc" or "which python2.7"
for p in python python2 python3 python2.7 python3.6
do
    # `cmd` (in backticks) says "run cmd and put its output here"; the output
    # is stored in the variable tmp
    tmp=`which "$p"`
    if test -n "$tmp" # -n says "is not empty" (the opposite of -z)
    then
	echo "FOUND $p in" $tmp
    else
	echo "NOT FOUND: $p"
    fi
done

# ; separates "statements" in a shell program
# (main is declared with an explicit "int": newer compilers such as GCC 14
# reject the implicit-int form "main(){}", which would make a working
# compiler look broken)
echo "int main(void){return 0;}" > test.c ; gcc test.c 2> /dev/null
# $? is the exit status of the last command run, i.e., of gcc
if test $? = 0 ; then
    echo GCC compiled ok
else
    echo GCC did not compile
fi

# QUOTING RULES

# quotes are useful to have a single parameter that includes spaces and
# other special chars.  By default, spaces delimit arguments or elements of
# a string.

# "double quotes": include string in quotes as is, but expand $variables
# 'single quotes': include string in quotes verbatim (nothing is expanded)
# note above is a single forward quote mark (or "forward tick mark"), i.e.,
# the apostrophe character
MAX=99
echo "max is ${MAX}" # double quotes: $MAX is expanded, prints max is 99
echo 'max is $MAX' # single quotes: nothing is expanded, prints max is $MAX
# quoting also groups words: the quoted material counts as one single
# argument or one item in a list, so this loop runs 3 times (not 5)
for i in hello     world 'I am here'
do
    echo "$i"
done
# NOTE: you can quote 'single quotes' with "double quotes",
# and you can quote "double quotes" with 'single quotes'

# backtick quote (a single quote leaning left, `like this`): execute the
# command inside the backticks and substitute its stdout output in place;
# useful for capturing the output of commands in variables.
# tmp receives whatever "which ls" prints on its stdout
tmp=`which ls`
# capturing output (stdout) of programs, using `back tick`
# here the shell expands *.c to the matching file names before ls runs, and
# files receives the names that ls prints
files=`/bin/ls *.c`

# can always escape using backslash in front of special chars
# (inside double quotes, \$ is a literal '$' and \\ is one literal '\')
echo "I have \$10" # prints I have $10
echo "this is one backslash: \\" # prints this is one backslash: \

# note: on some terminals/graphical systems, it's hard to tell  the
# difference b/t:
# ' and `
# O and 0
# l and 1

# environment variables (like "char **envp" in C programs)
echo $PATH # PATH is one such variable; print its current value
# some variables are exported from the parent shell to the shell script you
# are running.  If you want variable FOO from your env (parent or login
# shell) to be passed on to a shell script you are running, then type this
# in the PARENT (or login) shell:
# NOTE: the line below runs inside THIS script, so here it only marks FOO
# for export to programs started by this script; it does not change the
# parent shell.  FOO is not assigned a value anywhere in the visible part of
# this script.
export FOO
env # print all exported variables to this shell script

######################################################################
# useful tools

# recall test(1) program above, useful for testing various conditions

# 1. GREP: searching for strings and regular expressions in files
# syntax: grep [opts] RE file1 file2 ...
# see man page for grep(1)
# grep will search the input files one by one, one line at a time, and try
# to match against the RE (regular expression).  if a match is found, print
# the matching line and continue.

# Regular Expressions (REs, useful for many programs, languages, and systems
#		       that support regexps)
# string: look for a "string" verbatim (any alphanumeric word)
# .: match any one character
# *: match 0 or more times of the last matched 'thing'
# .*: match any one char 0 or more times (i.e., match any string of any length)
# ?: match last thing 0 or 1 times exactly
# Note there's extra syntax such as {n} (written \{n\} in basic REs) to match
# the last thing exactly n times
# [list]: match any one of the chars in "list"
#	ranges example: [a-z] or [A-Za-z0-9]
# [^list]: match anything OTHER than any one of the chars in "list"
# ^: match against start of line (if ^ shows up first)
# $: match against end of line (if $ shows up last)
#	example, if RE is "^$", match against an empty line
# can escape any special char with \, including \\

# Useful options to grep
# -i: perform a case insensitive match (ignore case)
# -q: exit with 0 (match found) or non zero, without output to stdout
# -H: list the file name being matched in addition to the matched line
#	(useful when grep-ing against multiple files)
# -n: list the matching line number too
# -r: match recursively
# -v: reverse the RE match, show only lines that do NOT match the RE

# example: check whether <time.h> mentions a "struct timer"
# -q keeps grep silent; only its exit status (0 = match found) drives the "if"
if grep -q 'struct timer' /usr/include/time.h
then
    echo 'found struct timer in time.h'
fi

######################################################################
# 2. TR -- translate characters
# performs a translation of one character to another in an input stream
# tr(1) is a "UNIX filter" program: takes default input from stdin, and
# prints default output to stdout.  Useful when you want to combine
# input/output of several programs together.

# Note: tr works on an input stream, not one line at a time
# Usage: tr [flags] INPUT-PATTERN [OUTPUT-PATTERN]

# example: tr 'a' 'A'
# or even just: tr a A
# will convert all lowercase 'a' to uppercase 'A' in the input stream

# tr '[a-z]' '[A-Z]': capitalize every letter

# tr '[a-z]' '[n-za-m]': the "caesar cipher" or ROT13

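# tr ' -' '__': convert all spaces and '-' to underscores
# (the '-' goes last so that tr takes it literally; written as '[ -]' the
# pattern would be read as '[' plus the range from space to ']', which also
# covers digits, uppercase letters, and most punctuation)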

# tr -d: delete any matching chars
# example: tr -d '[0-9]'
# means: delete all digits from the input

# tr -c: complement the match, replace (or delete) anything that does NOT
# match the input pattern.  Example to delete all non-digits
# tr -dc '[0-9]'

######################################################################
# UNIX FILTERS
# cat(1): concatenate one or more input files
# NOTE: foo.txt, bar.txt and names.txt are example file names; nothing in the
# visible part of this script creates them
cat foo.txt bar.txt # concatenate the two files and o/p on stdout
cat foo.txt bar.txt > both.txt # same, but the o/p is saved in both.txt

# sort(1): sort input one line at a time
# can sort numerically, alphabetically, case in/sensitive, or even by "keys"
# (sort using one or more keys in input, based on position in line)
sort names.txt # read names.txt, sort, and o/p on stdout
sort < names.txt # read names.txt from stdin, feed into sort program, o/p to stdout
# in above example, the SHELL feeds the file names.txt into sort's stdin, so
# sort does NOT know the file name it is reading.  The mechanisms for
# redirection are available in a UNIX OS, using dup(2), dup2(2), and
# pipe(2).
sort < names.txt > sorted-names.txt # stdin from names.txt, stdout to sorted-names.txt

# uniq(1): print unique lines in input
# will read stdin, o/p only "unique" lines, avoiding duplicates.
# uniq expects sorted input
# uniq -u: show only the lines that were unique
# uniq -c: also display number of times a duplicate line showed up
# uniq -n: will include a count of how many instances of the same line exist

# wc(1): word-count
# wc -w: count "words" (whitespace delimited)
# wc -l: count number of lines
# wc -c: count number of characters

# cmp(1): compare two files' contents
# You can also generate hash signatures of files using md5/md5sum,
# sha1/sha1sum, etc. and compare them, but hash programs consume lots of CPU
# cycles.

# diff(1): compare 2 input files
# diff -u: generates a "unified diff" useful as a patch to use the patch(1)
# program.  Often used to generate and submit source code patches.

# example: combine 2 input files, drop '#' comment lines, sort what is left,
# keep one copy of each line, and finally count the remaining lines
# (a): the long way -- one step at a time, through temporary files
cat a.txt b.txt > c.txt
# a filter must never write to the file it is still reading: the shell
# truncates the file named after ">" BEFORE the program starts, so the input
# would be gone before it gets read -- hence the temp file c2.txt
# E.G., BAD, will destroy your input: grep -v "^#" c.txt > c.txt
grep -v '^#' < c.txt > c2.txt
sort c2.txt > c3.txt # again, the sorted o/p goes into another temp file
# c.txt is no longer needed as input by now, so reuse it instead of creating
# yet another o/p file
uniq c3.txt > c.txt
wc -l c.txt
# done, do some cleanup
rm -f c3.txt c2.txt c.txt

# (b): the short way -- a pipe ("|") feeds the stdout of one program straight
# into the stdin of the next one, so no temp files are needed (this is why
# it's useful to use UNIX "filters")
numlines=`cat a.txt b.txt |
    grep -v '^#' |
    sort |
    uniq |
    wc -l`
echo "Number of lines is" $numlines

######################################################################
# SED(1): stream editor
# read input lines, match against an input RE, and change any matches to
# something else.
# Syntax: sed "s/MATCH/REPLACE/flags" [infile]
# sed is also a UNIX filter
# "s/MATCH/REPLACE/flags" means:
# s: match and substitute (a single letter cmd to control how sed works)
# /: a single char delimiter to separate the MATCH and REPLACE parts
# MATCH: a RE to match each line against
# REPLACE: a string or RE to replace any matching input string
#	REPLACE can be empty, eg. "s/MATCH//g" and it'll effectively delete
#	any MATCH'd REs in the input line.
# flags: modifiers to control overall behavior of sed
#	examples:
#	g: replace all matches on a single line, not just first one (global)
#	i: case insensitive match