Skip to content

Instantly share code, notes, and snippets.

@mh0w
Last active June 26, 2024 09:47
Show Gist options
  • Save mh0w/8fae0ba3e7558037861cfd8b7b4a96c9 to your computer and use it in GitHub Desktop.
Basic unix
# Resources:
# https://mally.stanford.edu/~sr/computing/basic-unix.html
# https://mally.stanford.edu/~sr/computing/more-unix.html
###############
# Console use #
###############
# Get help manual on a specific command
man commandname # nice formatting, pager
commandname --help # printed directly to console, no pager
# History - Re-run commands (history expansion: interactive shells only)
history # numbered list of recent commands
history | tail -n 10 # show only the 10 most recent commands
!12 # re-run command number 12 from the history list
!head # re-run most recent head command
sudo !! # re-run previous cmd with sudo added before (e.g. touch file.txt -> sudo touch file.txt)
!! -l # re-run previous cmd with -l added after (e.g. ls -> ls -l)
# Writing to file > - Redirect console output to a file with > (overwrites; >> appends)
head -n 5 file.csv > top_5_lines.txt
# Piping | - Redirect output to the next command with |
head -n 5 file.csv | tail -n 2 # lines 4 and 5
head -n 100 file.csv | sort # first 100 lines, sorted
cut -d , -f 1 seasonal/spring.csv | grep -v Date | head -n 10 # col 1, remove lines containing 'Date', top 10 lines
# Running commands on multiple input files (the shell expands wildcards before the command runs)
head -n 5 file1.csv file2.csv file3.csv
head -n 5 *.csv # all files ending .csv
head -n 5 seasonal_data/* # all files in seasonal_data directory
head -n 5 file?.csv # file then any one char then .csv
head -n 5 file[123].csv # file1.csv, file2.csv, file3.csv
# Loops
# Always quote expansions ("$var") so filenames containing spaces or glob
# characters do not word-split or re-glob when substituted.
for filetype in gif jpg png; do echo "$filetype"; done
for filename in seasonal/*.csv; do echo "$filename"; done
for file in seasonal/*.csv; do head -n 2 "$file" | tail -n 1; done # 2nd line of each file
for f in seasonal/*.csv; do echo "$f"; head -n 2 "$f" | tail -n 1; done # filename, then its 2nd line
# Multi-line form: print the 2nd line and the last line of every file
for filename in seasonal_data/*
do
  head -n 2 "$filename" | tail -n 1
  tail -n 1 "$filename"
done
#########
# Files #
#########
# (Concatenate and) send output to console or file (uncompressed)
cat file1.txt file2.txt
cat file1.txt file2.txt >> file3.txt # append both to file3.txt
# Display file content even if gzip-compressed (zcat is equivalent to gunzip -c)
zcat filename.gz
# Display top n lines (options before the filename: POSIX-portable argument order)
head -n 5 file.txt
# Display last n lines
tail -n 5 file.txt
# Change file group / permissions / owner
chgrp
chmod
chown
# List files/directories...
# -R recursive, -r reverse sort order, -t sort by modification time, -l long (detailed), -a all (incl hidden), -h human readable file sizes
ls
ls -R # list subdirectory contents recursively
# View file content (space for more, q to exit pager view)
more
cd # with no argument, cd returns to $HOME
# nano: ctrl-o save, ctrl-x exit, ctrl-k delete line, ctrl-u undelete line
nano filename.txt # Edit filename.txt, create it if does not exist
# Move and/or rename one file (cut)
mv filename1 filename2 # Same directory, new name
mv filename1 ../filename1 # Up one level, same name
# Move multiple files to a directory
mv filename1 filename2 directory_name
# Rename a directory
mv directory_name new_directory_name
# Copy one file (copy-paste)
cp filename1 filename2
# Copy multiple files to a directory
cp filename1 filename2 directory_name
# Remove... -i invokes confirmation dialogue
rm -i filename
# Diff - compare two files line by line
diff filename1 filename2
# Stats: lines (-l), words (-w), chars (-c); with no flag, prints all three
wc filename
wc -c filename # byte/character count only
# chmod... e.g. o+r for readable by others, o-r for unreadable by others
chmod options filename
# compress, decompress, view compressed
gzip filename # zip (replaces filename with filename.gz)
gunzip filename # unzip
gunzip -c filename # preview content without decompressing the file on disk
# Statistics info for a file (block size, number of blocks, file type, etc)
stat
# Check disk usage of file(s)
du filename
# Add new line to end of file (>> appends; > would overwrite the whole file)
echo 'fs.inotify.max_user_watches=500000' >> /etc/sysctl.conf
# Add command output to end of file
ls>> files.txt
# Print file content (cat has no pager, less has a pager)
cat /etc/sysctl.conf
less /etc/sysctl.conf
# View the top of a file
head
head -n 5 # first 5 lines
# View the tail of a file
tail
tail -n 5 # last 5 lines
# View select columns from a file (cannot cope with commas inside quoted strings)
cut -f 2-5,8 -d , values.csv # select cols 2-5 and 8, using comma as the separator
###############
# Directories #
###############
# Make directory
mkdir dirname
# Remove directory (must be empty; use rm -r for non-empty directories)
rmdir
# Change directory (defaults to $HOME with nothing specified)
cd dirname
cd .. # up one level
cd # back to $HOME
# Print working directory (current location)
pwd
# Check directory size (-s summarise: one total per argument)
du -s
# List directory content - R (recursive), l (long/details), t (chronologically), a (all, incl hidden)
ls
ls -latR
##########
# Search #
##########
# Find file... -p for partial name
# ff filename
# NOTE(review): 'ff' is a local/site command, not standard unix; on most systems use: find . -name 'pattern'
# Find string in file (returns entire line containing the string)
# Flags: -c (count), -h (omit file names), -i (ignore case), -l (print file names only)
# -n (print line numbers), -v (show non-matches)
grep string filename
######################
# About self, system #
######################
# Username
whoami
# Processes
ps -u username # list processes belonging to username
kill PID # terminate the process with the given process ID
# Current date
date
cal # calendar for the current month
cal 2024 # calendar for the whole of 2024
# system info (kernel name; -a for full details)
uname
# Storage use and availability
df
df -h # human-readable sizes
#############
# Variables #
#############
# Show value of an env variable (quote expansions to preserve whitespace)
echo "$ENVIRONMENT_VARIABLE_NAME"
echo "$TERM"
env # list all environment variables and their values
# Set value of an env variable (bash syntax; 'setenv' is csh/tcsh only)
export ENVIRONMENT_VARIABLE_NAME=value
# Unset an env variable
unset ENVIRONMENT_VARIABLE_NAME
#############
# Functions #
#############
# Print a greeting. $1 is the first positional argument; extra arguments are ignored.
# POSIX-portable definition form (the 'function' keyword is a non-portable bashism).
hello() {
  echo "Hello, $1!"
}
# Run a saved PowerShell snippet that reports recent system start/end times.
# NOTE(review): only works where 'powershell' and the UNC path below exist.
checktimes() {
  echo "Checking recent start and end times, please wait..."
  powershell -Command "& { Invoke-Expression (Get-Content -Raw '//NDATA12/hawkem$/My Documents/WindowsPowerShell/checktimes.txt') }"
}
hello world # "Hello, world!" - note that this only acknowledges one argument
hello one two # "Hello, one!" - the 2nd parameter 'two' is not used
checktimes # Runs PS script that checks recent 'Get-EventLog system' start and end times
########
# Misc #
########
# Sort: -n numerically, -r reverse, -b ignore leading blanks, -f case insensitive
head -n 100 file.csv | sort -f
# Adjacent uniqueness (remove adjacent duplicates): -c (count); sort first so duplicates become adjacent
head -n 100 file.csv | sort -f | uniq
# Create temporary local variable (NB: no spaces around equals signs)
# The glob is stored literally here; it expands when the variable is used unquoted
path_csv_files=path/to/*.csv
# Create a global environment variable (visible to child processes)
export VAR=value
export MY_TEMP_NEW_ENV_VAR_123=3.14
# Backspace --- delete previous character
# CTRL-d --- delete next character (the one under the cursor)
# CTRL-k --- delete rest of line
# CTRL-a --- go to start of line
# CTRL-e --- go to end of line
# CTRL-b --- go backwards without deleting
# CTRL-f --- go forward without deleting
# TAB --- complete filename or command up to the point of uniqueness
# CTRL-u --- cancel whole line
# CTRL-c --- cancel the process after it has started
# CTRL-z --- suspend a running process (e.g. in order to do something else in between), you can then put the process in the background with bg
# CTRL-l --- redraws the screen
# | (piping) --- Lets you execute any number of commands in a sequence.
# The second command will be executed once the first is done, and so forth, using the previous command's output as input. You can achieve the same effect by putting the output in a file and giving the filename as an argument to the second command, but that would be much more complicated, and you'd have to remember to remove all the junkfiles afterwards. Some examples that show the usefulness of this:
# ls | more --- will show you one screenful at a time, which is useful with any command that will produce a lot of output, e.g. also ps -aux
# man ls | grep time --- checks whether the man page for ls has something to say about listing files by time - very useful when you have a suspicion some command may be capable of doing what you want, but you aren't sure.
# ls -lR | grep dvi --- will show you all your dvi files - useful to solve disk space problems, since they're large and usually can be deleted.
# HDFS file system accepts around 30 commands, including many Unix commands.
# May or may not need to prefix with "hdfs", depending on environment.
# File-system operations go through the "dfs" subcommand: hdfs dfs -<cmd>
# Copy file(s) from one place to another
# -f to force (overwrite if already present)
hdfs dfs -put localfile /hdfs/path # local -> HDFS
hdfs dfs -get /hdfs/path localfile # HDFS -> local
hdfs dfs -copyFromLocal localfile /hdfs/path # like -put
hdfs dfs -copyToLocal /hdfs/path localfile # like -get
# Move files
hdfs dfs -moveFromLocal localfile /hdfs/path # copy to HDFS, then delete the local source
hdfs dfs -moveToLocal /hdfs/path localfile
# "dfs -" to interact with file system
hdfs dfs -ls
hdfs dfs -ls toy_data/toy_dataset_1
hdfs dfs -mkdir folder/path/here
hdfs dfs -rmdir folder/path/here
hdfs dfs -cp /hadoop/file1 /hadoop1
hdfs dfs -cp -f /hadoop/file1 /hadoop1 # -f overwrites the destination if it exists
hdfs dfs -rm -r /hadoop # recursive delete
# View Hadoop environment variables
hdfs envvars
# Print the value of a single configuration key
hdfs getconf -confKey dfs.blocksize
# Filesystem check (options take plain ASCII hyphens)
hdfs fsck -locations -files -blocks
# May or may not need to prefix with "yarn", depending on environment
# Command to configure and view information about the YARN cluster
yarn command
# List running applications
yarn application -list
# Kill a running application
yarn application -kill app-id
# View the logs of the specified application
yarn logs -applicationId app-id
# View the full list of command options
yarn -help
# View YARN environment variables
yarn envvars
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment