Last active
November 3, 2019 18:25
-
-
Save codepedia/c954085b4be55188cf36 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#in vim | |
echo &encoding | |
# count lines with grep | |
za:~ za$ grep -c ".*" file.txt | |
For hexadecimal output, you could use hd | |
echo Hello world | hd | |
00000000 48 65 6c 6c 6f 20 77 6f 72 6c 64 0a |Hello world.| | |
or od | |
echo Hello world | od -t x1 -t c | |
0000000 48 65 6c 6c 6f 20 77 6f 72 6c 64 0a | |
H e l l o w o r l d \n | |
shortly | |
while IFS= read -r -n1 car;do [ "$car" ] && echo -n "$car" || echo ; done | |
try them: | |
while IFS= read -rn1 c;do [ "$c" ]&&echo -n "$c"||echo;done < <(ls -l --color) | |
Explain: | |
while IFS= read -rn1 car # unset InputFieldSeparator so read every chars | |
do [ "$car" ] && # Test if there is ``something''? | |
echo -n "$car" || # then echo them | |
echo # Else, there is an end-of-line, so print one | |
done | |
find files with multiple extensions: | |
# dos to unix | |
sed -i.bak 's/\r$//' | |
zee tmp]$ lynx -dump -listonly some.html| sed -E 's/https//g' | |
for x in `find -maxdepth 1 \( -name "*.xls" -or -name "*.txt" \) ` ; do echo $x ;done | |
za:dev za$ find . -type f -print0 | xargs -0 grep -i hash | |
# find and replace with xargs and regex. | |
# snippet from the find man page. | |
#However, you may wish to consider the -print0 primary in conjunction with ``xargs -0'' as an effective alternative. | |
find . -name '*.ini' -type f -print0 | xargs -0 sed -i 's|oldstring|newstring|g' | |
# xargs -I {} to capture the value of find | |
find . -iname *something* | xargs -I {} mv {} | |
===== | |
IFS stands for "internal field separator". | |
It is used by the shell to determine how to do word splitting, i. e. how to recognize word boundaries. | |
[za ~]$ bash -c 'set w x y z; IFS=":-;"; echo "$*"' | |
w:x:y: | |
#extract two fields from passwd file | |
cat /etc/passwd | ( \ | |
IFS=: ; while read lognam pw id gp fname home sh; \ | |
do echo $home \"$fname\"; done \ | |
) | |
======= | |
file1.txt | |
dn_id101_400_CT_TC string1 | |
dn_id111_60_TT_AA string2 | |
file2.txt | |
dn_id101_400_XX_XX diffstring1 | |
dn_id400_40_XY_YX diffstring2 | |
dn_id111_60_GG_CC diffstring3 | |
awk -F"\t" '{ split($1, a, /_/); s=a[1]"_"a[2]"_"a[3]} FNR==NR { arr[s]++} FNR<NR && (s in arr)' | |
# print all field except field number 18 | |
awk -F'\t' 'BEGIN {OFS="\t"} {for (n=18; n < NF; ++n) $n = $(n+1); --NF; print}' | |
##### finds 2017-08-01 12:00:00 and replace it with 2017-08-01. keeping the same OFS | |
$ awk -F"\t" '{OFS= "\t"} { gsub( /12:00:00/ , "" , $16 ) ; gsub(/12:00:00/ , "", $17) ; print }' <( cat somefile ) | |
awk -F"\t" '{s=$1; sub(/_[[:upper:]]+_[[:upper:]]+$/, "", s)} FNR==NR { arr[s]++} FNR<NR && (s in arr)' f1 f2 | |
awk '{my_dict[$1] = $1 ; second[$2]= $2 ; second[$2]= $2 } END { for (key in my_dict) { print my_dict[key] } }' somefile | |
# print uniq-values only | |
awk '{ if (!seen[$0]++) print }' f.txt | |
awk '{print FILENAME, NR, FNR, $0}' f1_rejected.txt f2.txt | |
This is what awk was designed for: | |
$ awk -F'|' 'NR==FNR{c[$1$2]++;next};c[$1$2] > 0' file2 file1 | |
abc|123|BNY|apple| | |
cab|234|cyx|orange| | |
Explanation | |
-F'|' : sets the field separator to |. | |
NR==FNR : NR is the current input line number and FNR the current file's line number. The two will be equal only while the 1st file is being read. | |
c[$1$2]++; next : if this is the 1st file, save the 1st two fields in the c array. Then, skip to the next line so that this is only applied on the 1st file. | |
c[$1$2]>0 : the else block will only be executed if this is the second file so we check whether fields 1 and 2 of this file have already been seen (c[$1$2]>0) and if they have been, we print the line. In awk, the default action is to print the line so if c[$1$2]>0 is true, the line will be printed. | |
# alternative solution with cut, paste, join, sort and function definition to replicate awk functionality | |
]$ f() { paste <(cut -d_ -f1-3 ${1}) ${1} | sort; }; join -o2.2,2.3 <(f file1.txt) <(f file2.txt) | |
set #The Set Builtin | |
set -- reports_dir/* sets the positional parameters ($1, ...) to the result of the pathname expansion. | |
file_count=$( set -- $1/*; echo $#) | |
--- remove empty lines with awk | |
awk 'NF' file | |
#Print index and field name: | |
awk -F'\t' ' NR < 2 {for ( i=1 ; i < NF ; i++) { print i , $i } }' | |
============== | |
# error: print all arguments as one line on stderr.
# Fixes: the original `function error { echo "$*" > /proc/self/fd/2 }` lacked
# the ';' before '}', so '}' was parsed as an argument to echo; it also relied
# on the Linux-only /proc/self/fd/2 path where the portable '>&2' suffices.
error() { printf '%s\n' "$*" >&2; }
# die: write a message to stderr, then terminate with exit status 1.
die() {
  echo "$*" >&2
  exit 1
}
========================== | |
make | |
Ashly | |
"Ashley" = "Ashley"; | |
while IFS= read line || [[ -n $line ]]; do printf "\"%s\"=\"%s\"\n" "$line" "$line"; done < names.txt | |
# reads and separates fields based on the IFS <input field separator> | |
awk 'BEGIN { FS = "," } ; { print $2 }' | |
[za]$ text="aeererwaer erewrwerwerer" ; printf '%s\n' "$text" | while read line; do printf '%s\n' "[$line]"; done | |
========================== | |
join lines from two different files | |
join <(sort file1 | sed 's/ - /-/') <(sort file2 | sed 's/ - /-/') | | |
sed 's/-/ - /' | |
$ du -sh * | |
# the cut command ... -d specifies how the fields are delimited | |
~ za$ echo "hi:my:name:is:one:two:three:four:five" | cut -d':' -f{1..5} | |
hi:my:name:is:one | |
~ za$ echo "hi:my:name:is:one:two:three:four:five" | cut -d':' -f{1,5} | |
hi:one | |
# the first one separates the new output with the octal "012" or \n ... if none is specified, the file will be read as | |
# one line ... coming from the | pipe. | |
dev za$ cat myfile | cut -d " " -f{1..9} | while read line; do printf '%s\012' "[$line]"; done | |
dev za$ cat myfile | cut -d " " -f{1..9} | while read line; do printf '%s\n' "[$line]"; done | |
[$r] | |
[u learn omehing every day if you pay aenion.$r] | |
[You learn omehing every day if you pay aenion.$r] | |
[You learn omehing ev | |
# shell vs subshell | |
using < is better than cat file1.txt | ... Hence, < won't create a subshell, since < does not start a separate process the way cat does. | |
# this will keep everything in the same shell. | |
while read line ; do if [[ $line == A* ]] ; then echo $line ; fi ; done < tmep/file1.txt | |
# since cat is a process itself, its output is passed to another process using "|". | |
cat file1.text | while read line ; do if [[ $line == A* ]] ; then echo $line ; fi ; done | |
#=================== | |
Join lines and separate with spaces | |
Read vmargs.txt, which is a text file that could either be DOS-style (\r\n) or UNIX-style (\n) line endings and join the lines with a space separator. Can this be shortened/made more elegant? | |
tr -d '\r' < vmargs.txt | tr '\n' ' ' | |
#=================== | |
#""""""""" trying to answer the question of replacing multiple spaces with a tab | |
za:temp za$ cat file1.txt | awk '{ IFS="\t;" ; line[NR] = $0 } { print "Line num : " NR " contains: " NF " fields " } END {for (i=1; i<=NR; i++) printf line[i] "\n" }' | |
Line num : 1 contains: 4 fields | |
Line num : 2 contains: 4 fields | |
Line num : 3 contains: 5 fields | |
Line num : 4 contains: 8 fields | |
This is a text | |
that is yy xxx | |
not distributed xxx xx xxx | |
evenly in a file xxxxx x xx xx | |
==================================== | |
[za awk-yaky]$ cat testfile.csv | |
Datum;Verbrauch | |
15.05.2016 00:00;0,075 | |
15.05.2016 00:15;0,075 | |
15.05.2016 00:30;0,075 | |
15.05.2016 00:45;0,075 | |
15.05.2016 01:00;0,075 | |
[za awk-yaky]$ awk -F'[ .:;]' 'NR==1{print;next} {printf "%s-%s-%s %s:%s:00;%s\n",$3,$2,$1,$4,$5,$6}' testfile.csv | |
Datum;Verbrauch | |
2016-05-15 00:00:00;0,075 | |
2016-05-15 00:15:00;0,075 | |
2016-05-15 00:30:00;0,075 | |
2016-05-15 00:45:00;0,075 | |
2016-05-15 01:00:00;0,075 | |
-F'[ .:;]' | |
Tells awk to use any of a space, a period, a colon, or a semicolon as the field separator | |
NR==1{print;next} | |
This prints the header line unchanged | |
printf "%s-%s-%s %s:%s:00;%s\n",$3,$2,$1,$4,$5,$6 | |
This reformats the line as you want. | |
will print only if the 2nd column value in a tab-separated file is greater than the 3rd column value. | |
awk -F \t '{ if ($2 > $3) print; }' <filename> | |
# takes field number 45 and change it to zzzz | |
cat file.txt | awk -F'[\t]' '{ arr[$1] = $45 } { for ( key in arr) { print arr[key] = "zzzzzzz" }}' | |
#--------- | |
complete this to compare the two arrays. | |
arr=($(cd dir/ && find . -type d | sort && cd .. ) ) && echo ${arr[*]} && | |
arr1=($(cd dir/ && find . -type d | sort && cd .. )) ; | |
for (( i = 0 ; i < ${#arr[@]} ; i++ )) do echo "got ${arr[$i]}" ; | |
for (( i = 0 ; i < ${#arr1[@]} ; i++ )) do echo "last_week ${arr1[$i]}" do echo "current_week ${arr[$i]}" ; done ; done | |
# finish this to find open file descriptors. | |
[vagrant@localhost ~]$ sudo lsof | while read line ; do if [[ $line =~ ^.*proc/ ]] ; then echo $line ; fi ; done | |
za:dtdir za$ cat import_User_Sample_en.csv | awk '{ FS = "," } ; {a[1]=$1 ; for ( key in a ) ; print a[key] }' | |
# get the total line count of each file and stores it in an array .... one thing to fix: why it is printing dups for the array | |
# also , wc -l was counting empty lines too : ^I^I^I^I^I^I^I^I^I^I^M$ ... work around that | |
for x in `find . -name "soemfile.txt"` ; do val=$(cat $x | wc -l) && a[0]=$val ; for i in "${a[@]}" ; do echo a[$i] ; done ; done | |
# add values in a columns | |
cat soemfile.txt | awk '{sum+=$5 ; print $0} END{print "sum=",sum}' | |
# match this xxx:xxx:xxx | |
^ (?![\s:]*$) [^:]+ : [^:]+ : [^:]+ $ | |
start no empty 1st 2nd 3rd end | |
Don't use $*, it'll fail if any arguments have spaces in them; use "$@" instead. Similarly, put $1 inside the quotes in the echo command | |
#bash find/replace | |
${variable//pattern/string} | |
Example: | |
test="/this/isjust/atestvariable/for/stringoperation" | |
echo ${test//\//:} | |
#via the ${var:=value} expansion: | |
cat "$HOME/.bash_history" >"${tempfile:=$(mktemp)}" | |
# find a file in a directory with a regex. why start with .* .. double check. | |
filename=$(find $MY_DIR -regextype sed -regex ".*somename_.*.txt") | |
echo "banana is yellow" | awk '{for (i=1;i<=NF;i++) if($i ~/yellow/) print i}' | |
# this one will take the number of "select || error" matches and divide it by the number of java occurrences .. cool, isn't it? | |
$ awk '/select|[Ee]rror/{a++}/java/{b++}END{printf "%.2f\n",a/b}' history.txt | |
find . -regextype foo -regex ".*/[a-f0-9\-]\{36\}\.jpg" | |
find: Unknown regular expression type `foo'; valid types are `findutils-default', `awk', `egrep', `ed', `emacs', `gnu-awk', `grep', `posix-awk', | |
`posix-basic', `posix-egrep', `posix-extended', `posix-minimal-basic', `sed'. | |
# fix this | |
max=10 ; counter=0 ; while true ; do counter=$(( counter + 1 )) ; echo " The counter is : $counter" ; if [[ $counter > $max ]]; then echo "reached the end of the this" && exit 0 ; fi ; sleep 1 ; done | |
# started working on this one-liner that parses ps output and finds memory usage ... #TODO: find highest. | |
[vagrant@localhost ~]$ curr_max=0; ps -axl --no-headers | tr -s ' ' : | cut -d : -f 3,8 | while read proc ; do pidsize=$( cut -d : -f 2 ) ; curr_max=$pidsize ; printf "Max: %s\npid: %s\n" $curr_max $pidsize ; done | |
max=5 ; counter=0 ; while true ; do counter=$(($counter+1)) ; echo " The counter is : $counter" ; sleep 1 ; if [[ "$counter" -gt "$max" ]]; then echo "reached the end of the this" && break ; fi ; done | |
uname -a | grep -qi 'linux' ; case "$?" in "0") echo "match" ;; "1") echo "no match" ;; *) echo "error" ;; esac | |
You can optionally suppress output from STDERR like so: | |
grep -qi 'root' /etc/shadow &> /dev/null ; case "$?" in "0") echo "match" ;; "1") echo "no match" ;; *) echo "error: $?" ;; esac | |
=========== | |
How to grep for multiple strings in file on different lines (Use the pipe symbol): | |
for file in *;do | |
test $(grep -E 'Dansk|Norsk|Svenska' $file | wc -l) -ge 3 && echo $file | |
done | |
Notes: | |
If you use double quotes "" with your grep, you will have to escape the pipe like this: \| to search for Dansk, Norsk and Svenska. | |
if [[ -e ~/somefile ]] ; then echo -e "someotherstring\nmkdir -p somestring" >> ~/somefile && grep -qi 'VAGRANT*' somefile &> /dev/null ; case "$?" in "0") echo "match" ;; "1") echo "no match" ;; *) echo "error: $?" ;; esac ; else echo "can not find .bash_profile file. Please make sure it exists " ; fi | |
=================== find unicode and convert. | |
#!/bin/sh
# Convert every *.txt file in the current directory to UTF-8 in place.
# Fixes vs. the original:
#   * iconv's input and output were the same file ('-o $i'); depending on the
#     implementation this can truncate the input before it is fully read.
#     Convert into a temp file, then replace the original only on success.
#   * Quote all expansions so filenames with spaces or glob chars survive.
#   * Skip the literal '*.txt' left behind by an unmatched glob.
TO='utf-8'
for i in *.txt
do
  [ -e "$i" ] || continue
  FROM=$(file -b --mime-encoding "$i")
  # Already in the target encoding? Nothing to do.
  [ "$FROM" = "$TO" ] && continue
  tmp=$(mktemp) || exit 1
  if iconv -f "$FROM" -t "$TO" "$i" > "$tmp"; then
    mv -- "$tmp" "$i"
  else
    echo "iconv failed for $i (from $FROM)" >&2
    rm -f -- "$tmp"
  fi
done
=================== | |
$ variable=sample_data_2017_01_01_10_22_10.txt | |
$ IFS='_' read -r tmp tmp Year Month Date tmp <<< "$variable" | |
$ echo "$Year : $Month : $Date" | |
2017 : 01 : 01 | |
============================ | |
variable="sample_data_2017_01_01_10_22_10.txt" | |
if [[ $variable =~ ^sample_data_([[:digit:]]{4})_([[:digit:]]{2})_([[:digit:]]{2}).*$ ]]; then | |
year="${BASH_REMATCH[1]}" | |
month="${BASH_REMATCH[2]}" | |
date="${BASH_REMATCH[3]}" | |
fi | |
=================================== | |
for i in {1..10} ; do echo $x ; done | |
Can also be written as: | |
for i in {1..10}; { echo $i;} | |
=================================== | |
in bash, : (a no-op) is often said to be equivalent to continue — compare the two loops below: | |
za:myapp za$ for x in $(seq 1 10) ; do (($x == 2)) && { echo "$x" ; : ; echo "continuing" ; } ; done | |
za:myapp za$ for x in $(seq 1 10) ; do (($x == 2)) && { echo "$x" ; continue ; echo "continuing" ; } ; done | |
# change field separator | |
$ cat src/main/resources/Positions.txt | awk -F[' '] '{print $3, $4}' OFS=' ' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment