Text processing cheatsheet.
grep
grep "pattern" file
grep -i pattern file # case-insensitive
grep -v pattern file # invert
grep -r pattern dir/ # recursive
grep -rn pattern dir/ # with line numbers
grep -rl pattern dir/ # filenames only (-r needed to search a directory)
grep -c pattern file # count
grep -E "a|b" file # extended regex (same as egrep; -e only introduces a pattern)
grep -P "\d+" file # perl regex
grep -A 3 pattern file # 3 lines after
grep -B 3 pattern file # before
grep -C 3 pattern file # context
grep --include="*.py" -r pattern .
grep --exclude-dir=node_modules -r pattern .
ripgrep (faster)
rg pattern
rg -t py pattern # only Python files
rg -g '*.py' pattern
rg --json pattern | jq
rg -A 3 pattern
sed
sed 's/old/new/' file # first occurrence per line
sed 's/old/new/g' file # all
sed -i 's/old/new/g' file # in place
sed -i.bak 's/old/new/g' file # backup as file.bak
sed '5,10d' file # delete lines 5-10
sed '/pattern/d' file # delete matching
sed -n '/pattern/p' file # print only matching
sed 's|/old/path|/new/path|g' # alt delimiter
sed -E 's/([0-9]+)/<\1>/g' # extended regex + backref
awk
awk '{print $1}' file # first column
awk '{print $1, $3}' file
awk -F: '{print $1}' /etc/passwd # custom delimiter
awk '/pattern/ {print $1}'
awk 'NR>1' file # skip header
awk 'NR%2==0' file # even lines
awk '{sum+=$1} END {print sum}'
awk '{count[$1]++} END {for (k in count) print k, count[k]}'
awk 'length($0) > 80' # long lines
awk -v threshold=100 '$2 > threshold'
cut
cut -d: -f1 /etc/passwd
cut -d, -f2,4 csv
cut -c1-10 file # chars 1-10
cut -c1,5,10 file # specific chars
tr
echo "HELLO" | tr A-Z a-z
echo "a b c" | tr ' ' '\n'
echo "a-b-c" | tr -d '-' # delete
tr -s ' ' # squeeze repeats
cat file | tr -cd 'a-zA-Z0-9 \n' # keep only these
sort / uniq
sort file
sort -n file # numeric
sort -r file # reverse
sort -k 2 file # by 2nd column
sort -t: -k3 -n /etc/passwd # by UID
sort -u file # unique
sort | uniq # uniq needs sorted
sort | uniq -c # with count
sort | uniq -d # only dupes
sort | uniq -u # only uniques
# Top 10 most common
sort | uniq -c | sort -rn | head -10
head / tail
head -n 20 file
tail -n 20 file
tail -f file # follow
tail -F file # follow by name; reopen if the file is rotated or recreated
tail -n +10 file # from line 10 to end
wc
wc -l file # lines
wc -w file # words
wc -c file # bytes
wc -m file # chars
paste / join
paste a.txt b.txt # side-by-side
paste -d, a.txt b.txt # comma delim
join file1 file2 # SQL-like join on first col
column
column -t file # align columns
ps aux | column -t
xargs
ls | xargs rm # pass each listed name to rm as an argument (breaks on names with spaces)
find . -name "*.log" | xargs rm
find . -name "*.log" -print0 | xargs -0 rm # NUL-delimited: handles spaces in names
echo "a b c" | xargs -n 1 # one arg per line
echo "a b c" | xargs -I{} echo "got: {}"
xargs -P 4 -n 1 cmd # parallel
jq
echo '{"a":1}' | jq '.a'
jq '.users[] | .name' file.json
jq '.users[].name' file.json
jq '.users | length'
jq '.users[] | select(.active)'
jq '.users[] | {name, email}'
jq -r '.users[].name' # raw (no quotes)
jq -c '.' # compact
jq 'to_entries | map(...)'
yq (YAML)
yq '.spec.replicas' deploy.yaml
yq -i '.spec.replicas = 5' deploy.yaml
diff / patch
diff a b
diff -u a b > patch
patch a < patch
patch -R a < patch # reverse
Common pipelines
# Top processes by mem
ps aux | sort -k4 -rn | head
# Count log levels
grep -oE 'level=\w+' app.log | sort | uniq -c | sort -rn
# Sum bytes from access log
awk '{sum+=$10} END {print sum}' access.log
# 95th percentile
awk '{print $9}' access.log | sort -n | awk '{a[NR]=$1} END {print a[int(NR*0.95)]}'
# IP rank
awk '{print $1}' access.log | sort | uniq -c | sort -rn | head
# Files by line count
find . -name "*.py" -exec wc -l {} + | sort -rn
# Disk usage by extension
find /var -type f -printf '%s %p\n' | awk '{ext=$2; sub(/.*\./,"",ext); s[ext]+=$1} END {for(k in s) print s[k], k}' | sort -rn
Common mistakes
- grep with regex without -E / -P — limited features.
- sed -i without backup → no undo.
- awk $0 vs $1: $0 is the whole line, $1 is the first field.
- xargs without -0 breaks on names containing spaces.
- Forgetting sort before uniq.
Read this next
If you want my one-liner cookbook, it’s at rajpoot.dev .
Building something AI-, backend-, or data-heavy and want a second pair of eyes? I do consulting and freelance work — see my projects and ways to reach me at rajpoot.dev .