#!/usr/bin/env bash
# organize_by_mime.sh
# Recursively organize files by detected MIME type (via `file`), not by extension.
# Usage: ./organize_by_mime.sh [-n] [-c] <source_dir> <destination_dir>
#   -n : dry-run (show what would happen)
#   -c : copy instead of move

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Mode flags (0 = off, 1 = on); switched on by the -n and -c options.
DRY_RUN=0
DO_COPY=0

# Print the command-line synopsis on stdout and terminate the script.
# Exits with status 1; never returns to the caller.
usage() {
  printf '%s\n' \
    "Usage: $0 [-n] [-c] <source_dir> <destination_dir>" \
    "  -n : dry-run (no changes)" \
    "  -c : copy instead of move"
  exit 1
}

# Parse options. The leading ":" in the optstring puts getopts in silent mode,
# so an unrecognized flag lands in the fallback branch (usage) instead of
# producing getopts' own error message.
while getopts ":nc" opt; do
  if [[ "$opt" == "n" ]]; then
    DRY_RUN=1
  elif [[ "$opt" == "c" ]]; then
    DO_COPY=1
  else
    usage
  fi
done
# Drop the parsed flags so "$1" and "$2" are the positional arguments
shift "$((OPTIND - 1))"

# Validate args: exactly two positionals are required (source, destination)
if [ $# -ne 2 ]; then usage; fi
SRC_DIR="$1"
DEST_DIR="$2"

# Fail early, before anything is created, when the source does not exist.
# (Previously this only surfaced as a find error after the destination
# directory had already been created.)
if [ ! -e "$SRC_DIR" ]; then
  echo "Error: source directory not found: $SRC_DIR" >&2
  exit 1
fi

# Ensure `file` is available; the diagnostic belongs on stderr
command -v file >/dev/null 2>&1 || { echo "Error: 'file' command not found." >&2; exit 2; }

# Create destination root. Dry-run promises "no changes", so in that mode the
# command is only announced, never executed.
if [ "$DRY_RUN" -eq 1 ]; then
  echo "[DRY-RUN] mkdir -p -- $DEST_DIR"
else
  mkdir -p -- "$DEST_DIR"
fi

# Helper: sanitize a path component (letters, numbers, ., _, - only)
# Arguments: $1 - raw component (e.g. a MIME part or a file name)
# Outputs:   the component on stdout with every other character replaced
#            by an underscore
sanitize() {
  # Pure parameter expansion instead of `echo | sed`:
  #  - `echo "$1"` treats inputs such as "-n" or "-e" as options and prints
  #    nothing, which yielded an empty component;
  #  - sed works line by line, so an embedded newline passed through untouched.
  printf '%s\n' "${1//[^A-Za-z0-9._-]/_}"
}

# Helper: get MIME type top/sub using `file`
# Arguments: $1 - path of the file to inspect
# Outputs:   one line "<top> <sub>" on stdout (e.g. "image jpeg"), each part
#            passed through sanitize; "unknown unknown" when `file` fails or
#            does not print something shaped like "type/subtype"
get_mime_components() {
  local f="$1"
  local mime top sub
  # Shape of a MIME "type/subtype" pair (RFC 6838 restricted-name characters)
  local mime_re='^[A-Za-z0-9!#$&^_.+-]+/[A-Za-z0-9!#$&^_.+-]+$'

  # -b: brief; --mime-type: output like image/jpeg
  if ! mime=$(file -b --mime-type -- "$f" 2>/dev/null); then
    printf 'unknown unknown\n'
    return
  fi

  # `file` usually reports problems as ordinary output with exit status 0
  # (e.g. "regular file, no read permission", "cannot open ..."), so the
  # status check above is not enough: without this guard such messages were
  # split on "/" and turned into directory names.
  if [[ ! "$mime" =~ $mime_re ]]; then
    printf 'unknown unknown\n'
    return
  fi

  # The pattern guarantees exactly one "/", so plain expansion splits safely
  top=$(sanitize "${mime%%/*}")
  sub=$(sanitize "${mime#*/}")
  printf '%s %s\n' "$top" "$sub"
}

# Helper: ensure unique destination filename; if collision, append short hash
# Arguments: $1 - source file path
#            $2 - target directory
# Outputs:   "<dir>/<sanitized name><sanitized ext>" on stdout. On a name
#            clash, "_<8 hex chars of a content checksum>" is appended to the
#            name, followed by "_<n>" if that path is taken as well, so the
#            printed path never refers to an existing entry.
unique_target_path() {
  local src="$1"
  local dir="$2"
  local base
  base=$(basename -- "$src")

  # Sanitize filename too (keep extension intact if present)
  local name ext
  if [[ "$base" == *.* ]]; then
    name="${base%.*}"
    ext=".${base##*.}"
  else
    name="$base"
    ext=""
  fi
  name=$(sanitize "$name")
  ext=$(sanitize "$ext")

  # -L also catches dangling symlinks, for which -e is false
  local candidate="$dir/$name$ext"
  if [[ ! -e "$candidate" && ! -L "$candidate" ]]; then
    printf '%s\n' "$candidate"
    return
  fi

  # If exists, add 8 hex chars derived from the file contents to distinguish
  local hash crc
  if command -v md5sum >/dev/null 2>&1; then
    # Feed the file via stdin: when given a file *name* containing a backslash
    # or newline, md5sum starts its output line with "\", which used to leak
    # into the generated path.
    hash=$(md5sum < "$src")
    hash=${hash:0:8}
  else
    # Fallback: POSIX cksum ("<crc> <size>"). Unlike a timestamp it depends on
    # the contents and does not need the non-portable `date +%N`.
    read -r crc _ < <(cksum < "$src")
    printf -v hash '%08x' "$crc"
  fi

  # The hashed name can be taken too (identical duplicates, or a file that is
  # really called like that); count upwards until a free path is found so an
  # existing file is never overwritten.
  candidate="$dir/${name}_${hash}$ext"
  local n=1
  while [[ -e "$candidate" || -L "$candidate" ]]; do
    candidate="$dir/${name}_${hash}_${n}$ext"
    n=$((n + 1))
  done
  printf '%s\n' "$candidate"
}

# Dry-run helpers
# Create directories (with parents), or only announce the command in dry-run.
# Globals:   DRY_RUN (read)
# Arguments: directory paths, forwarded unchanged to `mkdir -p --`
# Outputs:   in dry-run mode, one "[DRY-RUN] mkdir -p -- ..." line on stdout
do_mkdir() {
  if [[ "$DRY_RUN" -ne 1 ]]; then
    mkdir -p -- "$@"
    return
  fi
  printf '%s\n' "[DRY-RUN] mkdir -p -- $*"
}

# Transfer one file to its destination, or only announce the command in dry-run.
# Globals:   DRY_RUN, DO_COPY (read)
# Arguments: $1 - source file
#            $2 - destination path
# Outputs:   in dry-run mode, the command line (paths in double quotes)
#            prefixed with "[DRY-RUN]" on stdout
do_move_or_copy() {
  local from="$1" to="$2"

  if [[ "$DRY_RUN" -eq 1 ]]; then
    local shown="mv --"
    if [[ "$DO_COPY" -eq 1 ]]; then
      shown="cp -p --"
    fi
    printf '[DRY-RUN] %s "%s" "%s"\n' "$shown" "$from" "$to"
    return
  fi

  if [[ "$DO_COPY" -ne 1 ]]; then
    mv -- "$from" "$to"
  else
    # -p keeps timestamps and mode on the copy
    cp -p -- "$from" "$to"
  fi
}

# Walk files safely (null-delimited)
# Skip symlinks to avoid surprises; include only regular files
#
# The destination may live inside the source tree (e.g. "in" and "in/sorted").
# Without pruning it, find descends into the directories this loop fills, so
# already organized files are picked up again and renamed with a hash suffix on
# every run. -samefile compares inodes, so it matches however DEST_DIR was
# spelled (relative, absolute, via symlink); it is supported by GNU and BSD
# find. When source and destination are the very same directory nothing is
# pruned (that would skip every file) and the traversal is left as it was.
FIND_ARGS=("$SRC_DIR")
if [[ -d "$DEST_DIR" && ! "$DEST_DIR" -ef "$SRC_DIR" ]]; then
  FIND_ARGS+=(-type d -samefile "$DEST_DIR" -prune -o)
fi
FIND_ARGS+=(-type f -print0)

find "${FIND_ARGS[@]}" | while IFS= read -r -d '' FILE; do
  # Derive MIME top/sub directories
  read -r TOP SUB < <(get_mime_components "$FILE")

  # Build target directory like: DEST/TOP/SUB (e.g., DEST/image/jpeg)
  TARGET_DIR="$DEST_DIR/$TOP/$SUB"
  do_mkdir "$TARGET_DIR"

  # Resolve unique target file path (handles collisions)
  TARGET_PATH=$(unique_target_path "$FILE" "$TARGET_DIR")

  # Move or copy the file
  do_move_or_copy "$FILE" "$TARGET_PATH"
done

# Final status line: dry-run takes precedence, otherwise report copy vs. move
if [[ "$DRY_RUN" -eq 1 ]]; then
  echo "Dry-run complete. No changes were made."
elif [[ "$DO_COPY" -eq 1 ]]; then
  echo "Copy complete. Files organized by MIME type under: $DEST_DIR"
else
  echo "Move complete. Files organized by MIME type under: $DEST_DIR"
fi
