#! /bin/sh
#  __   _
#  |_) /|  Copyright (C) 2002  |  richard@
#  | \/|  Richard Atterer     |  atterer.org
#   '` 
# Copyright (C) 2016-2021 Steve McIntyre <93sam@debian.org>
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License, version 2. See
#  the file COPYING for details.

# Mirror script for Debian CD images, using Jigsaw Download. You first
# need to set up a conventional mirror (rsync/http/ftp-based) for the
# jigdo and template files, then this script can use those files and
# your local Debian mirror to create the full images automatically.


# This directory will be scanned for .jigdo files
jigdoDir="/home/ftp/debian/jigdo"

# For any file $jigdoDir/somedir/file.jigdo, an attempt will be made
# to create all the images offered by file.jigdo in $imageDir/somedir/
imageDir="/home/ftp/debian-cd"

# Temporary dir to use for creating images. Should be on the same
# partition as $imageDir, because mv is used to put finished images
# into $imageDir.
tmpDir="/home/jigdo-mirror-tmpdir"

# Local Debian/Non-US mirrors. Can use http/ftp URLs, but beware that
# this may cause huge amounts of data to be downloaded repeatedly -
# the ftp/http server had better be on your LAN.
debianMirror="file:/home/ftp/debian"
nonusMirror="file:/home/ftp/debian/non-US"

# Where to put the logfile. If undefined, log output goes to stdout.
# If defined, stderr is also redirected to the logfile. The logfile is
# truncated at the start of every run.
#logfile="$tmpDir/jigdo-mirror-`date +%y%m%d`.log"

# Include and exclude certain files. These are two extended regular
# expressions, matched case-sensitively. The input given to them is
# the filename of the .iso files relative to $imageDir, in the form
# "somedir/image.iso", e.g. "3.0rev0/i386/woody-i386-1.iso". The
# filtering is equivalent to
#  echo $name | egrep $include | egrep -v $exclude
# That is, first the list of files is restricted to those matching
# $include, then anything matching $exclude is removed.
include='.'  # include all files,
exclude='$^' # then exclude none
# Examples:
# US sites: Exclude non-US stuff
#exclude='_NONUS'
# US sites: All i386 images, images 1 and 2 of the rest
#include='i386/|-[12]'; exclude='source/|_NONUS'
# Sites outside the US: All i386 images, images 1 and 2 of the rest
#include='i386/|-[12]'; exclude='source/|-1\.'

# How to call jigdo-file or jigdo-port. The value is expanded unquoted,
# i.e. it is split at whitespace into the command and its arguments.
# Set havePMA=false if the program does not support the
# "print-missing-all" command; "print-missing" is used instead then.
# CAREFUL: Make sure that jigdo-cache.db is not publicly accessible
# from the internet since it contains local path info.
jigdoFile="jigdo-file --cache=$tmpDir/jigdo-cache.db --cache-expiry=1w --report=noprogress --no-check-files"
#jigdoFile="jigdo-port"; havePMA=false

# Any files older than $maxAge days are deleted from $imageDir, except
# when the variable is unset; in that case, nothing happens. WARNING:
# This really means *any* files, not just files generated by
# jigdo-mirror.
#maxAge=8

# In case only a few files are missing for the image to be complete,
# jigdo-mirror will download them from any fallback servers specified
# in the .jigdo file. Maximum number of missing files to download:
maxMissing=100

# Number of URLs which are handed to a single wget invocation; the
# downloaded files are merged into the image after each such batch.
filesPerFetch=10
# Extra options for every wget call. Expanded unquoted, i.e. split at
# whitespace.
wgetOpts="--passive-ftp --no-directories --no-verbose"

# To find the template file, first the leafname of the template's URL
# is extracted from the .jigdo file. Next, for a .jigdo file named
# $jigdoDir/somedir/file.jigdo, the file
# $templateDir/somedir/leafnameFromURL is tried. If that isn't
# present, either the template is downloaded (if the URL is absolute)
# or looked for in $jigdoDir/somedir/templateURL (if the URL is
# relative). Default if unset is templateDir=$jigdoDir.
#templateDir="$jigdoDir"

# If it is inconvenient for you to set the variables above, you can
# either specify a config file with settings on the command line...
if test -n "$1"; then
    # The `.' builtin looks up a name without a slash in $PATH, which
    # in a POSIX sh does not include the current directory. Prefer a
    # readable file in the current directory and only fall back to the
    # plain lookup if there is none.
    case "$1" in
        */*) . "$1";;
        *)
            if test -r "./$1"; then
                . "./$1"
            else
                . "$1"
            fi;;
    esac
elif test -r ~/.jigdo-mirror; then
    # ... or put the commands in "~/.jigdo-mirror"
    . ~/.jigdo-mirror
fi
#======================================================================
#  No user-serviceable parts below
#======================================================================

# fetch <URL>...
# Hand all arguments (URLs, possibly mixed with extra wget options such
# as -O <file>) to wget, which stores the result in the current dir.
# Returns 0 without calling wget if there are no arguments, 1 if wget
# reports failure.
fetch() {
    test "$#" -gt 0 || return 0
    if wget --user-agent="$userAgent" $wgetOpts "$@"; then
        return 0
    fi
    return 1
}
# User agent sent by fetch: contains the first line of `wget --version'
userAgent="jigdo-mirror/1.0 ($(wget --version 2>/dev/null | { read firstLine; echo $firstLine; }))"
#______________________________________________________________________

# isURI <string>
# Succeeds (status 0) if the string starts with one of the schemes
# http:, https:, ftp: or file: in any mix of upper and lower case;
# fails (status 1) for everything else.
isURI() {
  case "$1" in
    [hH][tT][tT][pP]:* | [hH][tT][tT][pP][sS]:* | [fF][tT][pP]:* | [fF][iI][lL][eE]:*)
      return 0
      ;;
  esac
  return 1
}
#______________________________________________________________________

# makeImage
# Create the image described by the current [Image] section of the
# .jigdo file. Works in the current directory on the fixed names
# "jigdo", "template", "image", "image.tmp" and "list". Reads
# $templateURI, $templateMD5, $templateSHA256, $dirName and $image as
# set by the .jigdo parser, plus the global configuration. On success
# the image is moved to $imageDir/$dirName/$image; on any failure
# exitCode is set to 1. Always returns 0. Overwrites the positional
# parameters.
makeImage() {
    # Remove leftovers of an interrupted run. "template" may be a stale
    # symlink to a template in the mirror; downloading with -O
    # "template" would then overwrite the file it points to.
    rm -f "image" "image.tmp" "template"
    template=`basename "$templateURI"`
    if test -f "$templateDir/$dirName/$template"; then
        # Check for template in $templateDir
        log "    Found template \`\$templateDir/$dirName/$template'"
        ln -s "$templateDir/$dirName/$template" "template"
    elif isURI "$templateURI"; then
        # Absolute template URL - download
        log "    Template \`\$templateDir/$dirName/$template' not found, will download"
        if fetch "$templateURI" -O "template"; then true; else
            log "    Error getting template file"
            exitCode=1
            rm -f "image" "template"
            return 0
        fi
    elif test -f "$jigdoDir/$dirName/$templateURI"; then
        # Relative template URL - look next to the .jigdo file
        log "    Found template \`\$jigdoDir/$dirName/$templateURI'"
        ln -s "$jigdoDir/$dirName/$templateURI" "template"
    else
        log "    Template file \`\$templateDir/$dirName/$template' not found"
        log "    Template file \`\$jigdoDir/$dirName/$templateURI' not found"
        exitCode=1
        rm -f "image" "template"
        return 0
    fi

    # If possible, check the checksum of the template data. MD5 takes
    # precedence over SHA256 if the image section supplies both.
    if test "$templateMD5"; then
        sumCommand="md5sum"
        expectedSum="$templateMD5"
    elif test "$templateSHA256"; then
        sumCommand="sha256sum"
        expectedSum="$templateSHA256"
    else
        sumCommand=""
        log "    [WARNING - \`Template-MD5Sum' and \`Template-SHA256Sum' missing from image section]"
    fi
    if test -n "$sumCommand"; then
        # The first word of the output is compared with the expected sum
        set -- `$jigdoFile $sumCommand --report=quiet "template"`
        if test "$1" = "$expectedSum"; then
            log "    Template checksum is correct"
        else
            log "    Error - template checksum mismatch"
            exitCode=1
            rm -f "image" "template"
            return 0
        fi
    fi

    # Try to merge any files into the image: every line of the
    # print-missing output which is neither empty nor starts with a
    # "scheme:" prefix is passed to make-image as a file name.
    if $havePMA; then
        printMissing="print-missing-all"
    else
        printMissing="print-missing"
    fi
    $jigdoFile $printMissing $ijtOpts $uriOpts \
    | grep -Ev '^([a-zA-Z0-9.+_-]+:|$)' \
    | $jigdoFile make-image $ijtOpts --files-from=-
    jigdoErr="$?"
    if test "$jigdoErr" -ge 2; then
        log "    Error merging data from local filesystem"
        exitCode=1
        rm -f "image" "template"
        return 0
    fi

    # First try to download all files using the first URL in the
    # print-missing-all list. If any files remain missing, add another
    # pass, this time try to download the missing files using the 2nd
    # URL, and so on. "list" holds the candidate URLs, with an empty
    # line terminating the group of URLs for one missing file.
    noMorePasses=$localMirror
    for pass in x xx xxx xxxx xxxxx xxxxxx xxxxxxx xxxxxxxx; do
        if $havePMA; then
            $jigdoFile print-missing-all $ijtOpts $jigdoOpts $uriOpts \
            | grep -E -i '^(https:|http:|ftp:|$)' >"list"
        else
            # Quick hack until jigdo-port supports print-missing-all:
            # emit an empty line after every URL
            $jigdoFile print-missing $ijtOpts $jigdoOpts $uriOpts \
            | grep -E -i '^(https:|http:|ftp:|$)' \
            | sed -n '/./{p;s/^.*$//;p;}' >"list"
        fi
        # One empty line per missing file
        missingCount=`grep '^$' <"list" | wc -l | sed -e 's/ *//g'`
        if test "$pass" = "x"; then
            # Without a local mirror everything must be downloaded, so
            # the limit is not applied in the first pass
            if $localMirror; then true; else missingCount=0; fi
        fi
        if test "$missingCount" -gt "$maxMissing"; then
            log "    Too many files ($missingCount) missing in local mirror"
            exitCode=1
            rm -f "list" "image" "template"
            return 0
        fi
        # Accumulate URLs in $@, pass them to fetchAndMerge in batches.
        # $count has one "x" per line read since the last empty line,
        # so it equals $pass for the URL this pass is interested in.
        set --
        count=""
        while read url; do
            count="x$count"
            if test "$url" = ""; then count=""; continue; fi
            if test "$count" != "$pass"; then continue; fi
            if $noMorePasses; then
                log "    $missingCount parts still missing from image"
            fi
            noMorePasses=false
            set -- "$@" "$url"
            if test "$#" -ge "$filesPerFetch"; then
                if fetchAndMerge "$@"; then true; else
                    noMorePasses=true
                fi
                set --
            fi
        done <"list"
        # Fetch the last, incomplete batch
        if test "$#" -ge 1; then
            if fetchAndMerge "$@"; then true; else break; fi
        fi
        # Stop if this pass had no URLs to try or the image is complete
        if $noMorePasses; then break; fi
        if test -r "image"; then break; fi
        noMorePasses=true
    done

    if test -r "image"; then
        # Finished - verify checksum
        if $jigdoFile verify $ijtOpts; then
            log "    Image checksum is correct, moving image into place"
            mkdir -p "$imageDir/$dirName"
            mv "image" "$imageDir/$dirName/$image"
        else
            log "    Error - image checksum mismatch"
            exitCode=1
        fi
    else
        log "    Image creation failed, list of missing files follows"
        $jigdoFile print-missing $ijtOpts $jigdoOpts $uriOpts
        exitCode=1
    fi
    rm -f "list" "image" "image.tmp" "template"
    return 0
}
#______________________________________________________________________

# fetchAndMerge <URL>...
# Given URLs, fetch them into $tmpDir/files, then merge the downloaded
# files into the image. If jigdo-file fails with status 2 or higher,
# exitCode is set to 1 and 1 is returned so that the caller can stop
# and clean up; otherwise the status of the final cleanup is returned.
fetchAndMerge() {
    # Start with an empty download directory, otherwise leftovers of an
    # interrupted run would be merged again
    rm -rf "$tmpDir/files"
    # Failed downloads are not fatal: whatever did arrive is merged
    (mkdir "$tmpDir/files" && cd "$tmpDir/files" && fetch "$@")
    # Merge into the image
    find "$tmpDir/files" -type f \
    | $jigdoFile make-image $ijtOpts --no-cache --files-from=-
    jigdoErr="$?"
    if test "$jigdoErr" -ge 2; then
        log "    Error merging downloaded data"
        exitCode=1
        rm -rf "$tmpDir/files"
        return 1
    fi
    # Delete the downloads, to avoid taking up more space than necessary
    rm -rf "$tmpDir/files"
}
#______________________________________________________________________

# sectionEnd
# Called when a section of the .jigdo file has been read completely.
# Does nothing unless the section was an [Image] section which
# supplied both an image filename and a template. Otherwise the image
# is (re)created with makeImage if it passes the $include/$exclude
# filters and is either missing from $imageDir or older than the
# .jigdo file.
sectionEnd() {
  if test "$section" != "[Image]"; then return 0; fi
  if test -z "$image" || test -z "$templateURI"; then return 0; fi

  log "  Image \`$image', template \`$templateURI'"
  # The name must match $include and must not match $exclude. grep -v
  # only succeeds if the name made it through both filters.
  if printf '%s\n' "$dirName/$image" \
     | grep -E -- "$include" \
     | grep -Ev -- "$exclude" >/dev/null; then
    true
  else
    log "    \`\$imageDir/$dirName/$image' excluded by \$include/\$exclude"
    return 0
  fi

  if test -f "$imageDir/$dirName/$image"; then
    if test "$jigdoDir/$jigdo" -nt "$imageDir/$dirName/$image"; then
      log "    jigdo is newer - updating \`\$imageDir/$dirName/$image'"
      # Remove outdated image *immediately*, even in case
      # the subsequent attempt to regenerate it fails
      rm -f "$imageDir/$dirName/$image"
      makeImage
    else
      log "    \`\$imageDir/$dirName/$image' is up to date"
      # Refresh the timestamp so that the image is not expired by the
      # $maxAge cleanup while it is still offered
      if test -n "$maxAge"; then
        touch "$imageDir/$dirName/$image"
      fi
    fi
  else
    log "    Attempting to create \`\$imageDir/$dirName/$image'"
    makeImage
  fi
}
#______________________________________________________________________

# If a logfile is configured, start it afresh and append both stdout
# and stderr of everything that follows to it
if test -n "$logfile"; then
    true >"$logfile"
    exec >>"$logfile"
    exec 2>>"$logfile"
fi

# log <message>
# Print the message to stdout, prefixed with the current date and time
log() {
    printf "%s: %s\n" "$(date +'%Y-%m-%d %H:%M:%S')" "$1"
}
#________________________________________

# Templates are looked for alongside the .jigdo files unless a separate
# $templateDir was configured. Applied before logging so that the log
# shows the directory which is actually used.
if test -z "$templateDir"; then templateDir="$jigdoDir"; fi
log "imageDir:    $imageDir"
log "jigdoDir:    $jigdoDir"
log "templateDir: $templateDir"
# Remove a trailing slash from dir names
jigdoDir=${jigdoDir%/}
imageDir=${imageDir%/}
templateDir=${templateDir%/}
tmpDir=${tmpDir%/}
debianMirror=${debianMirror%/}
nonusMirror=${nonusMirror%/}
# Both option strings are expanded unquoted later on, i.e. split at
# whitespace; the single quotes are passed on literally.
uriOpts="--uri Debian='$debianMirror/' --uri Non-US='$nonusMirror/'"
ijtOpts="--image=image --jigdo=jigdo --template=template"
# Is the main mirror on the local disc? Only if both mirrors are file:
# URLs or both are absolute paths.
case "$debianMirror $nonusMirror" in
    "file:"*" file:"*|/*" /"*) localMirror=true;;
    *) localMirror=false;;
esac
# print-missing-all is assumed to be supported unless the config says
# otherwise
if test -z "$havePMA"; then havePMA=true; fi
exitCode=0
mkdir -p "$tmpDir" || true
# Everything below creates and deletes files with fixed names ("jigdo",
# "template", "image", ...) in the current directory, so do not carry
# on in whatever directory the script happened to be started in.
if cd "$tmpDir"; then true; else
    log "Cannot change to temporary directory \`$tmpDir' - aborting"
    exit 1
fi
#________________________________________

# Process every .jigdo file below $jigdoDir. The loop is the last part
# of a pipeline and therefore runs in a subshell: the explicit subshell
# exits with the exitCode accumulated inside it, so that failures are
# not lost for the exit status of the script.
find "$jigdoDir" -name "*.jigdo" \
| (
    while IFS= read -r jigdoPath; do
        # Name of the .jigdo file relative to $jigdoDir
        jigdo=${jigdoPath#"$jigdoDir/"}

        log "Found \`\$jigdoDir/$jigdo'"
        dirName=`dirname "$jigdo"`
        # "jigdo" may be a symlink left behind by an interrupted run;
        # writing to it would overwrite the file it points to
        rm -f "jigdo"
        if gzip -cd "$jigdoDir/$jigdo" >"jigdo" 2>"/dev/null"; then
            true
        elif test -f "$jigdoDir/$jigdo"; then
            # Not compressed - use the file itself
            rm -f "jigdo" "template"
            ln -s "$jigdoDir/$jigdo" "jigdo"
        else
            log "  jigdo file not present/could not be unpacked - ignored"
            exitCode=1
            continue
        fi

        # Parse jigdo file, look for images. Each line is normalized:
        # "[ Name ]" becomes "[Name]", the first " = " becomes a space,
        # and the characters ' " $ are removed. The result is split
        # into words.
        section=""
        while read REPLY; do
          set -- `echo "$REPLY" | sed -e 's/^ *\[ *\([^ ]*\) *\] *$/[\1]/; s/ *= */ /; s/['\''"$]//g'`
          case "$1" in
            "["*"]")
              # A new section starts - finish the previous one
              sectionEnd
              unset image templateURI templateMD5 templateSHA256 shortInfo info
              section="$1";;
            Filename) image="$2";;
            Template) templateURI="$2";;
            Template-MD5Sum) templateMD5="$2";;
            Template-SHA256Sum) templateSHA256="$2";;
            ShortInfo) shift; shortInfo="$*";;
            Info) shift; info="$*";;
          esac
        done <"jigdo"
        # Finish the last section of the file
        sectionEnd

        rm -f "jigdo"

    done
    exit "$exitCode"
) || exitCode=1
#________________________________________

# If $maxAge is set, delete all files below $imageDir which were last
# modified more than $maxAge days ago, then remove directories which
# have become empty.
if cd "$imageDir"; then
    if test "$maxAge"; then
        log "Expiring images older than $maxAge days"
        # IFS= and -r keep file names with leading/trailing blanks or
        # backslashes intact, otherwise rm would silently miss them
        find . -type f -mtime +"$maxAge" \
        | while IFS= read -r file; do
            log "  Deleting \`\$imageDir/${file#./}'"
            rm -f "$file"
        done
        # Remove empty directories. Uses "." because the current
        # directory is $imageDir now, which also works if $imageDir is
        # a relative path.
        find . -depth -mindepth 1 -type d -empty \
            -exec rmdir '{}' ';'
    fi
fi
#________________________________________

log "Exit $exitCode"
exit "$exitCode"
