#! /bin/bash

# configs
# Every setting except h2t_options keeps a non-empty value that is already
# present in the environment; otherwise the default given here is assigned.
: "${warn_time:=86400}"  # seconds
: "${rss_feed:=https://www.archlinux.org/feeds/news/}"
# the archive name embeds the current year and month, formatted with LC_TIME=C
: "${mail_list:=https://lists.archlinux.org/pipermail/arch-general/$(LC_TIME=C printf '%(%Y-%B)T\n' -1).txt.gz}"
: "${log_file:=/var/log/arch-news.log}"
: "${pacman_log:=/var/log/pacman.log}"
: "${pacdiff_program:=pacdiff}"
: "${pacman_program:=pacman}"
: "${config:=$HOME/.config/pacmatic}"  # not really a config file ;-)
h2t_options="--protect-links --ignore-images"

# The news log ($log_file) has one line per entry: a unix time stamp followed
# by the escaped news blurb.
# The Date line of the last mail seen is kept in $config.

# Issues:
# Lots of dirty hacks for processing the RSS subset of XML.
# pacdiff integration is pretty crude.
# Switch to a real config file if it gets one more option.

# Todo:
# More unixy env vars.
# Merge serial seds.
# Backup db.
# Conf file for vars.
# edit pacnews from newest to oldest
# fix \n in news?
# mailing list assumes gz'd text archives

# sledgehammer says to watch arch-dev-public and arch-commits
# (sledgehammer also reads no MLs)

# borrow from https://bbs.archlinux.org/viewtopic.php?id=163398

mail_summary()  # None : None, set $mail_counts and save new stamp
{
    # Counts how often installed package names show up in the mail archive.
    # Reads:  $mail_list (URL of a gzip'd text archive),
    #         $config (file holding the last Date line seen, if any)
    # Sets:   $mail_counts, formatted like "foo(3), bar(1)"
    # Writes: the newest Date line of the processed text to $config
    # Needs:  curl, zcat, expac
    local stamp="" archive="" raw="" new_stamp=""
    if [ -f "$config" ]; then
        stamp=$(cat "$config")
    fi
    # fetch once; the "since last stamp" view and the fallback share the text
    archive=$(curl -s "$mail_list" | zcat)
    if [ -n "$stamp" ]; then
        # keep everything from the first line containing the saved stamp;
        # index() compares literally, so the stamp is never parsed as a regex
        raw=$(MAIL_STAMP="$stamp" awk 'seen || index($0, ENVIRON["MAIL_STAMP"]) { seen = 1; print }' <<< "$archive")
    fi
    if [ -z "$raw" ]; then
        # no usable stamp, or it does not occur in this archive: use all of it
        raw=$archive
    fi
    new_stamp=$(grep '^Date' <<< "$raw" | tail -n 1)
    mail_counts=$(grep -oF "$(expac -Q ' %n-')" <<< "$raw" | sort | uniq -c | sort -nr  | awk '{sub(/-$/,"",$2); if(NR!=1) printf ", "; printf "%s(%s)",$2,$1}')
    # a failed or empty download must not wipe the stamp that is already saved
    if [ -n "$new_stamp" ]; then
        mkdir -p -- "$(dirname -- "$config")"
        printf '%s\n' "$new_stamp" > "$config"
    fi
}

clean_rss()  # none : xml
{
    # Fetch $rss_feed and reshape it so that line-oriented tools can cope:
    #   1. prefix every input line with a literal backslash-n marker
    #   2. turn CR/LF into spaces (runs of spaces get squeezed as well)
    #   3. start a new line in front of every opening tag
    curl --connect-timeout 10 -s -o - "$rss_feed" \
        | sed 's/^/\\n/' \
        | tr -s '\r\n' ' ' \
        | sed -E 's|<[^/]|\n&|g'
}

tag_chop()  # xml string : string
{
    # Print what sits between <tag> and </tag>, one match per output line.
    #   $1 - xml text to search
    #   $2 - tag name
    # this only works for the innermost tags
    local xml=$1
    local name=$2
    grep -o "<$name>.*</$name>" <<< "$xml" \
        | sed -e "s/<$name>//g" -e "s/<\/$name>//g"
}

log_time()  # datetime : seconds
{
    # turns a timestamp from pacman.log into unix time
    #   $1 $2 - leading words of a log entry (or a whole entry in $1)
    # Two layouts are recognised:
    #   [2019-10-24T12:34:56+0200]  ISO 8601, a single word
    #   [2018-01-01 12:00]          date and time as two words
    # When neither is found, date is handed an empty string, which GNU date
    # reads as the beginning of today.
    local entry="$1 $2"
    local iso_re='[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}([+-][0-9]{4})?'
    local old_re='[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}'
    local stamp=""
    if [[ $entry =~ $iso_re ]]; then
        stamp=${BASH_REMATCH[0]}
    elif [[ $entry =~ $old_re ]]; then
        stamp=${BASH_REMATCH[0]}
    fi
    date -d "$stamp" +%s
}

last_sync()  # none : seconds
{
    # Prints what log_time makes of the newest "synchronizing package lists"
    # entry in $pacman_log.
    # can not tell if last sync failed/aborted
    # The entry is passed as ONE quoted word: left unquoted, its bracketed
    # fields are glob patterns and may be replaced by matching file names
    # from the current directory.
    log_time "$(grep -F -- 'synchronizing package lists' "$pacman_log" | tail -n 1)"
}

last_upgrade()  # none : seconds
{
    # Prints what log_time makes of the newest "starting full system upgrade"
    # entry in $pacman_log.
    # can not tell if last update failed/aborted
    # The entry is passed as ONE quoted word: left unquoted, its bracketed
    # fields are glob patterns and may be replaced by matching file names
    # from the current directory.
    log_time "$(grep -F -- 'starting full system upgrade' "$pacman_log" | tail -n 1)"
}

publication_dates()  # xml : seconds
{
    # Emit one unix time per pubDate element that tag_chop finds in $1.
    local pub_date
    while read pub_date ; do
        date -d "$pub_date" +%s
    done < <(tag_chop "$1" "pubDate")
}

get_news_log()  # none : seconds
{
    # returns timestamp of last news seen
    # That is the run of digits at the start of the last line of $log_file;
    # 0 is printed when the file is missing, empty, or its last line does not
    # start with a digit.
    local last_seen=""
    if [ -f "$log_file" ]; then
        last_seen=$(tail -n 1 -- "$log_file" | grep -o '^[0-9]*')
    fi
    printf '%s\n' "${last_seen:-0}"
}

xml_decode() # string : string
{
    # Turns tabs into spaces, squeezes runs of spaces and decodes the &lt;,
    # &gt; and &amp; entities of $1.
    # &amp; is decoded last so that "&amp;lt;" ends up as "&lt;", not "<".
    # The ampersand in the last replacement is escaped: a bare "&" stands for
    # the matched text in sed and would leave "&amp;" untouched.
    printf '%s\n' "$1" \
        | sed 's/\t/ /g' \
        | tr -s ' ' \
        | sed -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&amp;/\&/g'
}

check_news()  # none : news (logfile)
{
    # Prints the feed items whose time stamp is greater than the one reported
    # by get_news_log and appends each printed item to $log_file.
    # Uses:  get_news_log, clean_rss, publication_dates, tag_chop, xml_decode
    # Reads: $log_file, $h2t_options
    local last_news_date clean pubdates descriptions
    local line decoded this_date body
    local header="The latest and greatest news from the Arch Linux distribution."
    local use_html2text=0

    last_news_date=$(get_news_log)
    clean=$(clean_rss)
    pubdates=$(mktemp) || return 1
    descriptions=$(mktemp) || { rm -f -- "$pubdates"; return 1; }
    publication_dates "$clean" > "$pubdates"
    tag_chop "$clean" "description" | grep -v "$header" > "$descriptions"
    if [[ ! -s "$log_file" ]]; then
        echo "Initializing news archive, expect a flood of news."
        echo "This will not happen on subsequent launches."
        sleep 5
    fi
    # look the converter up once instead of once per item
    if command -v html2text > /dev/null 2>&1; then
        use_html2text=1
    fi
    # paste is much happier with files
    # each pasted line is "<unix time><TAB><description>"; tac reverses the
    # feed order (presumably so the oldest item comes first - verify with feed)
    paste "$pubdates" "$descriptions" | tac | while read -r line ; do
        decoded=$(xml_decode "$line")
        this_date=${decoded%% *}
        # lines that do not start with a numeric time stamp carry no news date
        [[ $this_date =~ ^[0-9]+$ ]] || continue
        if [ "$this_date" -gt "${last_news_date:-0}" ]; then
            body=${decoded#* }
            if (( use_html2text )); then
                # $h2t_options stays unquoted: it holds several options
                printf '%s\n' "$body" | html2text $h2t_options | sed 's/\\n$//'
            else
                printf '%s\n' "$body" | sed -e 's|<br />|\n|g' -e 's|\\n||g' -e 's|<p>|\n|g' -e 's|</p>||g'
            fi
            echo
            printf '%s\n' "$decoded" >> "$log_file"
        fi
    done
    rm -f -- "$pubdates" "$descriptions"
}

check_checkspace() # [conf] : return value
{
    # Succeeds when a line of the configuration starts with CheckSpace,
    # optionally indented with spaces or tabs.
    #   $1 - configuration file to inspect (default: /etc/pacman.conf)
    # A missing or unreadable file counts as "not enabled" (grep -s is quiet).
    local conf=${1:-/etc/pacman.conf}
    # [[:blank:]] is space or tab; "\t" inside a bracket expression would
    # match a backslash or the letter t instead of a tab
    grep -qs -- '^[[:blank:]]*CheckSpace' "$conf"
}

pacnew_count()  # none : int
{
    # Number of *.pacnew, *.pacorig and *.pacsave entries found below /etc/.
    # Whatever find reports on stderr is thrown away.
    find /etc/ \
        \( -name '*.pacnew' -o -name '*.pacorig' -o -name '*.pacsave' \) \
        2> /dev/null \
        | wc -l
}

docs()  # none : docs
{
    # Prints the usage text, filled in with the current value of each setting.
    # ${0##*/} is $0 without its directory part; unlike an unquoted
    # "basename $0" it copes with spaces and with names that start with "-".
    cat << END
Usage: ${0##*/} -options [packages]

Pacmatic is a pacman wrapper that takes care of menial but critical tasks.

These include
    Checking the archlinux.org news
    Summarizing the arch-general mailing list
    Reminding if it has been a while since the last sync
    Reporting pacnew files
    Editing pacnew files

The following environment variables can be set
    warn_time="$warn_time"  # (seconds)
    rss_feed="$rss_feed"
    log_file="$log_file"
    pacdiff_program="$pacdiff_program"
    pacman_program="$pacman_program"
    mail_list="$mail_list"
    pacman_log="$pacman_log"

To use Pacmatic's functionality in a script, source with
    . /usr/bin/pacmatic --as-lib

END
}

# ---- main program: everything below is skipped when sourced with --as-lib ----

if [ $# -eq 0 ]; then
    docs
    exit
fi

# when this file is sourced, return stops here and leaves only the functions
[ "$1" == "--as-lib" ] && return

current_time=$(date +%s)
sync_time=$(last_sync)
upgrade_time=$(last_upgrade)

# news, mailing list summary and CheckSpace reminder before any sync/upgrade
case "$1" in
    -Syu|-Su|-Suy|-Syyu|-Syuy|-Suyy|-S|-Sy|-Syy)
        # do not like this threading
        check_news &
        mail_summary
        wait
        echo "Recent ML chatter: $mail_counts"
        if ! check_checkspace; then
            printf '\nPlease enable CheckSpace in pacman.conf\n'
        fi
        ;;
esac

# stale installation warning; inside (( )) an empty value counts as 0, so a
# log without matching entries cannot cause an arithmetic syntax error
case "$1" in
    -S)
        if (( sync_time - upgrade_time > warn_time )); then
            printf "\nWarning: stale installation, rerun with '-Syu'\n\n"
        fi
        ;;
    -Sy|-Syy)
        if (( current_time - upgrade_time > warn_time )); then
            printf "\nWarning: stale installation, rerun with '-Syu'\n\n"
        fi
        ;;
esac

old_pc=$(pacnew_count)
# "$@" hands every argument over unchanged, including ones with spaces.
# $pacman_program is left unquoted as before, presumably so that it may hold
# a command together with arguments - confirm before quoting it.
$pacman_program "$@"
exit_code=$?
exit_time=$(date "+[%F %H:%M]")
if [ -w "$pacman_log" ]; then
    echo "$exit_time Exited with code $exit_code" >> "$pacman_log"
fi
new_pc=$(pacnew_count)
add_pc=$(( new_pc - old_pc ))

if [ "$new_pc" = "0" ]; then
    echo "No pacnew files to update."
    # report the wrapped command's status instead of always claiming success
    exit "$exit_code"
fi

# offer to merge pacnew files after the same sync/upgrade invocations that
# triggered the checks above
case "$1" in
    -Syu|-Su|-Suy|-Syyu|-Syuy|-Suyy|-S|-Sy|-Syy)
        printf '\n%s pacnew files found (%s added).  Update files now? (Y/n) ' "$new_pc" "$add_pc"
        read -r char
        case "$char" in
            ""|Y|y)
                # an empty answer takes the default, which is yes
                $pacdiff_program
                ;;
        esac
        ;;
esac

exit "$exit_code"
