#!/bin/bash
#
# Generates a basic XML sitemap by collecting hyperlinks from the specified URL.
#
# Configuration (edit the variables below):
#   file            Path of the sitemap that is written.
#   url             Page whose <a href="..."> links are collected.
#   includelastmod  'true' adds a <lastmod> element per page, taken from the
#                   page's Last-Modified response header (one extra HEAD
#                   request per page). Any other value disables it.
#
# Requires curl, grep, awk and GNU sed; GNU date is needed only when
# includelastmod='true'.
#
# NOTE(review): the link-selection rules in extract_links (same-host absolute
# links plus root-relative links, fragments dropped, duplicates removed) are a
# reconstruction of the intended behaviour -- confirm they match what the site
# needs.

set -uo pipefail

file='./sitemap.xml'
url='https://cerium.cc/'
includelastmod='false'

# Host name of $url (e.g. "cerium.cc"); used to keep only same-site links.
root=$(printf '%s\n' "$url" \
  | sed 's/[a-z]*:\?\/\/\([0-9A-Za-z\.-]\+\)\/\?.*/\1/')

# Scratch file for the sitemap being built. Global (not local) so that the
# EXIT trap can still see it after main has returned.
tmp_out=''

# Removes the scratch file; installed as the EXIT trap.
cleanup() {
  if [[ -n "$tmp_out" ]]; then
    rm -f -- "$tmp_out"
  fi
}

# Prints a message to stderr and aborts the script with status 1.
die() {
  printf 'sitemap: %s\n' "$*" >&2
  exit 1
}

#######################################
# Escapes the five XML special characters.
# Inputs:  text on stdin
# Outputs: escaped text on stdout
#######################################
xml_escape() {
  # '&' must be handled first so the entities produced by the later
  # expressions are not escaped a second time.
  sed -e 's/&/\&amp;/g' \
      -e 's/</\&lt;/g' \
      -e 's/>/\&gt;/g' \
      -e 's/"/\&quot;/g' \
      -e "s/'/\&apos;/g"
}

#######################################
# Extracts same-site page URLs from HTML.
# Only double-quoted href attributes of <a> tags that sit on a single line
# are recognised.
# Arguments: $1 - origin, i.e. scheme://host without a trailing slash
# Inputs:    HTML document on stdin
# Outputs:   one absolute URL per line, first occurrence order, no duplicates
# Returns:   0 even when the document contains no links
#######################################
extract_links() {
  local origin=$1

  # grep exits 1 when nothing matches; "no links" is a valid result, so that
  # status is deliberately ignored to keep pipefail from reporting an error.
  { grep -oiE '<a[[:space:]][^<>]*href="[^"]*"' || true; } \
    | sed -E \
        -e 's/.*[Hh][Rr][Ee][Ff]="([^"]*)"$/\1/' \
        -e 's/#.*$//' \
        -e 's/&amp;/\&/g' \
    | awk -v origin="$origin" '
        function emit(u) {
          if (!(u in seen)) {
            seen[u] = 1
            print u
          }
        }
        # Absolute link on the same host: keep unchanged.
        $0 == origin || index($0, origin "/") == 1 { emit($0); next }
        # Root-relative link ("/path", but not protocol-relative "//host").
        substr($0, 1, 1) == "/" && substr($0, 2, 1) != "/" { emit(origin $0) }
      '
}

#######################################
# Looks up the modification time of a page.
# Arguments: $1 - page URL
# Outputs:   Last-Modified time as YYYY-MM-DDThh:mm:ss+00:00 (UTC) on stdout
# Returns:   non-zero if the request fails, the header is absent, or the
#            header value cannot be parsed by date
#######################################
page_lastmod() {
  local header

  # HTTP header names are case-insensitive and lines end in CRLF.
  # stdin is redirected so curl cannot consume the caller's URL list.
  header=$(curl -sI -- "$1" < /dev/null \
    | awk 'tolower($0) ~ /^last-modified:/ && !found {
             sub(/^[^:]*:[ \t]*/, "")
             sub(/\r$/, "")
             print
             found = 1
           }') || return 1
  [[ -n "$header" ]] || return 1

  date -ud "$header" +'%FT%T%:z'
}

#######################################
# Writes one <url> element per page.
# Globals:   includelastmod (read)
# Inputs:    one page URL per line on stdin
# Outputs:   tab-indented <url> elements on stdout
#######################################
write_entries() {
  local page escaped lastmod

  while IFS= read -r page; do
    [[ -n "$page" ]] || continue
    escaped=$(printf '%s\n' "$page" | xml_escape)

    printf '\t<url>\n'
    printf '\t\t<loc>%s</loc>\n' "$escaped"
    if [[ "$includelastmod" == 'true' ]]; then
      # A page without a usable Last-Modified header simply gets no <lastmod>.
      if lastmod=$(page_lastmod "$page"); then
        printf '\t\t<lastmod>%s</lastmod>\n' "$lastmod"
      fi
    fi
    printf '\t</url>\n'
  done
}

#######################################
# Fetches $url, builds the sitemap and writes it to $file.
# Globals: url, root, file (read), tmp_out (written)
#######################################
main() {
  local origin links

  origin="${url%%://*}://${root}"

  # -f: treat HTTP errors as failures so an error page is never turned into
  # a sitemap.
  links=$(curl -fsS -- "$url" | extract_links "$origin") \
    || die "failed to fetch $url"

  tmp_out=$(mktemp) || die 'mktemp failed'
  trap cleanup EXIT

  {
    printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>'
    printf '%s\n' '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
    if [[ -n "$links" ]]; then
      printf '%s\n' "$links" | write_entries
    fi
    printf '%s\n' '</urlset>'
  } > "$tmp_out" || die 'failed to build the sitemap'

  # The sitemap is built in a scratch file first so a failed run never
  # clobbers an existing $file. It is copied rather than moved so $file keeps
  # its usual permissions (mktemp creates files with mode 0600).
  cat -- "$tmp_out" > "$file" || die "cannot write $file"
}

main "$@"