longnow-for-markdown

Takes a markdown file and feeds its links to the Internet Archive
Log | Files | Refs | README

commit 6c35ae790940759207c20daaf51ffa12a61fbdd2
parent 183e5503855c5dc15b49279e0551f4cb256b16df
Author: NunoSempere <nuno.sempere@gmail.com>
Date:   Mon, 28 Jun 2021 16:38:06 +0200

Small changes to make this more suitable as a command line utility

Diffstat:
MREADME.md | 3++-
Alongnow | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dlongnowformd.sh | 93-------------------------------------------------------------------------------
3 files changed, 98 insertions(+), 94 deletions(-)

diff --git a/README.md b/README.md @@ -1,11 +1,12 @@ This package takes a markdown file, and creates a new markdown file in which each link is accompanied by an archive.org link, in the format [...](original link) ([a](archive.org link)) ## How to install -Copy [this file](https://github.com/NunoSempere/longNowForMd/blob/master/longnowformd.sh) to your .bashrc file, or, for Ubuntu distributions Focal Fossa and above: +Copy [this file](https://github.com/NunoSempere/longNowForMd/blob/master/longnowformd.sh) to your .bashrc file, or, for Ubuntu distributions 20.04 (Focal Fossa) and above: ``` $ sudo add-apt-repository ppa:nunosempere/longnowformd $ sudo apt-get update +$ sudo apt install longnowformd ``` This utility requires [archivenow](https://github.com/oduwsdl/archivenow) as a dependency, which itself requires a python installation. It can be installed with diff --git a/longnow b/longnow @@ -0,0 +1,96 @@ +#!/bin/bash + +function getMdLinks(){ # Use: Takes a markdown file file.md, extracts all links, finds the unique ones and saves them to file.md.links + echo "" + echo "Extracting links..." + + grep -Eoi '\]\((.*)\)' $1 | grep -Eo '(http|https)://[^)]+' >> "$1.links" + ## sed -i 's/www.wikiwand.com\/en/en.wikipedia.org\/wiki/g' $1 + awk '!seen[$0]++' "$1.links" > "$1.links2" && mv "$1.links2" "$1.links" + + echo "Done." + echo "" +} + +function pushToArchive(){ +# Use: Takes a txt file with one link on each line and pushes all the links to the internet archive. Saves those links to a textfile +# References: +# https://unix.stackexchange.com/questions/181254/how-to-use-grep-and-cut-in-script-to-obtain-website-urls-from-an-html-file +# https://github.com/oduwsdl/archivenow +# For the double underscore, see: https://stackoverflow.com/questions/13797087/bash-why-double-underline-for-private-functions-why-for-bash-complet/15181999 + echo "Pushing to archive.org..." 
+ + input=$1 + counter=1 + + rm -f "$1.archived" + touch "$1.archived" + + while IFS= read -r line + do + wait + if [ $(($counter % 15)) -eq 0 ] + then + printf "\nArchive.org doesn't accept more than 15 links per min; sleeping for 1min...\n" + sleep 1m + fi + echo "Url: $line" + archiveURL=$(archivenow --ia $line) + echo $archiveURL >> "$1.archived" + echo $archiveURL + counter=$((counter+1)) + echo "" + done < "$input" + + echo "Done." + echo "" +} + +function addArchiveLinksToFile(){ + + originalFile="$1" + originalFileTemp="$originalFile.temp" + linksFile="$1.links" + archivedLinksFile="$1.links.archived" + longNowFile="$1.longnow" + + echo "Creating longnow file @ $longNowFile..." + + rm -f "$longNowFile" + touch "$longNowFile" + cp "$originalFile" "$originalFileTemp" + + while IFS= read -r url + do + wait + + archivedUrl=$(grep "$url" "$archivedLinksFile" | tail -1) + ## echo "Url: $url" + ## echo "ArchivedUrl: $archivedUrl" + urlForSed="${url//\//\\/}" + archiveUrlForSed="${archivedUrl//\//\\/}" + sed -i "s/$urlForSed)/$urlForSed) ([a]($archiveUrlForSed))/g" "$1" + done < "$linksFile" + mv "$originalFile" "$longNowFile" + mv "$originalFileTemp" "$originalFile" + + echo "Done." + +} + +function longnow(){ + doesArchiveNowExist=$(whereis "archivenow") + if [ "$doesArchiveNowExist" == "archivenow:" ] + then + echo "Required archivenow utility not found in path." + echo "Install with \$ pip install archivenow" + echo "(resp. \$ pip3 install archivenow)" + echo "Or follow instructions on https://github.com/oduwsdl/archivenow" + else + getMdLinks $1 + pushToArchive $1.links + addArchiveLinksToFile $1 + fi +} + +longnow "$1" ## don't copy this line into your .bashrc file diff --git a/longnowformd.sh b/longnowformd.sh @@ -1,93 +0,0 @@ -function getMdLinks(){ # Use: Takes a markdown file file.md, extracts all links, finds the unique ones and saves them to file.md.links - echo "" - echo "Extracting links..." 
- - grep -Eoi '\]\((.*)\)' $1 | grep -Eo '(http|https)://[^)]+' >> "$1.links" - ## sed -i 's/www.wikiwand.com\/en/en.wikipedia.org\/wiki/g' $1 - awk '!seen[$0]++' "$1.links" > "$1.links2" && mv "$1.links2" "$1.links" - - echo "Done." - echo "" -} - -function pushToArchive(){ -# Use: Takes a txt file with one link on each line and pushes all the links to the internet archive. Saves those links to a textfile -# References: -# https://unix.stackexchange.com/questions/181254/how-to-use-grep-and-cut-in-script-to-obtain-website-urls-from-an-html-file -# https://github.com/oduwsdl/archivenow -# For the double underscore, see: https://stackoverflow.com/questions/13797087/bash-why-double-underline-for-private-functions-why-for-bash-complet/15181999 - echo "Pushing to archive.org..." - - input=$1 - counter=1 - - rm -f "$1.archived" - touch "$1.archived" - - while IFS= read -r line - do - wait - if [ $(($counter % 15)) -eq 0 ] - then - printf "\nArchive.org doesn't accept more than 15 links per min; sleeping for 1min...\n" - sleep 1m - fi - echo "Url: $line" - archiveURL=$(archivenow --ia $line) - echo $archiveURL >> "$1.archived" - echo $archiveURL - counter=$((counter+1)) - echo "" - done < "$input" - - echo "Done." - echo "" -} - -function addArchiveLinksToFile(){ - - originalFile="$1" - originalFileTemp="$originalFile.temp" - linksFile="$1.links" - archivedLinksFile="$1.links.archived" - longNowFile="$1.longnow" - - echo "Creating longnow file @ $longNowFile..." - - rm -f "$longNowFile" - touch "$longNowFile" - cp "$originalFile" "$originalFileTemp" - - while IFS= read -r url - do - wait - - archivedUrl=$(grep "$url" "$archivedLinksFile" | tail -1) - ## echo "Url: $url" - ## echo "ArchivedUrl: $archivedUrl" - urlForSed="${url//\//\\/}" - archiveUrlForSed="${archivedUrl//\//\\/}" - sed -i "s/$urlForSed)/$urlForSed) ([a]($archiveUrlForSed))/g" "$1" - done < "$linksFile" - mv "$originalFile" "$longNowFile" - mv "$originalFileTemp" "$originalFile" - - echo "Done." 
- -} - -function longnow(){ - doesArchiveNowExist=$(whereis "archivenow") - if [ "$doesArchiveNowExist" == "archivenow:" ] - then - echo "Required archivenow utility not found in path." - echo "Install with \$ pip install archivenow" - echo "(resp. \$ pip3 install archivenow)" - echo "Or follow instructions on https://github.com/oduwsdl/archivenow" - else - getMdLinks $1 - pushToArchive $1.links - addArchiveLinksToFile $1 - fi -} -