commit 6c35ae790940759207c20daaf51ffa12a61fbdd2
parent 183e5503855c5dc15b49279e0551f4cb256b16df
Author: NunoSempere <nuno.sempere@gmail.com>
Date: Mon, 28 Jun 2021 16:38:06 +0200
Small changes to make this more suitable as a command line utility
Diffstat:
| M | README.md | | | 3 | ++- |
| A | longnow | | | 96 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| D | longnowformd.sh | | | 93 | ------------------------------------------------------------------------------- |
3 files changed, 98 insertions(+), 94 deletions(-)
diff --git a/README.md b/README.md
@@ -1,11 +1,12 @@
This package takes a markdown file, and creates a new markdown file in which each link is accompanied by an archive.org link, in the format [...](original link) ([a](archive.org link))
## How to install
-Copy [this file](https://github.com/NunoSempere/longNowForMd/blob/master/longnowformd.sh) to your .bashrc file, or, for Ubuntu distributions Focal Fossa and above:
+Copy [this file](https://github.com/NunoSempere/longNowForMd/blob/master/longnow) to your .bashrc file, or, for Ubuntu distributions 20.04 (Focal Fossa) and above:
```
$ sudo add-apt-repository ppa:nunosempere/longnowformd
$ sudo apt-get update
+$ sudo apt install longnowformd
```
This utility requires [archivenow](https://github.com/oduwsdl/archivenow) as a dependency, which itself requires a python installation. It can be installed with
diff --git a/longnow b/longnow
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+function getMdLinks(){ # Use: Takes a markdown file file.md, extracts all http(s) links, finds the unique ones and saves them to file.md.links
+  echo ""
+  echo "Extracting links..."
+  # "$1" is quoted (and preceded by --) so a path with spaces, globs or a leading dash reaches grep as one file operand.
+  grep -Eoi -- '\]\((.*)\)' "$1" | grep -Eo '(http|https)://[^)]+' >> "$1.links"
+  ## sed -i 's/www.wikiwand.com\/en/en.wikipedia.org\/wiki/g' $1
+  # The >> above keeps links already present in file.md.links; awk then drops repeated lines, keeping the first occurrence.
+  awk '!seen[$0]++' "$1.links" > "$1.links2" && mv "$1.links2" "$1.links"
+  echo "Done."
+  echo ""
+}
+
+function pushToArchive(){
+# Use: Takes a txt file with one link on each line and pushes all the links to the internet archive. Saves the output of archivenow to <file>.archived, one line per link
+# References:
+# https://unix.stackexchange.com/questions/181254/how-to-use-grep-and-cut-in-script-to-obtain-website-urls-from-an-html-file
+# https://github.com/oduwsdl/archivenow
+  echo "Pushing to archive.org..."
+
+  local input="$1" counter=1 line archiveURL # local: nothing leaks into the shell when this is pasted into .bashrc
+  : > "$input.archived" # start from an empty results file
+
+  # No bare "wait" here: this loop starts no background jobs, and in an interactive shell it would block on the user's own.
+  # "|| [[ -n ... ]]" also processes a final line that has no trailing newline.
+  while IFS= read -r line || [[ -n "$line" ]]
+  do
+    [[ -n "$line" ]] || continue # a blank line is not a link; do not call archivenow for it
+    if (( counter % 15 == 0 ))
+    then
+      printf "\nArchive.org doesn't accept more than 15 links per min; sleeping for 1min...\n"
+      sleep 1m
+    fi
+    echo "Url: $line"
+    # Quoted so that "?", "*" or spaces inside a URL are passed to archivenow verbatim instead of being split or glob-expanded.
+    archiveURL=$(archivenow --ia "$line")
+    archiveURL=${archiveURL//$'\n'/ } # keep exactly one line per link in the results file
+    printf '%s\n' "$archiveURL" >> "$input.archived"
+    printf '%s\n' "$archiveURL"
+    counter=$((counter+1))
+    echo ""
+  done < "$input"
+
+  echo "Done."
+  echo ""
+}
+
+function addArchiveLinksToFile(){
+# Use: Takes file.md (expects file.md.links and file.md.links.archived to exist) and writes file.md.longnow,
+# a copy of file.md in which every "url)" is followed by " ([a](archived url))". file.md itself is left untouched.
+
+  local originalFile="$1"
+  local linksFile="$1.links"
+  local archivedLinksFile="$1.links.archived"
+  local longNowFile="$1.longnow"
+  local url archivedUrl urlForSed archiveUrlForSed
+
+  echo "Creating longnow file @ $longNowFile..."
+
+  # Rewrite a copy rather than the original, so an interrupted run cannot leave file.md half-edited.
+  cp -- "$originalFile" "$longNowFile"
+
+  while IFS= read -r url || [[ -n "$url" ]]
+  do
+    # -F: look the url up as a literal string; as a regex its "." and "[" would match unintended lines.
+    archivedUrl=$(grep -F -- "$url" "$archivedLinksFile" | tail -1)
+    # Nothing usable recorded for this url (no entry, or not a link): leave it as is instead of inserting "([a]())".
+    [[ "$archivedUrl" == http* ]] || continue
+    # Pattern side: escape BRE metacharacters and the "/" delimiter. Replacement side: escape "\", "/" and "&".
+    urlForSed=$(printf '%s\n' "$url" | sed 's|[][\\/.*^$]|\\&|g')
+    archiveUrlForSed=$(printf '%s\n' "$archivedUrl" | sed 's|[\\/&]|\\&|g')
+    # "&" re-emits the matched "url)", so the original url never needs escaping for the replacement side.
+    sed -i "s/$urlForSed)/& ([a]($archiveUrlForSed))/g" "$longNowFile"
+  done < "$linksFile"
+
+  echo "Done."
+
+}
+
+function longnow(){ # Use: longnow file.md; runs getMdLinks, pushToArchive and addArchiveLinksToFile in turn, producing file.md.longnow
+  # command -v asks the shell whether archivenow is callable, instead of comparing against the exact text whereis prints.
+  if ! command -v archivenow > /dev/null
+  then
+    echo "Required archivenow utility not found in path."
+    echo "Install with \$ pip install archivenow"
+    echo "(resp. \$ pip3 install archivenow)"
+    echo "Or follow instructions on https://github.com/oduwsdl/archivenow"
+    return 1 # let the caller see the failure instead of a zero status
+  fi
+  getMdLinks "$1"
+  pushToArchive "$1.links"
+  addArchiveLinksToFile "$1"
+}
+
+longnow "$1" ## Script entry point: calls longnow with the first command-line argument. Don't copy this line into your .bashrc file
diff --git a/longnowformd.sh b/longnowformd.sh
@@ -1,93 +0,0 @@
-function getMdLinks(){ # Use: Takes a markdown file file.md, extracts all links, finds the unique ones and saves them to file.md.links
- echo ""
- echo "Extracting links..."
-
- grep -Eoi '\]\((.*)\)' $1 | grep -Eo '(http|https)://[^)]+' >> "$1.links"
- ## sed -i 's/www.wikiwand.com\/en/en.wikipedia.org\/wiki/g' $1
- awk '!seen[$0]++' "$1.links" > "$1.links2" && mv "$1.links2" "$1.links"
-
- echo "Done."
- echo ""
-}
-
-function pushToArchive(){
-# Use: Takes a txt file with one link on each line and pushes all the links to the internet archive. Saves those links to a textfile
-# References:
-# https://unix.stackexchange.com/questions/181254/how-to-use-grep-and-cut-in-script-to-obtain-website-urls-from-an-html-file
-# https://github.com/oduwsdl/archivenow
-# For the double underscore, see: https://stackoverflow.com/questions/13797087/bash-why-double-underline-for-private-functions-why-for-bash-complet/15181999
- echo "Pushing to archive.org..."
-
- input=$1
- counter=1
-
- rm -f "$1.archived"
- touch "$1.archived"
-
- while IFS= read -r line
- do
- wait
- if [ $(($counter % 15)) -eq 0 ]
- then
- printf "\nArchive.org doesn't accept more than 15 links per min; sleeping for 1min...\n"
- sleep 1m
- fi
- echo "Url: $line"
- archiveURL=$(archivenow --ia $line)
- echo $archiveURL >> "$1.archived"
- echo $archiveURL
- counter=$((counter+1))
- echo ""
- done < "$input"
-
- echo "Done."
- echo ""
-}
-
-function addArchiveLinksToFile(){
-
- originalFile="$1"
- originalFileTemp="$originalFile.temp"
- linksFile="$1.links"
- archivedLinksFile="$1.links.archived"
- longNowFile="$1.longnow"
-
- echo "Creating longnow file @ $longNowFile..."
-
- rm -f "$longNowFile"
- touch "$longNowFile"
- cp "$originalFile" "$originalFileTemp"
-
- while IFS= read -r url
- do
- wait
-
- archivedUrl=$(grep "$url" "$archivedLinksFile" | tail -1)
- ## echo "Url: $url"
- ## echo "ArchivedUrl: $archivedUrl"
- urlForSed="${url//\//\\/}"
- archiveUrlForSed="${archivedUrl//\//\\/}"
- sed -i "s/$urlForSed)/$urlForSed) ([a]($archiveUrlForSed))/g" "$1"
- done < "$linksFile"
- mv "$originalFile" "$longNowFile"
- mv "$originalFileTemp" "$originalFile"
-
- echo "Done."
-
-}
-
-function longnow(){
- doesArchiveNowExist=$(whereis "archivenow")
- if [ "$doesArchiveNowExist" == "archivenow:" ]
- then
- echo "Required archivenow utility not found in path."
- echo "Install with \$ pip install archivenow"
- echo "(resp. \$ pip3 install archivenow)"
- echo "Or follow instructions on https://github.com/oduwsdl/archivenow"
- else
- getMdLinks $1
- pushToArchive $1.links
- addArchiveLinksToFile $1
- fi
-}
-