#!/bin/bash
# -- prototype script
#
# Downloads the comment threads of one YouTube video through the YouTube Data
# API v3, then uses Vim scripts to find threads whose inline replies are
# incomplete, downloads those replies separately and splices them into a
# "_patched" copy of the download.
#
# Usage: <script> VIDEO_ID API_KEY [gzip]
#   VIDEO_ID  YouTube video ID (the cleanup filter assumes 11 characters)
#   API_KEY   YouTube Data API key (only its SHA1 hash is printed)
#   gzip      optional; archive every intermediate file into
#             youtube_comments-VIDEO_ID_history.tar.gz and delete the originals
#
# Requires GNU coreutils/sed/tar, curl and vim. Run it in an EMPTY folder: the
# cleanup step archives and deletes every other entry in the current directory.
#
# Strict mode (set -e) is deliberately not enabled: the pipeline relies on
# best-effort steps such as grep finding no match.

readonly API_BASE='https://www.googleapis.com/youtube/v3'

# Print a short usage message to stderr.
usage() {
  printf 'Usage: %s VIDEO_ID API_KEY [gzip]\n' "${0##*/}" >&2
}

# Print the SHA1 hex digest of "$1" followed by a newline (same input that
# `echo KEY | sha1sum` hashes), so the key can be logged without exposing it.
key_fingerprint() {
  printf '%s\n' "$1" | sha1sum - | sed 's/\s.*//g'
}

# Print the value of the last "nextPageToken" line found in file $1
# (empty output when the file has none).
last_page_token() {
  grep '"nextPageToken"' -- "$1" | tail -n 1 | awk '{print $2}' | tr -d '",'
}

# Escape $1 for literal use on the replacement side of a sed s/// command.
sed_escape_replacement() {
  printf '%s' "$1" | sed 's/[&/\]/\\&/g'
}

# Escape $1 for literal use on the pattern side of a sed s/// command.
sed_escape_pattern() {
  printf '%s' "$1" | sed 's/[]\/$*.^[]/\\&/g'
}

# Download every page of comment threads for video $1 with key $2 into file $3.
# Pages are appended to the same file; paging stops once the newest token in
# the file no longer changes (the last page carries no nextPageToken).
download_threads() {
  local video_id=$1 api_key=$2 out_file=$3
  local url="${API_BASE}/commentThreads?key=${api_key}&part=snippet,replies&videoId=${video_id}&maxResults=100"
  local token previous

  curl "$url" > "$out_file"

  # -- Download all pages of the comments
  token=$(last_page_token "$out_file")
  if [[ -z "$token" ]]; then
    echo "nextPageToken not found (there is none)."
    return 0
  fi

  echo "nextPageToken found."
  while [[ -n "$token" ]]; do
    previous=$token
    curl "${url}&pageToken=${token}" >> "$out_file"
    token=$(last_page_token "$out_file")
    echo "Old nextpage: $previous"
    echo "New nextpage: $token"
    if [[ "$previous" == "$token" ]]; then
      echo "No new nextPageToken found."
      break
    fi
    # Pause between page requests.
    sleep 0.7
  done
}

# Write vim.txt, the Vim script that reduces the downloaded JSON to one curl
# command per thread marked "---Does NOT have all replies" and saves the
# result as dl_missing.sh (with [KEY_HERE] as a placeholder for the API key).
# The patterns depend on the exact line layout of the API's pretty-printed
# JSON; the command strings are kept verbatim for that reason.
write_vim_script() {
  {
    echo 'execute "%s/\\(.\\+\\n\\)\\{28} \\{8}\"totalReplyCount\":\\s\\([1-9]\\|\\d\\d\\+\\),\\(\\n.\\+\\)\\{3}\\n\\( \\{4}{\\)/\\4/ge"'
    echo 'execute "%s/,\\n\\(.\\+\\n\\)\\{28} \\{8}\"totalReplyCount\":\\s\\([1-9]\\|\\d\\d\\+\\),\\(\\n.\\+\\)\\{3}\\n\\( \\{2}\]\\)/\\r\\4/ge"'
    echo 'execute "%s/,\\n\\(.\\+\\n\\)\\{28} \\{8}\"totalReplyCount\":\\s0,\\(\\n.\\+\\)\\{3}\\n\\( \\{2}\]\\)/\\r\\3/ge"'
    echo 'execute "%s/\\(.\\+\\n\\)\\{28} \\{8}\"totalReplyCount\": 0,\\(\\n.\\+\\)\\{3}//ge"'
    echo 'execute "%s/\\(.\\+\\n\\)\\{6} \\{14}\"textDisplay\": \"\\(.\\+\\n\\)\\{15}.\\+/./ge"'
    echo 'execute "%s/\\(^\\.$\\n\\)\\+/\\=strlen(substitute(submatch(0), \"\\n\", \"\", \"g\")) . \"\\r\""'
    echo 'execute "%s/^ \\{8}\"totalReplyCount\": \\(\\d\\+\\),\\(\\n.\\+\\)\\{4}\\n\\1/---Has all replies/ge"'
    echo 'execute "%s/^ \\{8}\"totalReplyCount\": \\(\\d\\+\\),\\(\\n.\\+\\)\\{4}\\n\\d\\+/---Does NOT have all replies/ge"'
    echo 'execute "g!/^---\\|^ \\{6}\"id\"/d"'
    echo 'execute "%s/^\\s\\+\"id\":.\\+\\n---Has all replies\\n//g"'
    echo 'execute "%s/^\\s\\+\"id\": \"\\([^\"]\\+\\)\",\\n---Does NOT have.\\+/curl \"https:\\/\\/www.googleapis.com\\/youtube\\/v3\\/comments?key=[KEY_HERE]\\&part=snippet,id\\&parentId=\\1\\&maxResults=100\" --header \"Accept: application\\/json\" > \\1.json/g"'
    echo 'execute "w dl_missing.sh | q!"'
  } > vim.txt
}

# Produce download_missing.sh from the threads file $1, inserting API key $2.
# Vim writes its result to dl_missing.sh and quits with q!, so the threads
# file itself is left untouched.
plan_missing_downloads() {
  local threads_file=$1 api_key=$2

  write_vim_script
  vim "$threads_file" -c 'execute "so vim.txt"'
  # The key is escaped so characters special to sed cannot corrupt the script.
  sed "s/\[KEY_HERE\]/$(sed_escape_replacement "$api_key")/g" dl_missing.sh \
    > "download_missing.sh"
  rm "dl_missing.sh"
}

# Copy the download of video $1 to youtube_comments-$1_patched.txt and splice
# each downloaded <parentId>.json into it. For every reply file a Vim script
# <file>_do.txt is generated, and the Vim command that runs it is appended to
# do.txt; the commands in do.txt are then executed one by one.
patch_missing_replies() {
  local video_id=$1
  local reply_files cmd
  local -a patch_cmds=()

  cp "youtube_comments-${video_id}.txt" "youtube_comments-${video_id}_patched.txt"

  # Keep only the text after the last space of each curl line: the output
  # file name (<parentId>.json).
  reply_files=$(sed "s/.\+ //g" download_missing.sh)

  # -- Do not run '; vim "$one" -c "execute \"so ""$args""_do.txt\""' in xargs, it breaks GNOME Terminal
  # NOTE(review): the echo strings below are unchanged from the original; the
  # backslash counts look tuned for an sh whose echo interprets escapes
  # (e.g. dash) - confirm before touching them.
  printf '%s' "$reply_files" | xargs -d "\n" sh -c 'for args do
    one=$(ls | grep _patched.txt | tr -d \\n);
    argsnojson=$(echo -n $args | sed s/.json//g);
    echo "execute \"edit $args\"" > ""$args""_do.txt;
    echo "execute \"%s/^/\\\\\=\\\"\\\\\x20\\\\\x20\\\\\x20\\\\\x20\\\\\x20\\\\\x20\\\"/g | %s/ \\\"items\\\": / \\\"comments\\\": /g\"" >> ""$args""_do.txt;
    echo "execute \"normal! gg6ddGddgg\\\"ayG\"" >> ""$args""_do.txt;
    echo "execute \"bdelete!""\"" >> ""$args""_do.txt;
    echo "execute \"edit $one\"" >> ""$args""_do.txt;
    echo "execute \"%s/^\\\\\( \\\\\{10}\\\"id\\\": \\\"$argsnojson\\\".*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\\\\n.*\\\\\)\\\\\\\\n \\\\\{8}\\\"comments\\\": \\\\\(.\\\\\+\\\\\\\\n\\\\\)\\\\\{-} \\\\\{8}\\\\\]\\\\\\\\n/\\\\\\\\1\\\\\\\\r\\\"\\\"\\\"\\\"\\\"/g | %s/\\\"\\\"\\\"\\\"\\\"/\\\\\=@a/g\"" >> ""$args""_do.txt;
    echo "execute \"wq\"" >> ""$args""_do.txt;
    echo "vim \"$one\" -c \"execute \\\"so ""$args""_do.txt\\\"\"" >> do.txt;
  done' _

  # -- done: make a loop which executes each line
  # The lines are read into an array first so Vim keeps the terminal as its
  # standard input while the commands run.
  mapfile -t patch_cmds < do.txt
  for cmd in "${patch_cmds[@]}"; do
    # NOTE(security): eval runs text generated from API response data
    # (comment IDs). Kept because do.txt stores complete shell commands.
    eval "$cmd"
  done
}

# Archive every directory entry except the patched result of video $1 into
# youtube_comments-$1_history.tar.gz, after replacing API key $2 in
# download_missing.sh with [KEY_HERE]. The originals are deleted only when
# tar succeeded.
archive_history() {
  local video_id=$1 api_key=$2
  local -r keep_re='^youtube_comments-..........._patched\.txt'
  local -a history_files=()
  local entry

  # Collected before any temporary file is created, so only pre-existing
  # entries are archived.
  for entry in *; do
    [[ -e "$entry" ]] || continue
    [[ "$entry" =~ $keep_re ]] && continue
    history_files+=("$entry")
  done
  echo "History files = ${history_files[*]}"

  sed "s/$(sed_escape_pattern "$api_key")/[KEY_HERE]/g" download_missing.sh > dl_missing.sh
  mv dl_missing.sh download_missing.sh

  if tar -I 'gzip -9' -cf "youtube_comments-${video_id}_history.tar.gz" -- "${history_files[@]}"; then
    rm -- "${history_files[@]}"
  else
    echo "Archiving failed; history files were left in place." >&2
  fi
}

# Entry point. Arguments: $1 video ID, $2 API key, $3 optional cleanup mode.
# Returns 2 (after printing usage) when the video ID or the key is missing.
main() {
  local video_id=${1:-} api_key=${2:-} cleanup=${3:-}
  local threads_file missing_bytes

  if [[ -z "$video_id" || -z "$api_key" ]]; then
    usage
    return 2
  fi
  threads_file="youtube_comments-${video_id}.txt"

  echo "Run this script in an empty folder."
  echo "This probably only works on YouTube videos which have comments which have replies."
  sleep 0.5
  echo "YouTube video ID: $video_id"
  echo "YouTube API key (SHA1 hashed): $(key_fingerprint "$api_key")"
  echo "Cleanup option: $cleanup"
  echo "Download start: $(date --utc)"

  download_threads "$video_id" "$api_key" "$threads_file"

  # -- Download missing replies
  plan_missing_downloads "$threads_file" "$api_key"
  bash "download_missing.sh"

  # More than one byte means the generated script holds at least one command.
  missing_bytes=$(wc --bytes < download_missing.sh)
  if [[ "$missing_bytes" -gt 1 ]]; then
    echo "Patching in missing replies..."
    # -- Patch in missing replies
    patch_missing_replies "$video_id"
    if [[ "$cleanup" == "gzip" ]]; then
      archive_history "$video_id" "$api_key"
    fi
  else
    echo "No missing replies."
    echo "Deleting download_missing.sh..."
    rm download_missing.sh
    echo "Deleting vim.txt..."
    echo "SHA1 hash of vim.txt: $(sha1sum vim.txt | sed 's/\s.*//g')"
    rm vim.txt
  fi

  echo "Download end: $(date --utc)"
}

main "$@"