最近感觉播客不够听,大家来推荐一下自己喜欢的高质量播客吧!
我先来推荐几个:
时政类:不明白播客
影视类:没折腰 FM、反派影评
资讯简报类:No News Is Good News 今日无事发生、声动早咖啡
脱口秀类:谐星聊天会
其他:裸辞后的快乐生活
最近感觉播客不够听,大家来推荐一下自己喜欢的高质量播客吧!
我先来推荐几个:
时政类:不明白播客
影视类:没折腰 FM、反派影评
资讯简报类:No News Is Good News 今日无事发生、声动早咖啡
脱口秀类:谐星聊天会
其他:裸辞后的快乐生活
我来几个 BBC World Service 的 podcast:
latest five minute news bulletin
Business Matters
Global News Podcast
官方 BBC Sounds App 访问太慢,我写了一个脚本用 cron 定时运行,缓存下来(虽然他们用了 CloudFront,但每个节目的下载速度连在美国访问都只有几百 K/s):
#!/usr/bin/env python3
"""Mirror BBC World Service podcast episodes locally (intended to run via cron)."""
import dataclasses
import logging
import os
from collections import namedtuple
from pathlib import Path

import redis
import requests
import yt_dlp
from lxml import etree

# Redis tracks per-episode download state ('handling' / 'downloaded' / 'failed').
r = redis.Redis(host='localhost', port=6379, db=0)

# Download root: $BBCROOT, or a /tmp fallback when unset.
bbcroot = os.environ.get('BBCROOT')
if bbcroot is None:
    bbcroot = '/tmp/bbc_dl/'
    logging.warning(f'BBCROOT not set, using default: {bbcroot}')
saving_dir = Path(bbcroot)
# parents=True so a nested BBCROOT (e.g. ~/media/bbc) does not crash the run.
saving_dir.mkdir(parents=True, exist_ok=True)

UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0'
session = requests.Session()
# The UA was defined but never applied to the session; attach it so page
# fetches actually send it.
session.headers['User-Agent'] = UA

# Sometimes, when an episode has come too soon, the page may say
# 'This episode will be available soon', so a second_audio_xpath (pointing at
# the previous episode) is a helpful fallback.
Program = namedtuple('Program', ['program_link', 'first_audio_xpath', 'second_audio_xpath', 'type'])
class Programs:
    """Registry of every podcast programme to crawl.

    Each instance self-registers: merely constructing a (subclass) object
    appends its Program record to the shared class-level list.
    """

    items = []  # shared registry, deliberately class-level

    def __init__(self, *args):
        # Register on construction so module-level instantiations below
        # build the crawl list as a side effect.
        Programs.items.append(Program(*args))

    @classmethod
    def list_objects(cls):
        """Return all registered Program records."""
        return cls.items
@dataclasses.dataclass
class Link:
    """A downloadable episode URL plus the directory to save it into."""
    url: str
    # This field is constructed as `saving_dir / title` and later has
    # `.mkdir()` called on it, so it is a Path — the original `str`
    # annotation was wrong.
    saving_path: Path
class BBCPrograms(Programs):
    """A registered programme whose source type is tagged 'bbc'."""

    def __init__(self, *args):
        # Append the source tag so subclasses/callers never pass it themselves.
        super().__init__(*args, 'bbc')
class BBCRegularPrograms(BBCPrograms):
    """A BBC programme scraped from its /programmes/<id>/episodes/player page.

    An alternative source would be the /sounds/series/<id> page (e.g.
    https://www.bbc.co.uk/sounds/series/p02nq0gn) with its own pair of
    xpaths for the first and second episode links.
    """

    # xpaths of the newest and second-newest episode links on the player page
    episodes_player_first_audio_xpath = '/html/body/div[6]/div/div/div[2]/div[4]/ol[1]/div[1]/div[2]/h2/a'
    episodes_player_second_audio_xpath = '/html/body/div[6]/div/div/div[2]/div[4]/ol[1]/div[2]/div[2]/h2/a'

    def __init__(self, id):
        listing_url = f'https://www.bbc.co.uk/programmes/{id}/episodes/player'
        super().__init__(listing_url,
                         BBCRegularPrograms.episodes_player_first_audio_xpath,
                         BBCRegularPrograms.episodes_player_second_audio_xpath)
class NPRPrograms(Programs):
    """A registered programme tagged 'npr'.

    NPR pages provide no fallback episode link, so the second xpath slot is
    filled with an empty string.
    """

    def __init__(self, *args):
        super().__init__(*args, '', 'npr')
# Register every BBC programme to mirror.  Constructing a BBCRegularPrograms
# self-registers it in Programs.items; the variable names exist only for
# readability.
Five_Min_News = BBCRegularPrograms('p002vsmz')
Global_News_Podcast = BBCRegularPrograms('p02nq0gn')
Tech_Life = BBCRegularPrograms('p01plr2p')
Witness_History = BBCRegularPrograms('p004t1hd')
Business_Matters = BBCRegularPrograms('p016tl04')
Newsday = BBCRegularPrograms('p00w940j')
The_Newsroom = BBCRegularPrograms('p016tklr')
Newshour = BBCRegularPrograms('p002vsnk')
World_Business_Report = BBCRegularPrograms('p00fvhj7')
The_Real_Story = BBCRegularPrograms('p02dbd4m')
def printError(*args):
    """Print *args (stringified and concatenated with no separator) in ANSI red."""
    body = ''.join(str(a) for a in args)
    print('\033[91m' + body + '\033[0m')
links = []
# Resolve the newest and second-newest episode page URLs for every registered
# programme.  A failure on one programme (page layout change, network error,
# empty xpath match, unexpected title format) is reported and skipped instead
# of aborting the whole crawl, as it did originally.
for target in Programs.list_objects():
    try:
        page_html = session.get(target.program_link)
        tree = etree.HTML(page_html.text)
        title = tree.findtext('.//title')
        first_program_link = tree.xpath(target.first_audio_xpath)[0].attrib['href']
        second_program_link = tree.xpath(target.second_audio_xpath)[0].attrib['href']
        # Episode hrefs on the listing page may be relative; absolutize them.
        if not first_program_link.startswith('https://'):
            first_program_link = f'https://www.bbc.co.uk{first_program_link}'
        if not second_program_link.startswith('https://'):
            second_program_link = f'https://www.bbc.co.uk{second_program_link}'
        # The page title is split on ' - ' and the middle segment used as the
        # per-programme subdirectory name.
        middle_title = title.split(' - ')[1].strip()
        links.append(Link(first_program_link, saving_dir / middle_title))
        links.append(Link(second_program_link, saving_dir / middle_title))
    except Exception as e:
        printError(f'failed to resolve {target.program_link}: {e}')
def dl_link(link):
    """Download one episode with yt-dlp, tracking its state in redis.

    The redis value stored under the URL acts as a cross-process marker:
      'handling'   -> claimed by a worker (or a previous run died mid-way)
      'downloaded' -> already fetched, skip
      'failed'     -> previous attempt failed; fall through and retry
    """
    url = link.url
    saving_path = link.saving_path
    status = None
    if (got := r.get(url)) is not None:
        status = got.decode()
    if status in ('downloaded', 'handling'):
        # Original message claimed "downloaded" even for 'handling'.
        print(f'{url} has already been downloaded or is being handled')
        return
    # NOTE(review): get-then-set is not atomic, so two workers can both claim
    # the same URL.  Worst case is a duplicate download attempt, so this is
    # tolerated rather than replaced with SET NX.
    r.set(url, 'handling')  # write-lock
    saving_path.mkdir(parents=True, exist_ok=True)
    # yt-dlp is used as a library: a plain subprocess call would not expose
    # the extracted info needed to distinguish success from an empty result.
    try:
        with yt_dlp.YoutubeDL({'paths': {'home': str(saving_path)}}) as ydl:
            info = ydl.extract_info(url)
        if info is not None:
            r.set(url, 'downloaded')
        else:  # extractor produced nothing (empty file)
            printError(info)
            r.set(url, 'failed')
    except Exception as e:
        printError(e)
        r.set(url, 'failed')
import multiprocessing as mp

# Fan the downloads out over worker processes.  The context manager closes
# and joins the pool (the original leaked it).
with mp.Pool(processes=6) as pool:
    res = pool.map(dl_link, links)
# For each programme subdirectory, (re)point a stable top-level symlink
# (<programme><ext>) at the newest downloaded file, so clients can always
# fetch a fixed path.
for pod_subdir in saving_dir.glob('*'):
    if pod_subdir.is_dir():
        try:
            # Newest file by inode change time (~= download time here).
            # Raises ValueError on an empty directory, which the except
            # below reports (same as the original's max() over no files).
            latest_path = max(pod_subdir.glob('*'), key=lambda p: p.stat().st_ctime)
            # Use .name: the original interpolated the whole pod_subdir path
            # and only worked because joining an absolute path onto
            # saving_dir discards the left-hand side; with a relative
            # BBCROOT it produced a wrong, doubled path.
            # `suffix` itself already contains the '.'.
            sym_dst_path = saving_dir / f'{pod_subdir.name}{latest_path.suffix}'
            sym_dst_path.unlink(missing_ok=True)  # os.symlink cannot overwrite
            os.symlink(latest_path, sym_dst_path)
        except Exception as e:
            printError(e)
    else:
        # Only our own symlinks are expected directly under saving_dir.
        if not pod_subdir.is_symlink():
            printError(f'we should NOT have files in subdir: {pod_subdir}')
# Prune cached episodes older than a week.  An argument list (not a shell
# string) is used so a BBCROOT containing spaces or shell metacharacters
# cannot break the command or inject into the shell.
import subprocess
subprocess.run(['find', bbcroot, '-mtime', '+7', '-type', 'f', '-delete'])
# If the script complains that an episode was already downloaded while it was
# not, delete that URL's key in redis.
Politics related:
Analyse Asia with Bernard Leong
Economic Update
NCUSCR Interviews
David Harvey’s Anti-Capitalist Chronicles
Miscellaneous:
EconTalk
Hidden Brain
Merriam-Webster’s word of the Day
Nature Podcast
Philosophize This!
Sticky Notes: The Classical Music Podcast
响声播客
TED Talks Daily