播客推荐

最近感觉播客不够听,大家来推荐一下自己喜欢的高质量播客吧!
我先来推荐几个:
时政类:不明白播客

影视类:没折腰 FM、反派影评

资讯简报类:No News Is Good News 今日无事发生、声动早咖啡

脱口秀类:谐星聊天会

其他:裸辞后的快乐生活

我来几个 BBC World Service 的 podcast:
latest five minute news bulletin
Business Matters
Global News Podcast

官方 BBC Sounds App 访问太慢,我写了一个脚本用 cron 定时运行,缓存下来(虽然他们用了 CloudFront,但每个节目的下载速度连在美国访问都只有几百 K/s):

#!/usr/bin/env python3
import dataclasses
import logging
import os
from collections import namedtuple
from pathlib import Path

import redis

# Redis connection (localhost:6379, db 0). dl_link() stores one status string
# per episode URL here so repeated runs can tell what was already handled.
r = redis.Redis(host='localhost', port=6379, db=0)

import yt_dlp

# Root directory for all downloads: taken from $BBCROOT, falling back to a
# temporary location (with a warning) when the variable is unset.
bbcroot = os.environ.get('BBCROOT')
if bbcroot is None:
    bbcroot = '/tmp/bbc_dl/'
    logging.warning('BBCROOT not set, using default: %s', bbcroot)
saving_dir = Path(bbcroot)
# parents=True so that a nested BBCROOT whose parent directories do not exist
# yet is created instead of raising FileNotFoundError.
saving_dir.mkdir(parents=True, exist_ok=True)
import requests

# Desktop-Firefox User-Agent string.
# NOTE(review): nothing in the visible script attaches UA to the session or to
# a request, so page fetches appear to go out with the library's default
# User-Agent -- confirm whether that is intended.
UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0'

# One shared HTTP session used to fetch every programme page.
session = requests.Session()
from lxml import etree

# A programme page can list an episode before its audio is published (the page
# then says 'This episode will be available soon'), so a second XPath pointing
# at the next entry is kept alongside the first one.
Program = namedtuple(
    'Program',
    'program_link first_audio_xpath second_audio_xpath type',
)


class Programs:
    """Registry of programmes to scrape.

    Constructing an instance (of this class or of a subclass) builds a
    ``Program`` tuple from the positional arguments and appends it to the
    class-level ``items`` list. The instance itself stores nothing.
    """

    # Single list owned by ``Programs``; __init__ always appends to this one,
    # whichever subclass is being instantiated.
    items = []

    def __init__(self, *args):
        record = Program(*args)
        Programs.items.append(record)

    @classmethod
    def list_objects(cls):
        """Return the list of registered ``Program`` tuples."""
        registered = cls.items
        return registered


@dataclasses.dataclass
class Link:
    """One episode to download and the directory it should be saved into."""

    # Episode page URL; passed to yt-dlp and also used as the Redis status key.
    url: str
    # Target directory. The script always passes a pathlib.Path here (and
    # dl_link calls .mkdir() on it), so the annotation is Path, not str.
    saving_path: Path


class BBCPrograms(Programs):
    """Programme registered with its ``type`` field fixed to ``'bbc'``."""

    def __init__(self, *args):
        # Callers supply the link and both XPaths; 'bbc' is appended as the
        # final positional value before registering.
        tagged = (*args, 'bbc')
        super().__init__(*tagged)


class BBCRegularPrograms(BBCPrograms):
    """BBC programme addressed by its programme id.

    The programme is registered with its ``/episodes/player`` listing URL and
    the two XPaths below, which select the first and second episode anchors
    on that listing page.
    """

    # Another URL + selector combination that also works:
    # BBCPrograms('https://www.bbc.co.uk/sounds/series/p02nq0gn',
    #                                   '/html/body/div[2]/div/div/div/div/div[2]/div/ul/li[1]/article/a',
    #                                   '/html/body/div[2]/div/div/div/div/div[2]/div/ul/li[2]/article/a')
    episodes_player_first_audio_xpath = (
        '/html/body/div[6]/div/div/div[2]/div[4]/ol[1]/div[1]/div[2]/h2/a'
    )
    episodes_player_second_audio_xpath = (
        '/html/body/div[6]/div/div/div[2]/div[4]/ol[1]/div[2]/div[2]/h2/a'
    )

    def __init__(self, id):
        listing_url = f'https://www.bbc.co.uk/programmes/{id}/episodes/player'
        first_xpath = BBCRegularPrograms.episodes_player_first_audio_xpath
        second_xpath = BBCRegularPrograms.episodes_player_second_audio_xpath
        super().__init__(listing_url, first_xpath, second_xpath)


class NPRPrograms(Programs):
    """Programme registered with ``type`` ``'npr'`` and an empty second XPath."""

    def __init__(self, *args):
        # An NPR programme has no second_audio_xpath, so that field is
        # registered as an empty string.
        no_second_xpath = ''
        super().__init__(*args, no_second_xpath, 'npr')


# Programmes to mirror. Instantiating each one appends its Program record to
# Programs.items, which the scraping loop below reads through
# Programs.list_objects(); the variable names themselves are not referenced
# again in the visible script.
Five_Min_News = BBCRegularPrograms('p002vsmz')
Global_News_Podcast = BBCRegularPrograms('p02nq0gn')
Tech_Life = BBCRegularPrograms('p01plr2p')
Witness_History = BBCRegularPrograms('p004t1hd')
Business_Matters = BBCRegularPrograms('p016tl04')
Newsday = BBCRegularPrograms('p00w940j')
The_Newsroom = BBCRegularPrograms('p016tklr')
Newshour = BBCRegularPrograms('p002vsnk')
World_Business_Report = BBCRegularPrograms('p00fvhj7')
The_Real_Story = BBCRegularPrograms('p02dbd4m')

def printError(*args):
    """Print the arguments, joined without separators, in red.

    Each argument is converted with ``str()``. The text is preceded by the
    ANSI escape ``\\033[91m`` and followed by the reset escape ``\\033[0m``.
    """
    red, reset = '\033[91m', '\033[0m'
    message = ''.join(str(piece) for piece in args)
    print(f'{red}{message}{reset}')


# Scrape every registered programme page and collect a Link for each episode
# anchor found. A page that cannot be fetched or parsed is reported and
# skipped, so one broken programme does not abort the whole run.
links = []
for target in Programs.list_objects():
    try:
        # The timeout keeps a single unresponsive page from hanging the run.
        page_html = session.get(target.program_link, timeout=30)
        tree = etree.HTML(page_html.text)
        title = tree.findtext('.//title')
        # The second ' - '-separated field of the page title is used as the
        # name of the per-programme download directory.
        middle_title = title.split(' - ')[1].strip()
    except (requests.RequestException, etree.LxmlError,
            AttributeError, IndexError) as e:
        # AttributeError: empty document or missing <title>;
        # IndexError: title without a ' - ' separator.
        printError(f'skipping {target.program_link}: {e}')
        continue

    for audio_xpath in (target.first_audio_xpath, target.second_audio_xpath):
        if not audio_xpath:
            # Programmes without a second XPath register it as ''.
            continue
        anchors = tree.xpath(audio_xpath)
        href = anchors[0].get('href') if anchors else None
        if not href:
            printError(f'no episode link for {audio_xpath} on {target.program_link}')
            continue
        if not href.startswith('https://'):
            # Site-relative link: prefix the BBC host.
            href = f'https://www.bbc.co.uk{href}'
        links.append(Link(href, saving_dir / middle_title))


def dl_link(link):
    """Download one episode with yt-dlp, tracking its status in Redis.

    The Redis key is the episode URL and its value is one of 'handling',
    'downloaded' or 'failed'. URLs that are already 'downloaded' or currently
    'handling' are skipped. A URL previously marked 'failed' is attempted
    again; before this change it was silently skipped on every later run,
    so an episode that was not yet available could never be fetched.

    Args:
        link: a ``Link`` whose ``url`` is the episode page and whose
            ``saving_path`` is the directory (a ``Path``) to download into.

    Returns:
        None. The outcome is recorded in Redis; errors are printed in red.
    """
    url = link.url
    saving_path = link.saving_path

    got = r.get(url)
    status = got.decode() if got is not None else None
    if status in ('downloaded', 'handling'):
        print(f'{url} has already been downloaded')
        return

    if status is None:
        # SET ... NX only succeeds when the key does not exist yet, so of
        # several workers racing for a fresh URL exactly one claims it.
        if not r.set(url, 'handling', nx=True):
            print(f'{url} has already been downloaded')
            return
    else:
        # Any other stored status (i.e. 'failed'): take it over and retry.
        r.set(url, 'handling')

    saving_path.mkdir(parents=True, exist_ok=True)
    # yt-dlp is driven through its Python API rather than as a subprocess so
    # that the extraction result can be inspected.
    ydl = yt_dlp.YoutubeDL({'paths': {'home': str(saving_path)}})
    try:
        info = ydl.extract_info(url)
        if info is not None:
            r.set(url, 'downloaded')
        else:  # nothing was extracted
            printError(info)
            r.set(url, 'failed')
    except Exception as e:
        # Worker boundary: record the failure instead of killing the pool.
        printError(e)
        r.set(url, 'failed')

import multiprocessing as mp

# Download all collected links with six worker processes. map() blocks until
# every link has been processed; leaving the ``with`` block then shuts the
# pool down, so no worker processes are left behind.
with mp.Pool(processes=6) as pool:
    res = pool.map(dl_link, links)


import subprocess

# For every programme directory, point "<saving_dir>/<programme><ext>" at the
# most recently changed entry inside it. The glob result is materialised first
# because the loop itself creates entries in saving_dir.
for pod_subdir in list(saving_dir.glob('*')):
    if not pod_subdir.is_dir():
        if not pod_subdir.is_symlink():
            printError(f'we should NOT have files in subdir: {pod_subdir}')
            # shutil.move(pod_subdir, saving_dir / pod_subdir.name.split(',')[0])
        continue
    try:
        latest_path = max(pod_subdir.glob('*'),
                          key=lambda p: p.stat().st_ctime,
                          default=None)
        if latest_path is None:
            # Empty directory (e.g. after old files were purged): no link.
            continue
        # Absolute target so the link resolves even when BBCROOT is relative.
        sym_src_path = os.path.abspath(latest_path)
        # Use only the directory *name*: interpolating the whole path made the
        # destination wrong whenever BBCROOT was a relative path.
        sym_dst_path = saving_dir / f'{pod_subdir.name}{latest_path.suffix}'  # `suffix` itself has '.'
        # os.symlink cannot overwrite, so remove any previous link first.
        # Doing this in Python instead of a shell `ln -sf` string keeps file
        # names containing quotes or `$` from being interpreted by the shell.
        sym_dst_path.unlink(missing_ok=True)
        os.symlink(sym_src_path, sym_dst_path)
    except OSError as e:
        printError(e)

# Delete regular files older than 7 days. Passing an argument list (no shell)
# keeps a BBCROOT containing spaces or shell metacharacters intact.
subprocess.run(['find', bbcroot, '-mtime', '+7', '-type', 'f', '-delete'],
               check=False)
# If the script reports a URL as already downloaded when it is not, delete
# that URL's key in Redis.
1 Like

Politics related:
Analyse Asia with Bernard Leong
Economic Update
NCUSCR Interviews
David Harvey’s Anti-Capitalist Chronicles

Miscellaneous:
EconTalk
Hidden Brain
Merriam-Webster’s word of the Day
Nature Podcast
Philosophize This!
Sticky Notes: The Classical Music Podcast
响声播客
TED Talks Daily