見出し画像

【Python】YouTubeの動画からコメントを取得する方法【YouTube Data API】

コメントを取得する手順の動画はこちら


コード

from api import variable
import requests
import pandas as pd

# Accumulates one tuple per collected comment or reply; the pagination loop
# further down appends to it and the result is turned into a DataFrame.
comments = []

def get_comment(channel_id, video_id, pageToken, timeout=30):
    """Fetch one page of comment threads from the YouTube Data API v3.

    Args:
        channel_id: Channel whose related comment threads are requested.
            Only used when ``video_id`` is empty or ``None``.
        video_id: Video whose comment threads are requested. When it is
            empty or ``None`` the channel-wide query is issued instead.
        pageToken: Token of the page to fetch; a falsy value requests the
            first page.
        timeout: Seconds passed to ``requests.get`` as its timeout so a
            stalled connection cannot block forever.

    Returns:
        The decoded JSON body of the response, returned as-is (the HTTP
        status is not inspected here).
    """
    comment_url = "https://www.googleapis.com/youtube/v3/commentThreads"

    # Parameters shared by the channel query and the video query.
    param = {
        "key": variable.youtube_api_key,
        "part": "replies, snippet",
        "maxResults": "100",
    }

    if not video_id:
        # No video given (None or ""): fetch comments related to the channel.
        # An empty string used to fall through to the video branch and was
        # sent to the API as videoId="".
        param.update({
            "allThreadsRelatedToChannelId": channel_id,
            "moderationStatus": "published",  # alternatives: likelySpam, heldForReview
            "order": "time",  # alternative: relevance
            "searchTerms": "",  # limits the response to comments containing the given search terms
            "textFormat": "html",  # html (default) or plainText
        })
    else:
        param["videoId"] = video_id

    if pageToken:
        param["pageToken"] = pageToken

    req = requests.get(comment_url, params=param, timeout=timeout)
    return req.json()

# Fill in exactly one of the two IDs below before running.
channel_id = ""
# Example channel URL: https://www.youtube.com/channel/UCpMPdgKNlydow5ZUsbyhjzg
# Example video URL:   https://www.youtube.com/watch?v=33mwRxwP5ZE
video_id = ""


def _comment_row(thread_video_id, comment_snippet):
    """Build one output row from a comment (or reply) snippet dict."""
    # authorChannelId is read defensively so a comment without it yields
    # None in that column instead of raising KeyError.
    author_channel = (comment_snippet.get("authorChannelId") or {}).get("value")
    return (
        thread_video_id,
        comment_snippet["textDisplay"],
        comment_snippet["authorDisplayName"],
        # NOTE(review): this value lands in the "authorChannelUrl" column but
        # is read from "authorProfileImageUrl" - confirm which one is intended.
        comment_snippet["authorProfileImageUrl"],
        author_channel,
        comment_snippet["publishedAt"],
    )


pageToken = ""
while pageToken is not None:
    # Pass None rather than "" so an empty video_id selects the channel query.
    req = get_comment(channel_id, video_id or None, pageToken)
    if "error" in req:
        # Surface the API's own error payload instead of a bare KeyError on
        # the missing "items" key.
        raise RuntimeError(f"YouTube API request failed: {req['error']}")

    for comment_thread in req.get("items", []):
        snippet = comment_thread["snippet"]
        # Kept in its own name: assigning to video_id here would change the
        # query issued for the next page.
        thread_video_id = snippet.get("videoId")
        comments.append(_comment_row(thread_video_id, snippet["topLevelComment"]["snippet"]))
        # Replies are only present on threads that have them.
        for reply in comment_thread.get("replies", {}).get("comments", []):
            comments.append(_comment_row(thread_video_id, reply["snippet"]))

    # A response without nextPageToken is the last page; None ends the loop.
    pageToken = req.get("nextPageToken")


comment_df = pd.DataFrame(comments, columns=["video_id", "textDisplay", "author_name", "authorChannelUrl", "channel_id", "publishedAt"])
comment_df.to_csv('./sample.csv')

この記事が気に入ったらサポートをしてみませんか?