Yogesh Bansal
Posted on October 4, 2024
import requests
import csv
import time
import pandas as pd
API_KEY = 'YOUR_API_KEY' # Replace with your actual YouTube Data API key
# List of search keywords related to travel; each one is issued as a
# separate search query by collect_travel_channels().
SEARCH_KEYWORDS = [
'travel vlog',
'travel guide',
'food travel',
'adventure travel',
'backpacking',
'cultural travel experiences',
'travel tips',
'travel destinations',
'travel blogger',
'world travel',
'wanderlust',
'exploring the world',
'travel channel',
'travel documentary',
'road trips'
]
# Base URL for YouTube Data API (v3); endpoint paths are appended to it.
BASE_URL = 'https://www.googleapis.com/youtube/v3'
def collect_travel_channels(api_key, search_keywords):
    """Collect travel-related YouTube channels based on search keywords.

    For each keyword, queries the YouTube Data API search endpoint
    (type=channel, first page only, so at most 50 results per keyword),
    de-duplicates channels by channel ID across keywords, and writes the
    combined result to 'travel_channels.csv'.

    Args:
        api_key: YouTube Data API key.
        search_keywords: Iterable of search query strings.
    """
    channels = []
    channel_ids_set = set()  # IDs already collected, across all keywords
    for keyword in search_keywords:
        print(f"Searching for keyword: {keyword}")
        params = {
            'part': 'snippet',
            'q': keyword,
            'type': 'channel',
            'maxResults': 50,
            'key': api_key
        }
        try:
            # timeout keeps the script from hanging forever on a stalled
            # connection; raise_for_status surfaces HTTP-level failures
            response = requests.get(f"{BASE_URL}/search", params=params,
                                    timeout=30)
            response.raise_for_status()
            result = response.json()
        except (requests.RequestException, ValueError) as exc:
            # One failed keyword should not abort the whole collection run
            print(f"Error in response: {exc}")
            time.sleep(1)  # still respect the rate limit before moving on
            continue
        if 'items' in result:
            for item in result['items']:
                channel_id = item['snippet']['channelId']
                if channel_id not in channel_ids_set:
                    channel_ids_set.add(channel_id)
                    channels.append({
                        'channelId': channel_id,
                        'channelTitle': item['snippet']['channelTitle'],
                        'description': item['snippet']['description']
                    })
        else:
            # HTTP 200 but no 'items' (e.g. quota-error payload)
            print(f"Error in response: {result}")
        # Pause to respect API rate limits
        time.sleep(1)
    # Save channels to CSV for the next pipeline stage
    channels_df = pd.DataFrame(channels)
    channels_df.to_csv('travel_channels.csv', index=False, encoding='utf-8')
    print("Finished collecting channel data.")
def collect_channel_videos(api_key):
    """Retrieve the video list of every channel in 'travel_channels.csv'.

    For each channel, resolves its uploads playlist and gathers all
    videos from it, then writes the combined rows to
    'channel_videos.csv'.
    """
    channels_df = pd.read_csv('travel_channels.csv', encoding='utf-8')
    all_videos = []
    for _, record in channels_df.iterrows():
        cid = record['channelId']
        title = record['channelTitle']
        print(f"Processing channel: {title} (ID: {cid})")
        # Each channel's uploads live in a dedicated playlist
        uploads_id = get_uploads_playlist_id(api_key, cid)
        if uploads_id:
            for video in get_videos_from_playlist(api_key, uploads_id):
                # Prefix each video row with its channel identity
                all_videos.append({
                    'channelId': cid,
                    'channelTitle': title,
                    **video,
                })
        else:
            print(f"Skipping channel {title} due to missing uploads playlist.")
        # Brief pause between channels to respect API rate limits
        time.sleep(1)
    pd.DataFrame(all_videos).to_csv('channel_videos.csv', index=False,
                                    encoding='utf-8')
    print("Finished collecting video data.")
def get_uploads_playlist_id(api_key, channel_id):
    """Return the 'uploads' playlist ID for a channel, or None on failure.

    A channel's uploaded videos are exposed through a special playlist
    referenced at contentDetails.relatedPlaylists.uploads in the
    channels.list response.
    """
    params = {
        'part': 'contentDetails',
        'id': channel_id,
        'key': api_key
    }
    try:
        # timeout prevents an indefinite hang on a stalled connection
        response = requests.get(f"{BASE_URL}/channels", params=params,
                                timeout=30)
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Could not get uploads playlist for channel ID: {channel_id} ({exc})")
        return None
    try:
        # KeyError/IndexError means the payload lacks the expected playlist
        return result['items'][0]['contentDetails']['relatedPlaylists']['uploads']
    except (KeyError, IndexError):
        print(f"Could not get uploads playlist for channel ID: {channel_id}")
        return None
def get_videos_from_playlist(api_key, playlist_id):
    """Return all videos in a playlist as a list of dicts.

    Follows nextPageToken pagination until exhausted. Each dict carries
    'videoId', 'videoTitle' and 'publishedAt' keys; 'publishedAt' may be
    None when the API omits videoPublishedAt (e.g. private or
    unavailable videos).
    """
    videos = []
    params = {
        'part': 'snippet,contentDetails',
        'playlistId': playlist_id,
        'maxResults': 50,
        'key': api_key
    }
    while True:
        try:
            # timeout prevents an indefinite hang on a stalled connection
            response = requests.get(f"{BASE_URL}/playlistItems",
                                    params=params, timeout=30)
            response.raise_for_status()
            result = response.json()
        except (requests.RequestException, ValueError) as exc:
            print(f"Error retrieving videos: {exc}")
            break
        if 'items' not in result:
            print(f"Error retrieving videos: {result}")
            break
        for item in result['items']:
            details = item['contentDetails']
            videos.append({
                'videoId': details['videoId'],
                'videoTitle': item['snippet']['title'],
                # videoPublishedAt can be absent for some items; use None
                # instead of raising KeyError and losing the whole playlist
                'publishedAt': details.get('videoPublishedAt')
            })
        if 'nextPageToken' in result:
            params['pageToken'] = result['nextPageToken']
            time.sleep(0.5)  # brief pause between pages for rate limiting
        else:
            break
    return videos
def main():
    """Run the two-stage pipeline: collect channels, then their videos."""
    collect_travel_channels(API_KEY, SEARCH_KEYWORDS)
    collect_channel_videos(API_KEY)
    print("Data collection complete.")


if __name__ == '__main__':
    main()
💖 💪 🙅 🚩
Yogesh Bansal
Posted on October 4, 2024
Join our newsletter. No spam, only the good stuff.
Sign up to receive the latest update from our blog.
Related
webdev Industries Transformed by Information Systems: Education, Healthcare, News, Travel
April 17, 2023