教程标题:《PHP打造多功能抖音视频抓取工具:从永久标识解析到视频下载与分析》
一、前言
抖音作为当前最流行的短视频平台,其视频分享机制与B站类似,都采用了永久标识符的设计理念。B站使用bvid,而抖音则使用aweme_id(作品ID)作为视频的唯一永久标识。通过这个标识,我们可以动态获取视频的最新播放地址,实现视频的抓取、下载和数据分析。
本教程将带你从零开始,使用PHP实现一个完整的抖音视频抓取工具,涵盖:
- 抖音视频永久标识解析
- 视频真实地址提取
- 视频下载与保存
- 视频元数据分析(点赞、评论、作者信息等)
二、抖音视频标识体系解析
2.1 抖音视频的永久标识
抖音视频的URL通常有以下几种格式:
https://www.douyin.com/video/7263456789012346123
https://www.douyin.com/note/7263456789012346123
https://v.douyin.com/xxxxxx/ (短链接)
其中 7263456789012346123 就是aweme_id,即抖音视频的永久标识符。与B站的bvid类似,这个ID不会改变,但背后的视频资源地址会定期更新或根据网络环境变化。
2.2 抓取流程
用户输入(aweme_id或分享链接)
↓
解析获取aweme_id
↓
请求抖音API获取视频信息
↓
提取视频下载地址
↓
下载视频并保存
↓
分析视频数据
三、环境准备
3.1 系统要求
- PHP 7.4+
- cURL扩展
- JSON扩展
- fileinfo扩展
- 足够的磁盘空间
3.2 依赖库
# 无需额外安装,使用PHP原生扩展即可
composer require guzzlehttp/guzzle # 可选,用于简化HTTP请求
四、核心功能实现
4.1 抖音分享链接解析类
<?php
/**
* 抖音链接解析器
* 支持多种抖音链接格式,提取aweme_id
*/
class DouyinUrlParser
{
/**
 * Extract the aweme_id (the permanent video identifier) from user input.
 *
 * Accepts a bare 19-digit ID, a douyin.com video/note URL, or a
 * v.douyin.com short link (possibly embedded in surrounding share text).
 * Short links are resolved through resolveShortUrl(), which performs an
 * HTTP request.
 *
 * @param string $input URL, share text or bare aweme_id
 * @return string|null The aweme_id, or null when nothing recognisable is found
 */
public static function extractAwemeId($input)
{
    // Pasted input routinely carries surrounding whitespace or a trailing
    // newline; strip it so a bare ID is still recognised.
    $input = trim((string) $input);

    // \A...\z rather than ^...$ so a trailing newline can never be accepted.
    if (preg_match('/\A\d{19}\z/', $input)) {
        return $input;
    }

    // Short link: https://v.douyin.com/xxxxxx/
    // NOTE(review): "-" and "_" are accepted in addition to alphanumerics so
    // that a code containing them is not truncated at the first such
    // character; confirm against real share links.
    if (preg_match('/v\.douyin\.com\/([A-Za-z0-9_-]+)/', $input, $matches)) {
        return self::resolveShortUrl($matches[1]);
    }

    // Standard link: https://www.douyin.com/video/7263456789012346123
    if (preg_match('/douyin\.com\/(?:video|note)\/(\d{19})/', $input, $matches)) {
        return $matches[1];
    }

    return null;
}
/**
 * Resolve a v.douyin.com short code to the aweme_id of its redirect target.
 *
 * Issues one GET request without following redirects and inspects the
 * redirect target reported by cURL for a "/video/<id>" or "/note/<id>"
 * path segment.
 *
 * @param string $shortCode The path segment of the short link
 * @return string|null The 19-digit aweme_id, or null when the request fails,
 *                     is not a redirect, or the target carries no ID
 */
private static function resolveShortUrl($shortCode)
{
    $shortUrl = 'https://v.douyin.com/' . rawurlencode($shortCode) . '/';

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL            => $shortUrl,
        CURLOPT_RETURNTRANSFER => true,
        // Do not follow the redirect: only its target is of interest.
        CURLOPT_FOLLOWLOCATION => false,
        // Bound the call so an unreachable host cannot hang the caller.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT        => 15,
        CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    ]);
    $response = curl_exec($ch);
    $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Let cURL report the redirect target instead of regex-parsing the raw
    // header block: header names are case-insensitive (and lowercase over
    // HTTP/2), which a literal "Location:" match misses.
    $location = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
    curl_close($ch);

    if ($response === false || $httpCode < 300 || $httpCode >= 400) {
        return null;
    }
    if (!is_string($location) || $location === '') {
        return null;
    }

    if (preg_match('/(?:video|note)\/(\d{19})/', $location, $idMatches)) {
        return $idMatches[1];
    }

    return null;
}
}
4.2 抖音视频信息获取类
<?php
/**
* 抖音视频信息获取器
* 通过aweme_id获取视频详细信息
*/
class DouyinVideoFetcher
{
private $userAgent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1';
private $cookie = ''; // 可选的cookie,用于提高请求成功率
/**
 * Replace the User-Agent string this fetcher sends with its HTTP requests.
 *
 * @param string $ua User-Agent value; stored as-is, no validation is done
 */
public function setUserAgent($ua)
{
$this->userAgent = $ua;
}
/**
 * Fetch and normalise the details of one video.
 *
 * Requests the iteminfo endpoint for the given ID and, when the response
 * contains a first entry in "item_list", passes it to parseVideoInfo().
 *
 * @param string $awemeId Video ID to look up
 * @return array|null Normalised video info, or null when the request fails
 *                    or the response has no usable item
 */
public function fetchVideoInfo($awemeId)
{
    // Build the query string with http_build_query() so the ID is always
    // URL-encoded, whatever the caller passed in.
    $apiUrl = 'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?'
        . http_build_query(['item_ids' => $awemeId]);

    $response = $this->httpGet($apiUrl);
    if ($response === false || $response === '') {
        return null;
    }

    $data = json_decode($response, true);
    // Guard the shape before parsing: invalid JSON decodes to null, and a
    // non-array item would break the array accesses in parseVideoInfo().
    if (!is_array($data) || !isset($data['item_list'][0]) || !is_array($data['item_list'][0])) {
        return null;
    }

    return $this->parseVideoInfo($data['item_list'][0]);
}
/**
 * Convert one raw API item into the flat structure used by this tool.
 *
 * Every field is optional in the input; missing values fall back to an
 * empty string, 0 or an empty array.
 *
 * @param array $rawData One decoded entry of the API's "item_list"
 * @return array Keys: aweme_id, desc, create_time (formatted with the
 *               default timezone), duration (raw value / 1000), cover,
 *               video_urls, author, statistics, music
 */
private function parseVideoInfo($rawData)
{
    $video = $rawData['video'] ?? [];
    $author = $rawData['author'] ?? [];
    $statistics = $rawData['statistics'] ?? [];
    // NOTE(review): the payload is believed to carry "music" as a sibling of
    // "video"; the previous code only looked under "video". Both locations
    // are checked so either layout works -- confirm against a live response.
    $music = $rawData['music'] ?? $video['music'] ?? [];

    // Collect the available stream URLs, keyed by their origin.
    $videoUrls = [];
    if (isset($video['play_addr']['url_list'][0])) {
        $videoUrls['normal'] = $video['play_addr']['url_list'][0];
    }
    if (isset($video['download_addr']['url_list'][0])) {
        $videoUrls['download'] = $video['download_addr']['url_list'][0];
    }
    $bitRates = $video['bit_rate'] ?? [];
    if (is_array($bitRates)) {
        foreach ($bitRates as $bitRate) {
            if (isset($bitRate['play_addr']['url_list'][0])) {
                $quality = $bitRate['quality_type'] ?? 'unknown';
                $videoUrls["quality_{$quality}"] = $bitRate['play_addr']['url_list'][0];
            }
        }
    }

    return [
        // Defaulted like every other field so a sparse item raises no warning.
        'aweme_id' => $rawData['aweme_id'] ?? '',
        'desc' => $rawData['desc'] ?? '',
        'create_time' => date('Y-m-d H:i:s', (int) ($rawData['create_time'] ?? 0)),
        'duration' => ($video['duration'] ?? 0) / 1000, // raw value is divided by 1000 to get seconds
        'cover' => $video['cover']['url_list'][0] ?? '',
        'video_urls' => $videoUrls,
        'author' => [
            'uid' => $author['uid'] ?? '',
            'nickname' => $author['nickname'] ?? '',
            'signature' => $author['signature'] ?? '',
            'avatar' => $author['avatar_thumb']['url_list'][0] ?? '',
            'follower_count' => $author['follower_count'] ?? 0,
        ],
        'statistics' => [
            'digg_count' => $statistics['digg_count'] ?? 0,       // likes
            'comment_count' => $statistics['comment_count'] ?? 0, // comments
            'share_count' => $statistics['share_count'] ?? 0,     // shares
            'play_count' => $statistics['play_count'] ?? 0,       // plays
        ],
        'music' => [
            'title' => $music['title'] ?? '',
            'author' => $music['author'] ?? '',
            'cover' => $music['cover_large']['url_list'][0] ?? '',
        ],
    ];
}
/**
 * Perform an HTTP GET and return the body of a 200 response.
 *
 * Sends the configured User-Agent, JSON/zh-CN accept headers, a douyin.com
 * Referer and, when set, the configured cookie string.
 *
 * @param string $url Absolute URL to request
 * @return string|false Response body on HTTP 200, false on any transport
 *                      error or other status code
 */
private function httpGet($url)
{
    $options = [
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => true,
        // Keep certificate verification on: turning it off lets any
        // on-path party read or tamper with the response.
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        // Bound the call so a stalled server cannot hang the caller.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT        => 30,
        CURLOPT_USERAGENT      => $this->userAgent,
        CURLOPT_HTTPHEADER     => [
            'Accept: application/json',
            'Accept-Language: zh-CN,zh;q=0.9',
            'Referer: https://www.douyin.com/',
        ],
    ];
    if (!empty($this->cookie)) {
        $options[CURLOPT_COOKIE] = $this->cookie;
    }

    $ch = curl_init();
    curl_setopt_array($ch, $options);
    $response = curl_exec($ch);
    $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($response !== false && $httpCode === 200) {
        return $response;
    }

    return false;
}
}
4.3 视频下载器
<?php
/**
* 抖音视频下载器
* 支持断点续传、进度显示
*/
class DouyinVideoDownloader
{
private $savePath = './downloads/';
private $chunkSize = 1024 * 1024; // 1MB per chunk
/**
 * Set up the downloader and make sure its target directory exists.
 *
 * @param string $savePath Directory for downloaded files; when empty the
 *                         class default is kept. Trailing "/" characters are
 *                         normalised to exactly one.
 */
public function __construct($savePath = null)
{
    $this->savePath = $savePath ? rtrim($savePath, '/') . '/' : $this->savePath;

    // Create the directory (recursively) on first use.
    is_dir($this->savePath) || mkdir($this->savePath, 0755, true);
}
/**
 * Download one video into the save directory.
 *
 * If the target file already exists it is returned untouched. Otherwise
 * the data is streamed into "<file>.part" and renamed only after a
 * complete HTTP 200 transfer, so an interrupted run never leaves a
 * truncated file that a later call would mistake for a finished download.
 * Resuming a partial transfer is not implemented.
 *
 * @param string   $url              Video URL
 * @param string   $filename         Target file name; any directory part is
 *                                   discarded so the write stays inside the
 *                                   save directory
 * @param callable $progressCallback Optional; called as
 *                                   ($percent, $downloadedBytes, $totalBytes)
 *                                   whenever the total size is known
 * @return string|false Path of the saved file, or false on failure
 */
public function download($url, $filename, $progressCallback = null)
{
    $safeName = basename((string) $filename);
    if ($safeName === '') {
        return false;
    }
    $filePath = $this->savePath . $safeName;

    // Already downloaded: nothing to do.
    if (file_exists($filePath)) {
        return $filePath;
    }

    $tmpPath = $filePath . '.part';
    $fp = fopen($tmpPath, 'wb');
    if ($fp === false) {
        // Directory missing or not writable.
        return false;
    }

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL            => $url,
        CURLOPT_FILE           => $fp,
        CURLOPT_FOLLOWLOCATION => true,
        // No overall timeout (videos can be large), only a connect timeout.
        CURLOPT_CONNECTTIMEOUT => 15,
        CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    ]);

    if ($progressCallback && is_callable($progressCallback)) {
        curl_setopt($ch, CURLOPT_NOPROGRESS, false);
        curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($resource, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($progressCallback) {
            if ($downloadSize > 0) {
                $progress = ($downloaded / $downloadSize) * 100;
                call_user_func($progressCallback, $progress, $downloaded, $downloadSize);
            }
            // A non-zero return value would abort the transfer.
            return 0;
        });
    }

    $result = curl_exec($ch);
    $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    fclose($fp);

    if ($result !== false && $httpCode === 200 && rename($tmpPath, $filePath)) {
        return $filePath;
    }

    // Failed: remove the incomplete temporary file.
    if (file_exists($tmpPath)) {
        unlink($tmpPath);
    }

    return false;
}
/**
 * Download several videos concurrently with curl_multi.
 *
 * Each transfer is streamed straight to "<file>.part" on disk rather than
 * buffered in memory, and renamed to its final name only after a complete,
 * non-empty HTTP 200 response.
 *
 * @param array $urls Map of key => ['url' => string, 'filename' => string]
 * @return array Map of the same keys => saved file path, or false on failure
 */
public function multiDownload($urls)
{
    $results = [];
    $jobs = [];
    $mh = curl_multi_init();

    foreach ($urls as $key => $item) {
        $name = isset($item['filename']) ? basename((string) $item['filename']) : '';
        if (!isset($item['url']) || $name === '') {
            $results[$key] = false;
            continue;
        }

        $filePath = $this->savePath . $name;
        $tmpPath = $filePath . '.part';
        $fp = fopen($tmpPath, 'wb');
        if ($fp === false) {
            $results[$key] = false;
            continue;
        }

        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL            => $item['url'],
            CURLOPT_FILE           => $fp,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_CONNECTTIMEOUT => 15,
            CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        ]);
        curl_multi_add_handle($mh, $ch);

        $jobs[$key] = ['ch' => $ch, 'fp' => $fp, 'tmp' => $tmpPath, 'path' => $filePath];
    }

    // Drive all transfers to completion.
    $running = 0;
    do {
        $status = curl_multi_exec($mh, $running);
        if ($status !== CURLM_OK) {
            break;
        }
        // curl_multi_select() can return -1 immediately; pause briefly in
        // that case instead of spinning at 100% CPU.
        if ($running > 0 && curl_multi_select($mh, 1.0) === -1) {
            usleep(10000);
        }
    } while ($running > 0);

    // Collect the per-transfer result codes reported by the multi handle.
    $transferCodes = [];
    while (($info = curl_multi_info_read($mh)) !== false) {
        foreach ($jobs as $key => $job) {
            if ($info['handle'] === $job['ch']) {
                $transferCodes[$key] = $info['result'];
                break;
            }
        }
    }

    foreach ($jobs as $key => $job) {
        $httpCode = (int) curl_getinfo($job['ch'], CURLINFO_HTTP_CODE);
        curl_multi_remove_handle($mh, $job['ch']);
        curl_close($job['ch']);
        fclose($job['fp']);

        // A transfer with no completion message did not finish.
        $finishedCleanly = isset($transferCodes[$key]) && $transferCodes[$key] === CURLE_OK;
        $hasContent = is_file($job['tmp']) && filesize($job['tmp']) > 0;

        if ($finishedCleanly && $httpCode === 200 && $hasContent && rename($job['tmp'], $job['path'])) {
            $results[$key] = $job['path'];
            continue;
        }

        if (file_exists($job['tmp'])) {
            unlink($job['tmp']);
        }
        $results[$key] = false;
    }

    curl_multi_close($mh);

    return $results;
}
}
4.4 视频分析器
<?php
/**
* 抖音视频分析器
* 提取视频元数据、生成分析报告
*/
class DouyinVideoAnalyzer
{
/**
 * Build an analysis report from normalised video info.
 *
 * @param array $videoInfo Structure with the keys aweme_id, desc, duration,
 *                         create_time, statistics, author and video_urls
 * @return array Report with the sections basic_info, performance,
 *               author_insights and recommendations
 */
public function analyze($videoInfo)
{
    $stats = $videoInfo['statistics'];
    $author = $videoInfo['author'];
    $description = $videoInfo['desc'];
    $createdAt = $videoInfo['create_time'];

    $basicInfo = [
        'aweme_id' => $videoInfo['aweme_id'],
        'description' => $description,
        'duration' => $videoInfo['duration'],
        'create_time' => $createdAt,
    ];

    $performance = [
        'engagement_rate' => $this->calculateEngagementRate($stats),
        'popularity_score' => $this->calculatePopularityScore($stats),
    ];

    $authorInsights = [
        'nickname' => $author['nickname'],
        'follower_count' => $author['follower_count'],
        'content_style' => $this->analyzeContentStyle($description),
    ];

    $recommendations = [
        'best_post_time' => $this->suggestPostTime($createdAt),
        'video_quality' => $this->evaluateVideoQuality($videoInfo),
    ];

    return [
        'basic_info' => $basicInfo,
        'performance' => $performance,
        'author_insights' => $authorInsights,
        'recommendations' => $recommendations,
    ];
}
/**
 * Engagement rate: (likes + comments + shares) as a percentage of plays.
 *
 * @param array $statistics Needs digg_count, comment_count, share_count
 *                          and play_count
 * @return float|int Percentage rounded to two decimals, or 0 when there
 *                   are no plays
 */
private function calculateEngagementRate($statistics)
{
    $interactions = $statistics['digg_count']
        + $statistics['comment_count']
        + $statistics['share_count'];
    $plays = $statistics['play_count'];

    // Without plays the ratio is undefined; report 0.
    if (!($plays > 0)) {
        return 0;
    }

    return round(($interactions / $plays) * 100, 2);
}
/**
 * Popularity score in the range 0-100.
 *
 * Each counter is divided by a fixed divisor and capped: likes / 10000
 * (max 40), comments / 500 (max 30), shares / 500 (max 30). The capped
 * parts are summed and the total is capped at 100.
 *
 * @param array $statistics Needs digg_count, comment_count and share_count
 * @return float|int
 */
private function calculatePopularityScore($statistics)
{
    // field => [divisor, cap]
    $weights = [
        'digg_count' => [10000, 40],
        'comment_count' => [500, 30],
        'share_count' => [500, 30],
    ];

    $total = 0;
    foreach ($weights as $field => $rule) {
        $total += min($statistics[$field] / $rule[0], $rule[1]);
    }

    return min($total, 100);
}
/**
 * Guess content categories from keywords in the caption.
 *
 * A category matches when any of its keywords occurs in the text
 * (case-insensitive substring search).
 *
 * @param string $description Video caption
 * @return array Matched category names in table order, or ['其他'] when
 *               nothing matches
 */
private function analyzeContentStyle($description)
{
    $styles = [
        '搞笑' => ['搞笑', '幽默', '段子', '哈哈'],
        '美食' => ['美食', '吃货', '好吃', '烹饪'],
        '旅行' => ['旅行', '风景', '旅游', '打卡'],
        '知识' => ['科普', '知识', '教学', '干货'],
        '音乐' => ['音乐', '唱歌', '翻唱', '演奏'],
        '舞蹈' => ['舞蹈', '跳舞', '舞姿'],
        '宠物' => ['宠物', '猫', '狗', '萌宠'],
        '美妆' => ['美妆', '化妆', '护肤'],
    ];

    // Keep only the categories with at least one keyword hit.
    $hits = array_filter($styles, static function ($keywords) use ($description) {
        foreach ($keywords as $keyword) {
            if (stripos($description, $keyword) !== false) {
                return true;
            }
        }
        return false;
    });

    $matched = array_keys($hits);

    return $matched ?: ['其他'];
}
/**
 * Suggested posting windows.
 *
 * Returns a fixed table; the argument is currently not used.
 *
 * @param string $createTime Publication time of the video (ignored)
 * @return array ['weekday' => string[], 'weekend' => string[]]
 */
private function suggestPostTime($createTime)
{
    // Fixed peak-activity windows:
    //   weekdays: 12:00-13:00 and 18:00-23:00
    //   weekends: 10:00-23:00
    $weekdayWindows = ['12:00-13:00', '18:00-23:00'];
    $weekendWindows = ['10:00-23:00'];

    return [
        'weekday' => $weekdayWindows,
        'weekend' => $weekendWindows,
    ];
}
/**
 * Rate the video and collect improvement suggestions.
 *
 * The rating is 'good' when any key of video_urls contains "quality",
 * otherwise 'medium'. Durations under 7 or over 60 add a suggestion.
 *
 * @param array $videoInfo Needs video_urls (map) and duration
 * @return array ['rating' => string, 'suggestions' => string[]]
 */
private function evaluateVideoQuality($videoInfo)
{
    $suggestions = [];

    // Look for at least one quality-specific stream.
    $hasQualityStream = false;
    foreach (array_keys($videoInfo['video_urls']) as $label) {
        if (strpos($label, 'quality') !== false) {
            $hasQualityStream = true;
            break;
        }
    }

    $rating = $hasQualityStream ? 'good' : 'medium';
    if (!$hasQualityStream) {
        $suggestions[] = '该视频没有高清源,建议上传更高清的视频';
    }

    // Duration check.
    $duration = $videoInfo['duration'];
    if ($duration < 7) {
        $suggestions[] = '视频时长较短(' . $duration . '秒),建议制作7-15秒的视频以获得更好效果';
    } elseif ($duration > 60) {
        $suggestions[] = '视频时长较长,抖音用户偏好短视频,建议控制在60秒以内';
    }

    return [
        'rating' => $rating,
        'suggestions' => $suggestions,
    ];
}
}
4.5 主程序入口
<?php
/**
* 抖音视频抓取工具 - 主程序
* 整合所有功能,提供完整的工作流
*/
require_once 'DouyinUrlParser.php';
require_once 'DouyinVideoFetcher.php';
require_once 'DouyinVideoDownloader.php';
require_once 'DouyinVideoAnalyzer.php';
class DouyinVideoTool
{
private $fetcher;
private $downloader;
private $analyzer;
/**
 * Create the fetcher, downloader and analyzer used by this tool.
 *
 * @param string|null $downloadPath Passed through to DouyinVideoDownloader
 */
public function __construct($downloadPath = null)
{
$this->fetcher = new DouyinVideoFetcher();
$this->downloader = new DouyinVideoDownloader($downloadPath);
$this->analyzer = new DouyinVideoAnalyzer();
}
/**
 * Run the full workflow: parse the ID, fetch info, download, analyse.
 *
 * Progress messages are echoed to standard output while downloading.
 * The analysis is produced whenever the video info could be fetched, even
 * if the download itself fails.
 *
 * @param string $input URL or aweme_id supplied by the user
 * @return array Keys: success (bool), aweme_id, video_info, download_path,
 *               analysis, error. "success" is true only when no step
 *               reported an error; on a failed download the info and
 *               analysis are still filled in alongside the error message.
 */
public function process($input)
{
    $result = [
        'success' => false,
        'aweme_id' => null,
        'video_info' => null,
        'download_path' => null,
        'analysis' => null,
        'error' => null,
    ];

    // 1. Resolve the aweme_id.
    $awemeId = DouyinUrlParser::extractAwemeId($input);
    if (!$awemeId) {
        $result['error'] = '无法解析aweme_id,请检查输入格式';
        return $result;
    }
    $result['aweme_id'] = $awemeId;

    // 2. Fetch the video info.
    $videoInfo = $this->fetcher->fetchVideoInfo($awemeId);
    if (!$videoInfo) {
        $result['error'] = '获取视频信息失败,请检查网络或aweme_id是否正确';
        return $result;
    }
    $result['video_info'] = $videoInfo;

    // 3. Download, preferring the dedicated download address.
    $downloadUrl = $videoInfo['video_urls']['download']
        ?? $videoInfo['video_urls']['normal']
        ?? null;
    if ($downloadUrl) {
        $filename = $awemeId . '_' . date('YmdHis') . '.mp4';
        echo "开始下载视频...\n";
        $downloadPath = $this->downloader->download($downloadUrl, $filename, function ($progress, $downloaded, $total) {
            echo "\r下载进度: " . round($progress, 2) . "% (" .
                $this->formatBytes($downloaded) . "/" .
                $this->formatBytes($total) . ")";
        });
        if ($downloadPath) {
            $result['download_path'] = $downloadPath;
            echo "\n下载完成!保存路径: {$downloadPath}\n";
        } else {
            $result['error'] = '视频下载失败';
        }
    } else {
        $result['error'] = '未找到可用的视频下载地址';
    }

    // 4. Analyse ($videoInfo is known to be non-empty at this point).
    $result['analysis'] = $this->analyzer->analyze($videoInfo);

    // Previously this was unconditionally true, so a failed download was
    // reported as a success and its error message was never shown.
    $result['success'] = $result['error'] === null;

    return $result;
}
/**
 * Fetch video info only, without downloading anything.
 *
 * @param string $input URL or aweme_id
 * @return array|null Video info, or null when the ID cannot be resolved or
 *                    the fetcher returns nothing
 */
public function getInfo($input)
{
    $awemeId = DouyinUrlParser::extractAwemeId($input);

    return $awemeId ? $this->fetcher->fetchVideoInfo($awemeId) : null;
}
/**
 * Format a byte count as a human-readable string, e.g. "1.5 KB".
 *
 * Uses powers of 1024 and the units B, KB, MB, GB, TB; larger values stay
 * in TB. Negative input is treated as 0.
 *
 * @param int|float $bytes     Byte count
 * @param int       $precision Decimal places to round to
 * @return string
 */
private function formatBytes($bytes, $precision = 2)
{
    $units = ['B', 'KB', 'MB', 'GB', 'TB'];
    $bytes = max($bytes, 0);

    // Values below 1 have a negative logarithm, which would select a
    // non-existent unit; pin them to the first unit. The exponent is cast
    // to int so it is a proper array index.
    $pow = $bytes >= 1 ? (int) floor(log($bytes) / log(1024)) : 0;
    $pow = min($pow, count($units) - 1);

    $bytes /= pow(1024, $pow);

    return round($bytes, $precision) . ' ' . $units[$pow];
}
}
// 使用示例
if (php_sapi_name() === 'cli') {
// 命令行模式
if ($argc < 2) {
echo "使用方法: php douyin_tool.php <抖音视频链接或aweme_id>\n";
exit(1);
}
$input = $argv[1];
$tool = new DouyinVideoTool('./videos/');
$result = $tool->process($input);
if ($result['success']) {
echo "\n========== 处理结果 ==========\n";
echo "aweme_id: {$result['aweme_id']}\n";
echo "视频描述: {$result['video_info']['desc']}\n";
echo "作者: {$result['video_info']['author']['nickname']}\n";
echo "点赞数: {$result['video_info']['statistics']['digg_count']}\n";
echo "评论数: {$result['video_info']['statistics']['comment_count']}\n";
echo "分享数: {$result['video_info']['statistics']['share_count']}\n";
echo "播放数: {$result['video_info']['statistics']['play_count']}\n";
if ($result['analysis']) {
echo "\n========== 数据分析 ==========\n";
echo "互动率: {$result['analysis']['performance']['engagement_rate']}%\n";
echo "热度分数: {$result['analysis']['performance']['popularity_score']}\n";
echo "内容风格: " . implode(', ', $result['analysis']['author_insights']['content_style']) . "\n";
}
if ($result['download_path']) {
echo "\n视频已保存至: {$result['download_path']}\n";
}
} else {
echo "错误: {$result['error']}\n";
}
}
五、高级功能扩展
5.1 批量抓取多个视频
<?php
/**
* 批量抓取工具
*/
class DouyinBatchProcessor
{
private $tool;
/**
 * Create the underlying single-video tool.
 *
 * @param string|null $downloadPath Passed through to DouyinVideoTool
 */
public function __construct($downloadPath = null)
{
$this->tool = new DouyinVideoTool($downloadPath);
}
/**
 * Process several videos, one after another or in parallel.
 *
 * In sequential mode a progress line and a separator are echoed per item.
 *
 * @param array $inputs   URLs or aweme_ids
 * @param bool  $parallel When true, delegate to parallelProcess()
 * @return array Map of input => result of DouyinVideoTool::process()
 */
public function batchProcess($inputs, $parallel = false)
{
    if ($parallel) {
        return $this->parallelProcess($inputs);
    }

    $total = count($inputs);
    $results = [];
    foreach ($inputs as $index => $input) {
        $position = $index + 1;
        echo "处理第 " . $position . "/" . $total . " 个视频: {$input}\n";
        $results[$input] = $this->tool->process($input);
        echo "----------------------------------------\n";
    }

    return $results;
}
/**
 * Process all inputs in parallel, one forked child process per input.
 *
 * Each child writes its JSON-encoded result to a temporary file; the
 * parent waits for every child, then reads and deletes those files.
 * Requires the pcntl extension.
 *
 * @param array $inputs URLs or aweme_ids
 * @return array Map of input => decoded result, for the children that
 *               produced a result file
 * @throws RuntimeException When a child process cannot be forked
 */
private function parallelProcess($inputs)
{
    $pids = [];
    $results = [];
    $outputFiles = [];

    // Inputs are often URLs containing "/" and must not be embedded in a
    // file name; derive a safe, unique name from a hash instead. The parent
    // PID keeps concurrent batch runs from colliding.
    $prefix = sys_get_temp_dir() . '/douyin_result_' . getmypid() . '_';

    foreach ($inputs as $index => $input) {
        $outputFile = $prefix . sha1($index . "\0" . $input) . '.json';
        $outputFiles[$index] = $outputFile;

        $pid = pcntl_fork();
        if ($pid === -1) {
            // Reap the children already started and drop their output
            // before reporting the failure.
            foreach ($pids as $childPid) {
                pcntl_waitpid($childPid, $status);
            }
            foreach ($outputFiles as $file) {
                if (file_exists($file)) {
                    unlink($file);
                }
            }
            throw new RuntimeException('fork失败');
        }

        if ($pid === 0) {
            // Child: process one input, persist the result, and exit.
            $result = $this->tool->process($input);
            file_put_contents($outputFile, json_encode($result));
            exit(0);
        }

        // Parent: remember the child.
        $pids[] = $pid;
    }

    // Wait for every child to finish.
    foreach ($pids as $pid) {
        pcntl_waitpid($pid, $status);
    }

    // Collect the results.
    foreach ($inputs as $index => $input) {
        $outputFile = $outputFiles[$index];
        if (file_exists($outputFile)) {
            $results[$input] = json_decode(file_get_contents($outputFile), true);
            unlink($outputFile);
        }
    }

    return $results;
}
}
5.2 缓存机制
<?php
/**
* 带缓存的视频获取器
*/
class CachedDouyinVideoFetcher extends DouyinVideoFetcher
{
private $cacheDir = './cache/';
private $cacheTTL = 3600; // 缓存1小时
/**
 * Configure the cache location and lifetime, creating the directory if
 * it does not exist yet.
 *
 * @param string|null $cacheDir Cache directory; the class default is kept
 *                              when empty
 * @param int         $cacheTTL Lifetime of a cache entry in seconds
 */
public function __construct($cacheDir = null, $cacheTTL = 3600)
{
    $this->cacheTTL = $cacheTTL;

    if ($cacheDir) {
        $this->cacheDir = rtrim($cacheDir, '/') . '/';
    }

    is_dir($this->cacheDir) || mkdir($this->cacheDir, 0755, true);
}
/**
 * Fetch video info, served from the file cache while the entry is fresh.
 *
 * A cache entry is "<cacheDir><awemeId>.json" and is considered fresh
 * while its modification time is younger than the configured TTL.
 *
 * @param string $awemeId Video ID
 * @return array|null Video info, or null when the lookup fails
 */
public function fetchVideoInfo($awemeId)
{
    // Only purely numeric IDs become file names; anything else (e.g. a
    // value containing "../") bypasses the cache rather than being used to
    // build a path.
    if (!preg_match('/\A\d+\z/', (string) $awemeId)) {
        return parent::fetchVideoInfo($awemeId);
    }

    $cacheFile = $this->cacheDir . $awemeId . '.json';

    // Serve from cache while it is still valid.
    if (is_file($cacheFile) && (time() - filemtime($cacheFile)) < $this->cacheTTL) {
        $raw = file_get_contents($cacheFile);
        $cached = $raw === false ? null : json_decode($raw, true);
        if ($cached) {
            return $cached;
        }
    }

    // Cache miss or stale entry: fetch again.
    $videoInfo = parent::fetchVideoInfo($awemeId);
    if ($videoInfo) {
        $encoded = json_encode($videoInfo);
        if ($encoded !== false) {
            // LOCK_EX: do not interleave writes from concurrent processes.
            file_put_contents($cacheFile, $encoded, LOCK_EX);
        }
    }

    return $videoInfo;
}
/**
 * Remove one cache entry, or every "*.json" entry in the cache directory.
 *
 * @param string|null $awemeId ID whose entry should be removed; when empty
 *                             all entries are removed
 */
public function clearCache($awemeId = null)
{
    if ($awemeId) {
        // basename() keeps the deletion inside the cache directory.
        $cacheFile = $this->cacheDir . basename((string) $awemeId) . '.json';
        if (is_file($cacheFile)) {
            unlink($cacheFile);
        }
        return;
    }

    // glob() returns false on error (and, on some systems, for an empty
    // match); fall back to an empty list so the loop never iterates false.
    $files = glob($this->cacheDir . '*.json') ?: [];
    foreach ($files as $file) {
        if (is_file($file)) {
            unlink($file);
        }
    }
}
}
六、常见问题与解决方案
6.1 反爬虫机制处理
抖音对爬虫有一定的防护措施,常见解决方案:
/**
* 代理IP池
*/
class ProxyPool
{
private $proxies = [];
/**
 * Append a proxy to the pool. No validation or de-duplication is done.
 *
 * @param string $proxy Proxy address; later handed to CURLOPT_PROXY by callers
 */
public function addProxy($proxy)
{
$this->proxies[] = $proxy;
}
/**
 * Pick one proxy from the pool at random.
 *
 * @return string|null A proxy, or null when the pool is empty
 */
public function getRandomProxy()
{
    return empty($this->proxies)
        ? null
        : $this->proxies[array_rand($this->proxies)];
}
/**
 * GET a URL, optionally through a proxy, with a randomly chosen User-Agent.
 *
 * @param string      $url   URL to request
 * @param string|null $proxy Proxy to use; when empty the request is direct
 * @return string|false Response body, or false when the transfer fails
 */
public function curlWithProxy($url, $proxy = null)
{
    $userAgents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
        'Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15',
    ];

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // A dead or slow proxy must not block the caller indefinitely.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);
    if ($proxy) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
    }
    // Random User-Agent per request.
    curl_setopt($ch, CURLOPT_USERAGENT, $userAgents[array_rand($userAgents)]);

    $response = curl_exec($ch);
    curl_close($ch);

    return $response;
}
}
6.2 请求频率限制
/**
* 请求速率限制器
*/
class RateLimiter
{
private $requests = [];
private $maxRequests = 10; // 最大请求数
private $timeWindow = 60; // 时间窗口(秒)
/**
 * Configure the limiter.
 *
 * @param int $maxRequests Maximum number of requests recorded per window
 * @param int $timeWindow  Window length in seconds
 */
public function __construct($maxRequests = 10, $timeWindow = 60)
{
$this->maxRequests = $maxRequests;
$this->timeWindow = $timeWindow;
}
/**
 * Check whether a request may be made now and, if so, record it.
 *
 * Timestamps older than the time window are discarded first.
 *
 * @return array ['allowed' => bool, 'wait' => int]; "wait" is the number of
 *               seconds until the oldest recorded request leaves the window
 *               (0 when the request is allowed)
 */
public function canMakeRequest()
{
    $now = time();
    $window = $this->timeWindow;

    // Drop the records that have left the window.
    $recent = [];
    foreach ($this->requests as $key => $timestamp) {
        if (($now - $timestamp) < $window) {
            $recent[$key] = $timestamp;
        }
    }
    $this->requests = $recent;

    if (count($recent) < $this->maxRequests) {
        $this->requests[] = $now;
        return ['allowed' => true, 'wait' => 0];
    }

    $oldest = min($recent);

    return ['allowed' => false, 'wait' => $window - ($now - $oldest)];
}
/**
 * Block until a request is allowed, then record it.
 *
 * Re-checks after every sleep: a denied check does not record the request,
 * so returning right after the sleep (as before) let the caller send a
 * request the limiter never counted.
 */
public function waitIfNeeded()
{
    while (true) {
        $result = $this->canMakeRequest();
        if ($result['allowed']) {
            return;
        }
        // Sleep at least one second so the loop can never spin.
        sleep(max(1, (int) $result['wait']));
    }
}
}
七、部署与优化建议
7.1 性能优化
- 使用Redis缓存:将高频访问的视频信息存入Redis
- 异步下载:使用消息队列(如RabbitMQ)处理下载任务
- CDN加速:下载的视频上传到对象存储(OSS),提供CDN访问
7.2 安全建议
- 输入验证:严格验证用户输入的URL和aweme_id
- 文件隔离:下载的视频放在隔离目录,防止代码执行
- 速率限制:对每个IP进行请求频率限制
- 日志记录:记录所有请求和错误,便于追踪
7.3 部署示例
# 项目结构
douyin-video-tool/
├── index.php # Web接口入口
├── cli.php # 命令行入口
├── config/
│ └── config.php # 配置文件
├── src/
│ ├── DouyinUrlParser.php
│ ├── DouyinVideoFetcher.php
│ ├── DouyinVideoDownloader.php
│ ├── DouyinVideoAnalyzer.php
│ └── DouyinVideoTool.php
├── downloads/ # 下载目录
├── cache/ # 缓存目录
├── logs/ # 日志目录
└── vendor/ # Composer依赖
八、总结
本教程完整实现了一个PHP抖音视频抓取工具,主要特点:
- 永久标识解析:类似B站的bvid,通过aweme_id实现视频的永久定位
- 动态地址获取:每次请求都能获取最新的视频播放地址
- 多清晰度支持:自动提取不同清晰度的视频源
- 完整数据分析:不仅下载视频,还分析互动率、热度等关键指标
- 扩展性强:支持缓存、代理、批量处理等高级功能
通过本教程,你可以:
- 理解抖音视频标识体系
- 掌握PHP抓取动态网页的技术
- 学会处理反爬虫机制
- 实现完整的视频下载和分析工具
注意事项:
-
终于有一篇能把抖音抓取讲清楚的中文教程了。之前看英文文档头都大了。作者不仅讲怎么做,还讲为什么这么做,非常适合学习。收藏加转发。
作为老PHPER,这篇教程的质量很高。从URL解析到视频保存,每一步都很扎实。但抖音的接口经常会换,建议把API请求部分做成可配置的,方便更新。
教程里提到了B站bvid和抖音aweme_id的类比,这个思路很好。如果能对比两个平台的API设计差异,会对架构设计有很大启发。
代码风格值得学习,变量命名规范,逻辑清晰。不过有些地方可以用PHP 8的新特性优化一下,比如match表达式和命名参数,代码会更简洁。
读完这篇教程,我对抖音的数据结构有了更深的理解。aweme_id、sec_uid、uid这些字段的关系搞清楚了。建议再加个用户主页视频批量抓取的功能。