基于新版FFmpeg（FFmpeg 6.1）的音視頻復用（不涉及編解碼）

這篇具有很好參考價值的文章主要介紹了基于新版FFmpeg（FFmpeg 6.1）的音視頻復用（不涉及編解碼）。希望對大家有所幫助。如果存在錯誤或未考慮完全的地方，請大家不吝賜教，您也可以點擊"舉報違法"按鈕提交疑問。

1 項目中使用的FFmpeg函數介紹

FFmpeg庫常用函數介紹（一）-CSDN博客

FFmpeg庫常用函數介紹（二）-CSDN博客

FFmpeg庫常用函數介紹（三）-CSDN博客

2 介紹

這篇文章介紹的是基于新版FFmpeg（FFmpeg 6.1）的音視頻復用器的實現，可以將音頻文件和視頻文件復用為一個視頻文件，具體功能如下表所示。

輸入視頻文件	輸入音頻文件	輸出視頻文件
input.h264	input.aac	output.mp4 (avi、mkv、wmv等)
input.h264	input.mp3
input.mp4	input.mp3
input.mp4	input.aac
input.mp4	input.mp4
…等等…

3 代碼邏輯

根據輸出文件的格式選擇是否開啟比特流過濾器（AAC_ADTS_TO_ASC和H264_AVCC_TO_ANNEXB宏）。例如，輸出格式為avi，就需要開啟H264_AVCC_TO_ANNEXB（置為1）；
打開輸入音視頻文件，創建并初始化輸入AVFormatContext，創建輸出AVFormatContext；
根據輸入視頻文件的視頻流創建輸出文件的視頻流，拷貝編解碼器參數；
根據輸入音頻文件的音頻流創建輸出文件的音頻流，拷貝編解碼器參數；
打開輸出文件，寫入文件頭；
根據過濾器的開啟情況創建并初始化對應比特流過濾器；
根據av_compare_ts的輸出判斷先讀取音頻還是視頻文件，然后讀取幀；
時間戳轉換、送入過濾器過濾、交錯寫入；
所有幀寫完后寫入文件尾；

4 問題匯總

4.1 沒有pts

有的碼流沒有pts，例如原始的H.264碼流，因此需要自己手動設置pts。pts是以輸入流時間基為單位表示的時間戳，也就是說pts的數值表示顯示時間等于多少個輸入流時間基。ffmpeg內部時間的單位是1 / AV_TIME_BASE秒（AV_TIME_BASE為1000000），換算關系是1s = 1000000個內部時間單位。

計算過程是首先計算出以ffmpeg內部時間表示的兩幀之間的間隔：

int frame_duration = AV_TIME_BASE / av_q2d(in_stream->r_frame_rate);

1 / av_q2d(in_stream->r_frame_rate)表示的是以秒表示的間隔，AV_TIME_BASE / av_q2d(in_stream->r_frame_rate)表示的是以ffmpeg內部時間表示的間隔。

接著就是算出真正的pts，也就是把ffmpeg內部時間換算成以輸入流時間基表示的時間戳。

pkt.pts = frame_index * frame_duration / (av_q2d(in_stream->time_base) * AV_TIME_BASE);

frame_index * frame_duration表示當前幀以ffmpeg內部時間表示的顯示時間。(av_q2d(in_stream->time_base) * AV_TIME_BASE)表示一個輸入流時間基以ffmpeg內部時間表示的結果。二者相除就得到以輸入流時間基表示的顯示時間，也就是真正的pts。

4.2 二倍速問題

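寫完代碼后，使用沒有pts的碼流進行測試，發現畫面變成了二倍速，并且視頻長度也減半了，猜測是pts的設置有問題。最終將pts乘以2解決了問題，但是目前還不知道原理是什么。一個可能的原因（尚未驗證）是：根據FFmpeg文檔，r_frame_rate只是一個估計值，表示能夠精確表示所有時間戳的最低幀率，對于H.264碼流它可能是實際幀率的兩倍（例如實際25fps時得到50/1），此時可以嘗試改用avg_frame_rate來計算幀間隔。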

//解決2倍速問題...

pkt.pts *= 2;

4.3 packet里的stream_index的設置

輸出文件的音視頻流來自不同的文件，因此packet中流的索引與輸出文件中流的索引可能不匹配，可能出現packet中音頻幀和視頻幀所對應的stream_index相同的情況。因此需要將packet的stream_index設置為輸出文件中對應音頻流或視頻流的索引。

pkt.stream_index = out_stream->index;

5 代碼

#include <stdio.h>
extern "C"
{
    #include "libavformat/avformat.h"
    #include "libavcodec/bsf.h"
};

#define AAC_ADTS_TO_ASC 0
#define H264_AVCC_TO_ANNEXB 0

/*
 * Release every resource a muxing run may hold.
 *
 * in_fmt_ctx1, in_fmt_ctx2  input (demuxer) contexts, closed with avformat_close_input()
 * out_fmt_ctx               output (muxer) context; its AVIO handle is closed first
 *                           unless the output format is flagged AVFMT_NOFILE
 * pkt1, pkt2                packets whose payload references are dropped with
 *                           av_packet_unref() (the AVPacket objects themselves are
 *                           not freed here)
 * bsf_ctx1, bsf_ctx2        bitstream filter contexts, freed with av_bsf_free()
 *
 * All pointers are received by value, so the caller's own variables are not
 * reset by this function and must not be used afterwards.
 */
void release_context(AVFormatContext *in_fmt_ctx1, AVFormatContext *in_fmt_ctx2, AVFormatContext *out_fmt_ctx, AVPacket *pkt1, AVPacket *pkt2,
AVBSFContext *bsf_ctx1, AVBSFContext *bsf_ctx2)
{
    AVFormatContext *inputs[2] = { in_fmt_ctx1, in_fmt_ctx2 };
    AVPacket *packets[2] = { pkt1, pkt2 };
    AVBSFContext *filters[2] = { bsf_ctx1, bsf_ctx2 };
    int slot;

    // The output file handle is only closed when the muxer is file-backed.
    if (out_fmt_ctx != NULL)
    {
        if (((out_fmt_ctx->oformat->flags) & AVFMT_NOFILE) == 0)
        {
            avio_close(out_fmt_ctx->pb);
        }
    }

    // Demuxers first, then the muxer context itself.
    for (slot = 0; slot < 2; ++slot)
    {
        avformat_close_input(&inputs[slot]);
    }
    avformat_free_context(out_fmt_ctx);

    // Drop any payload still referenced by the packets.
    for (slot = 0; slot < 2; ++slot)
    {
        av_packet_unref(packets[slot]);
    }

    // Finally the bitstream filters.
    for (slot = 0; slot < 2; ++slot)
    {
        av_bsf_free(&filters[slot]);
    }
}

int main(int argc, char *argv[])
{
    if (argc < 4)
    {
        printf("argument error, caller should pass 3 filenames as arguments, for example, \"./main input_video.h264 input_audio.aac output_video.mp4\"\n");
        return -1;
    }

    const char *in_filename_video = argv[1];
    const char *in_filename_audio = argv[2];
    const char *out_filename_video = argv[3];

    AVFormatContext *in_fmt_ctx_video = NULL, *in_fmt_ctx_audio = NULL, *out_fmt_ctx = NULL;
    AVPacket pkt, pkt_filtered;
    memset(&pkt, 0, sizeof(pkt));
    memset(&pkt_filtered, 0, sizeof(pkt_filtered));
    AVBSFContext *bsf_ctx_video = NULL, *bsf_ctx_audio = NULL;

    int64_t ts_video = 0, ts_audio = 0;
    AVRational time_base_in_video, time_base_in_audio;

    int in_video_index = -1, in_audio_index = -1, out_video_index = -1, out_audio_index = -1;

    //打開輸入視頻，初始化AVFormatContext
    if (avformat_open_input(&in_fmt_ctx_video, in_filename_video, NULL, NULL) < 0)
    {
        printf("failed to open input video file\n");
        release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
        return -1;
    }
    //尋找輸入視頻流信息
    if (avformat_find_stream_info(in_fmt_ctx_video, NULL) < 0)
    {
        printf("failed to find input video stream info\n");
        release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
        return -1;
    }

    //格式化輸出輸入流信息
    av_dump_format(in_fmt_ctx_video, 0, in_filename_video, 0);

    //打開輸入音頻，初始化AVFormatContext
    if (avformat_open_input(&in_fmt_ctx_audio, in_filename_audio, NULL, NULL) < 0)
    {
        printf("failed to open input audio file\n");
        release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
        return -1;
    }
    //尋找輸入音頻流信息
    if (avformat_find_stream_info(in_fmt_ctx_audio, NULL) < 0)
    {
        printf("failed to find input audio stream info\n");
        release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
        return -1;
    }

    //格式化輸出輸入流信息
    av_dump_format(in_fmt_ctx_audio, 0, in_filename_audio, 0);

    //分配輸出AVFormatContext
    if (avformat_alloc_output_context2(&out_fmt_ctx, NULL, NULL, out_filename_video) < 0)
    {
        printf("failed to alloc output AVFormatContext\n");
        release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
        return -1;
    }

    //遍歷輸入視頻，尋找視頻流，為輸出AVFormatContext創(chuàng)建新流，拷貝編解碼器參數(shù)
    for (int i = 0; i < in_fmt_ctx_video->nb_streams; i++)
    {
        if (in_fmt_ctx_video->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
        {
            AVStream *in_stream = in_fmt_ctx_video->streams[i];
            time_base_in_video = in_stream->time_base;
            //創(chuàng)建流
            AVStream *out_stream = avformat_new_stream(out_fmt_ctx, NULL);
            if (!out_stream)
            {
                printf("failed to create new stream for output AVFormatContext\n");
                release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
                return -1;
            }
            //拷貝編解碼器參數(shù)
            if (avcodec_parameters_copy(out_stream->codecpar, in_stream->codecpar) < 0)
            {
                printf("failed to copy codec parameters form input video to output video\n");
                release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
                return -1;
            }
            //自動選擇符合輸出格式的碼流類型
            out_stream->codecpar->codec_tag = 0;
            //輸入視頻流索引
            in_video_index = i;
            //輸出視頻流索引
            out_video_index = 0;
            break;
        }
    }

    //遍歷輸入音頻，尋找音頻流，為輸出AVFormatContext創(chuàng)建新流，拷貝編解碼器參數(shù)
    for (int i = 0; i < in_fmt_ctx_audio->nb_streams; i++)
    {
        if (in_fmt_ctx_audio->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            AVStream *in_stream = in_fmt_ctx_audio->streams[i];
            time_base_in_audio = in_stream->time_base;
            //創(chuàng)建流
            AVStream *out_stream = avformat_new_stream(out_fmt_ctx, NULL);
            if (!out_stream)
            {
                printf("failed to create new stream for output AVFormatContext\n");
                release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
                return -1;
            }
            //拷貝編解碼器參數(shù)
            if (avcodec_parameters_copy(out_stream->codecpar, in_stream->codecpar) < 0)
            {
                printf("failed to copy codec parameters form input audio to output video\n");
                release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
                return -1;
            }
            //自動選擇符合輸出格式的碼流類型
            out_stream->codecpar->codec_tag = 0;
            //輸入音頻流索引
            in_audio_index = i;
            //輸出音頻流索引
            out_audio_index = 1;
            break;
        }
    }

    //格式化輸出輸出音視頻流信息
    av_dump_format(out_fmt_ctx, 0, out_filename_video, 1);

    //打開輸出文件
    if (!(out_fmt_ctx->oformat->flags & AVFMT_NOFILE))
    {
        if (avio_open2(&out_fmt_ctx->pb, out_filename_video, AVIO_FLAG_WRITE, NULL, NULL) < 0)
        {
            printf("failed to open output file\n");
            release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
            return -1;
        }
    }

    //寫入文件頭
    if (avformat_write_header(out_fmt_ctx, NULL) < 0)
    {
        printf("failed to write header to the output file\n");
        release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
        return -1;
    }

    #if AAC_ADTS_TO_ASC
        //獲取比特流過濾器
        const AVBitStreamFilter *bsf_audio = av_bsf_get_by_name("aac_adtstoasc");
    #endif

    #if H264_AVCC_TO_ANNEXB
        //獲取比特流過濾器
        const AVBitStreamFilter *bsf_video = av_bsf_get_by_name("h264_mp4toannexb");
    #endif

    #if AAC_ADTS_TO_ASC
        //分配比特流過濾器上下文AVBSFContext
        if (av_bsf_alloc(bsf_audio, &bsf_ctx_audio) < 0)
        {
            printf("failed to alloc AVBSFContext\n");
            release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
            return -1;
        }
        //拷貝編解碼器參數(shù)
        if (avcodec_parameters_copy(bsf_ctx_audio->par_in, in_fmt_ctx_audio->streams[in_audio_index]->codecpar) < 0)
        {
            printf("failed to copy codec parameters from input audio to the bi stream filter context\n");
            release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
            return -1;
        }
        //初始化AVBSFContext
        if (av_bsf_init(bsf_ctx_audio) < 0)
        {
            printf("failed to init AVBSFContext\n");
            release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
            return -1;
        }
    #endif

    #if H264_AVCC_TO_ANNEXB
        //分配比特流過濾器上下文AVBSFContext
        if (av_bsf_alloc(bsf_video, &bsf_ctx_video) < 0)
        {
            printf("failed to alloc AVBSFContext\n");
            release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
            return -1;
        }
        //拷貝編解碼器參數(shù)
        if (avcodec_parameters_copy(bsf_ctx_video->par_in, in_fmt_ctx_video->streams[in_video_index]->codecpar) < 0)
        {
            printf("failed to copy codec parameters from input video to the bit stream filter context\n");
            release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
            return -1;
        }
        //初始化AVBSFContext
        if (av_bsf_init(bsf_ctx_video) < 0)
        {
            printf("failed to init AVBSFContext\n");
            release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
            return -1;
        }
    #endif

    int frame_index = 0;
    AVFormatContext *fmt_ctx_v_or_a = NULL;
    while (true)
    {
        AVStream *in_stream = NULL, *out_stream = NULL;
        //比較時間戳，以判斷先處理并寫入音頻還是視頻幀
        int ret = av_compare_ts(ts_video, time_base_in_video, ts_audio, time_base_in_audio);
        switch (ret)
        {
            case -1:
            case 0:
            {
                fmt_ctx_v_or_a = in_fmt_ctx_video;
                in_stream = in_fmt_ctx_video->streams[in_video_index];
                out_stream = out_fmt_ctx->streams[out_video_index];
                break;
            }
            case 1:
            {
                fmt_ctx_v_or_a = in_fmt_ctx_audio;
                in_stream = in_fmt_ctx_audio->streams[in_audio_index];
                out_stream = out_fmt_ctx->streams[out_audio_index];
                break;
            }
            default:
            {
                printf("undefined result\n");
                release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
                return -1;
            }
        }

        //讀取音視頻幀
        ret = av_read_frame(fmt_ctx_v_or_a, &pkt);
        if (ret < 0)
        {
            if (ret == AVERROR_EOF)
            {
                break;
            }

            printf("failed to read frame from input file\n");
            release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
            return -1;
        }

        //讀取的是輸入的視頻文件
        //如果讀取到的幀不是視頻幀則重新讀取
        if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
        {
            if (pkt.stream_index != in_video_index)
            {
                av_packet_unref(&pkt);
                continue;
            }
        }
        //讀取的是輸入的音頻文件
        //如果讀取到的不是音頻幀則重新讀取
        else if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            if (pkt.stream_index != in_audio_index)
            {
                av_packet_unref(&pkt);
                continue;
            }
        }

        //有的碼流沒有pts，例如原始的H.264碼流
        //因此需要自己手動設置pts
        if (pkt.pts == AV_NOPTS_VALUE)
        {
            //兩幀之間的間隔
            int frame_duration = AV_TIME_BASE / av_q2d(in_stream->r_frame_rate);
            //計算pts以輸入流時間基表示的ffmpeg內(nèi)部時間
            pkt.pts = frame_index * frame_duration / (av_q2d(in_stream->time_base) * AV_TIME_BASE);
            //解決2倍速問題...
            pkt.pts *= 2;
            //計算duration以輸入流時間基表示的ffmpeg內(nèi)部時間
            pkt.duration = frame_duration / (av_q2d(in_stream->time_base) * AV_TIME_BASE);
            pkt.dts = pkt.pts;
            frame_index++;
        }

        if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
        {
            ts_video = pkt.pts;
        }
        else if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            ts_audio = pkt.pts;
        }

        //時間戳轉換
        pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_INF | AV_ROUND_PASS_MINMAX));
        pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_INF | AV_ROUND_PASS_MINMAX));
        pkt.duration = av_rescale_q_rnd(pkt.duration, in_stream->time_base, out_stream->time_base, (AVRounding)AV_ROUND_INF);
        //輸出文件的音視頻流來自不同的文件，因此packet中流的索引與輸出文件中流的索引可能不匹配，可能出現(xiàn)packet中音頻幀和視頻幀所對應的stream_index是一樣的的情況
        //因此將packet中的音頻或視頻幀與輸出流的音視頻流的索引匹配上
        pkt.stream_index = out_stream->index;

        AVBSFContext *bsf_ctx = NULL;

        #if AAC_ADTS_TO_ASC
            if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
            {
                bsf_ctx = bsf_ctx_audio;
            }
        #endif

        #if H264_AVCC_TO_ANNEXB
            if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
            {
                bsf_ctx = bsf_ctx_video;
            }
        #endif

        if ((AAC_ADTS_TO_ASC && in_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) || (H264_AVCC_TO_ANNEXB && in_stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO))
        {                
            //將packet送入過濾器
            int ans = av_bsf_send_packet(bsf_ctx, &pkt);
            if (ans < 0)
            {
                //需要多個packet才能過濾
                if (ans == AVERROR(EAGAIN))
                {
                    av_packet_unref(&pkt);
                    continue;
                }
                printf("failed to send packet to filter\n");
                release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
                return -1;
            }

            //一個輸入packet可能產(chǎn)生多個輸出packet
            do
            {
                ans = av_bsf_receive_packet(bsf_ctx, &pkt_filtered);
                if (ans < 0 && ans != AVERROR(EAGAIN))
                {
                    if (ans == AVERROR_EOF)
                    {
                        break;
                    }
                    else
                    {
                        printf("failed to receive packet from filter\n");
                        release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
                        return -1;
                    }
                }
                //交錯寫入
                if (av_interleaved_write_frame(out_fmt_ctx, &pkt_filtered) < 0)
                {
                    printf("failed to write frame to the output file\n");
                    release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
                    return -1;                    
                }
                av_packet_unref(&pkt_filtered);
            } while (ans == AVERROR(EAGAIN));
        }
        else
        {
            //交錯寫入
            if (av_interleaved_write_frame(out_fmt_ctx, &pkt) < 0)
            {
                printf("failed to write frame to the output file\n");
                release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
                return -1;
            }
        }

        av_packet_unref(&pkt);
    }

    //寫入文件尾
    if (av_write_trailer(out_fmt_ctx) < 0)
    {
        printf("failed to write tail to the output file\n");
        release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
        return -1;
    }

    release_context(in_fmt_ctx_video, in_fmt_ctx_audio, out_fmt_ctx, &pkt, &pkt_filtered, bsf_ctx_video, bsf_ctx_audio);
    return 0;
}

項目代碼及使用方法：FFmpeg_Learning_Projects/Mux_Audio_Video at master · zn111111/FFmpeg_Learning_Projects (github.com)文章來源地址http://www.zghlxwxcb.cn/news/detail-798315.html

到了這里，關于基于新版FFmpeg（FFmpeg 6.1）的音視頻復用（不涉及編解碼）的文章就介紹完了。如果您還想了解更多內(nèi)容，請在右上角搜索TOY模板網(wǎng)以前的文章或繼續(xù)瀏覽下面的相關文章，希望大家以后多多支持TOY模板網(wǎng)！