How do I use the FFmpeg libraries to extract every nth frame from a video and save it as a small image file in C++?

After experimenting with the examples in the FFmpeg documentation, I was finally able to create a short program that extracts every nth frame from a video. However, the output files that it produces are huge, at over 15 MB for each image. How can I change this to produce smaller, lower-quality images?

The result I am trying to get is done easily on the command line with:

ffmpeg -i [input video] -vf "select=not(mod(n\,10))" -fps_mode vfr img_%03d.jpg

For a video with about 500 frames, this creates 50 images that are only about 800 KB each; how would I be able to mimic this in my program?

My code consists of opening the input file, decoding the packets, then saving the frames:

#include <cstdio>
#include <cstdlib>
#include <iostream>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
}

// Demuxer context for the input file; opened in open_input_file() and closed in main().
static AVFormatContext *fmt_ctx;
// Decoder context for the selected video stream; allocated in open_input_file() and freed in main().
static AVCodecContext *dec_ctx;
// Index of the selected video stream inside fmt_ctx; stays -1 until open_input_file() sets it.
static int video_stream_index = -1;

// Open the input file and prepare a decoder for its best video stream.
//
// filename - path of the media file to open
//
// On success returns 0 and leaves the globals fmt_ctx, dec_ctx and
// video_stream_index initialised. On failure prints a message and returns the
// negative FFmpeg error code; whatever was already stored in fmt_ctx / dec_ctx
// is left there for the caller to release.
static int open_input_file(const char *filename) {
    int ret;
    const AVCodec *dec;

    // Open the container
    if((ret = avformat_open_input(&fmt_ctx, filename, NULL, NULL)) < 0) {
        printf("ERROR: failed to open input file\n");
        return ret;
    }

    // Probe the container for stream information
    if((ret = avformat_find_stream_info(fmt_ctx, NULL)) < 0) {
        printf("ERROR: failed to find stream information\n");
        return ret;
    }

    // Pick the best video stream; the matching decoder is returned through `dec`
    ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &dec, 0);
    if(ret < 0) {
        printf("ERROR: failed to find a video stream in the input file\n");
        return ret;
    }
    video_stream_index = ret;

    // Allocate the decoding context
    dec_ctx = avcodec_alloc_context3(dec);
    if(!dec_ctx) {
        printf("ERROR: failed to allocate decoding context\n");
        return AVERROR(ENOMEM);
    }

    // Copy the stream's codec parameters into the decoder context. This can
    // fail, and opening the decoder without them would misconfigure it.
    ret = avcodec_parameters_to_context(dec_ctx, fmt_ctx->streams[video_stream_index]->codecpar);
    if(ret < 0) {
        printf("ERROR: failed to copy codec parameters to decoding context\n");
        return ret;
    }

    // Initialise the video decoder
    if((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
        printf("ERROR: failed to open video decoder\n");
        return ret;
    }

    return 0;
}

// Write an RGB24 image to disk as a binary PPM ("P6") file.
//
// buf       - first byte of the top pixel row
// wrap      - distance in bytes between the starts of consecutive rows in buf
// x_size    - image width in pixels (3 bytes are written per pixel)
// y_size    - image height in pixels
// file_name - path of the file to create or overwrite
//
// The pixel data is written uncompressed, so the file is roughly
// x_size * y_size * 3 bytes regardless of the extension file_name carries.
// If the file cannot be opened or fully written, an error is printed to
// stderr and the function returns.
static void save(unsigned char *buf, int wrap, int x_size, int y_size, char *file_name) {
    FILE *file = fopen(file_name, "wb");
    if(!file) {
        // Without this check a bad path would crash in fprintf() below
        fprintf(stderr, "ERROR: failed to open output file %s\n", file_name);
        return;
    }

    const size_t row_bytes = (size_t)(x_size * 3);
    const unsigned char *row = buf;
    int ok = fprintf(file, "P6\n%d %d\n%d\n", x_size, y_size, 255) > 0;

    // Rows are written one at a time because `wrap` may include padding
    // beyond the x_size * 3 visible bytes
    for(int i = 0; ok && i < y_size; i++) {
        ok = fwrite(row, 1, row_bytes, file) == row_bytes;
        row += wrap;
    }

    // fclose flushes buffered data, so a failure here also means a short file
    if(fclose(file) != 0) {
        ok = 0;
    }
    if(!ok) {
        fprintf(stderr, "ERROR: failed to write output file %s\n", file_name);
    }
}

// Decode one packet and save every (mod)th decoded frame as an RGB image.
//
// cxt           - opened decoder context the packet is sent to
// frame         - caller-owned scratch frame that receives each decoded picture
// pkt           - packet handed straight to avcodec_send_packet()
// out_file_name - output path prefix; files are named
//                 "<out_file_name>_<frame number>.<file_ext>"
// file_ext      - extension appended to the output name
// mod           - only frames whose decoder frame number is a multiple of this
//                 are saved; values below 1 are treated as 1
//
// Any decoding or allocation failure prints an error and exits the process.
// The conversion context and RGB frame are created only when a frame actually
// has to be saved, and are released before returning.
static void decode(AVCodecContext *cxt, AVFrame *frame, AVPacket *pkt,
                   const char *out_file_name, const char *file_ext, int mod=1) {
    char buf[1024];                 // holds the output file path
    SwsContext *sws_ctx = NULL;     // created lazily on the first frame to save
    AVFrame *rgb_frame = NULL;      // created lazily together with sws_ctx
    int ret;

    // A modulus of 0 would divide by zero below; fall back to saving every frame
    if(mod < 1) {
        mod = 1;
    }

    // Send the packet to the decoder
    ret = avcodec_send_packet(cxt, pkt);
    if(ret < 0) {
        printf("ERROR: error sending packet for decoding\n");
        exit(1);
    }

    // Pull frames until the decoder has nothing more to give for this packet
    for(;;) {
        ret = avcodec_receive_frame(cxt, frame);
        if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            // Decoder needs more input or is fully drained
            break;
        }
        if(ret < 0) {
            printf("ERROR: error during decoding\n");
            exit(1);
        }

        // Skip frames that are not the (mod)th frame.
        // NOTE(review): frame_number is believed to be deprecated in newer
        // FFmpeg releases in favour of frame_num - confirm for your version.
        if(cxt->frame_number % mod != 0) {
            continue;
        }

        printf("saving frame %03d\n", cxt->frame_number);
        fflush(stdout);

        // Build the RGB conversion state once, sized from the decoded frame
        if(!sws_ctx) {
            sws_ctx = sws_getContext(frame->width, frame->height, (AVPixelFormat)frame->format,
                                     frame->width, frame->height, AV_PIX_FMT_RGB24,
                                     SWS_BICUBIC, NULL, NULL, NULL);
            rgb_frame = av_frame_alloc();
            if(!sws_ctx || !rgb_frame) {
                printf("ERROR: failed to allocate RGB conversion state\n");
                exit(1);
            }
            rgb_frame->format = AV_PIX_FMT_RGB24;
            rgb_frame->width = frame->width;
            rgb_frame->height = frame->height;
            if(av_frame_get_buffer(rgb_frame, 0) < 0) {
                printf("ERROR: failed to allocate RGB frame buffer\n");
                exit(1);
            }
        }

        // Convert the decoded frame to RGB24
        sws_scale(sws_ctx, frame->data, frame->linesize, 0, frame->height,
                  rgb_frame->data, rgb_frame->linesize);

        // Build "out_file_name_###.file_ext" and write the image
        snprintf(buf, sizeof(buf), "%s_%03d.%s", out_file_name, cxt->frame_number, file_ext);
        save(rgb_frame->data[0], rgb_frame->linesize[0], rgb_frame->width, rgb_frame->height, buf);
    }

    // Release the per-call conversion state (both calls accept NULL)
    sws_freeContext(sws_ctx);
    av_frame_free(&rgb_frame);
}

int main() {
    // SIMULATE COMMAND LINE ARGUMENTS
    char argv0[] = "test";
    char argv1[] = "/User/Desktop/frames/test_video.mov";
    char *argv[] = {argv0, argv1, nullptr};

    // INIT VARS AND FFMPEG OBJECTS
    int ret;
    AVPacket *packet;
    AVFrame *frame;

    // ALLOCATE FRAME AND PACKET
    frame = av_frame_alloc();
    packet = av_packet_alloc();
    if (!frame || !packet) {
        fprintf(stderr, "Could not allocate frame or packet\n");
        exit(1);
    }

    // IF FILE DOESN'T OPEN, GO TO THE END
    if((ret = open_input_file(argv[1])) < 0) {
        goto end;
    }
    
    // READ ALL THE PACKETS - simple
    while(av_read_frame(fmt_ctx, packet) >= 0) {
        // IF PACKET INDEX MATCHES VIDEO INDEX...
        if (packet->stream_index == video_stream_index) {
            // SEND PACKET TO THE DECODER and SAVE
            std::string name = "/User/Desktop/frames/img";
            std::string ext = "bmp";
            decode(dec_ctx, frame, packet, name.c_str(), ext.c_str(), 5);
        }

        // UNREFERENCE THE PACKET
        av_packet_unref(packet);
    }

    // END MARKER
    end:
    avcodec_free_context(&dec_ctx);
    avformat_close_input(&fmt_ctx);
    av_frame_free(&frame);
    av_packet_free(&packet);

    // FINAL ERROR CATCH
    if (ret < 0 && ret != AVERROR_EOF) {
        fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
        exit(1);
    }

    exit(0);
}

I am not sure how to go about producing images that are much smaller in size like the ones produced on the command line. I have a feeling that this is possible somehow during the conversion to RGB or the saving of the file but I can’t seem to figure out how.

Also, is there any way that I could go about this much more efficiently? On the command line, this finishes very quickly (no more than a second or two for a 9 sec. movie at ~60 fps).

Read more here: Source link