/*
 * Copyright (C) 2005 Michael Ahlberg, Måns Rullgård
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
| 24 | |
| 25 | #include <stdlib.h> |
| 26 | |
| 27 | #include "libavutil/avstring.h" |
| 28 | #include "libavutil/base64.h" |
| 29 | #include "libavutil/bswap.h" |
| 30 | #include "libavutil/dict.h" |
| 31 | |
| 32 | #include "libavcodec/bytestream.h" |
| 33 | #include "libavcodec/vorbis_parser.h" |
| 34 | |
| 35 | #include "avformat.h" |
| 36 | #include "flac_picture.h" |
| 37 | #include "internal.h" |
| 38 | #include "oggdec.h" |
| 39 | #include "vorbiscomment.h" |
| 40 | #include "replaygain.h" |
| 41 | |
| 42 | static int ogm_chapter(AVFormatContext *as, uint8_t *key, uint8_t *val) |
| 43 | { |
| 44 | int i, cnum, h, m, s, ms, keylen = strlen(key); |
| 45 | AVChapter *chapter = NULL; |
| 46 | |
| 47 | if (keylen < 9 || av_strncasecmp(key, "CHAPTER", 7) || sscanf(key+7, "%03d", &cnum) != 1) |
| 48 | return 0; |
| 49 | |
| 50 | if (keylen <= 10) { |
| 51 | if (sscanf(val, "%02d:%02d:%02d.%03d", &h, &m, &s, &ms) < 4) |
| 52 | return 0; |
| 53 | |
| 54 | avpriv_new_chapter(as, cnum, (AVRational) { 1, 1000 }, |
| 55 | ms + 1000 * (s + 60 * (m + 60 * h)), |
| 56 | AV_NOPTS_VALUE, NULL); |
| 57 | av_free(val); |
| 58 | } else if (!av_strcasecmp(key + keylen - 4, "NAME")) { |
| 59 | for (i = 0; i < as->nb_chapters; i++) |
| 60 | if (as->chapters[i]->id == cnum) { |
| 61 | chapter = as->chapters[i]; |
| 62 | break; |
| 63 | } |
| 64 | if (!chapter) |
| 65 | return 0; |
| 66 | |
| 67 | av_dict_set(&chapter->metadata, "title", val, AV_DICT_DONT_STRDUP_VAL); |
| 68 | } else |
| 69 | return 0; |
| 70 | |
| 71 | av_free(key); |
| 72 | return 1; |
| 73 | } |
| 74 | |
| 75 | int ff_vorbis_stream_comment(AVFormatContext *as, AVStream *st, |
| 76 | const uint8_t *buf, int size) |
| 77 | { |
| 78 | int updates = ff_vorbis_comment(as, &st->metadata, buf, size, 1); |
| 79 | |
| 80 | if (updates > 0) { |
| 81 | st->event_flags |= AVSTREAM_EVENT_FLAG_METADATA_UPDATED; |
| 82 | } |
| 83 | |
| 84 | return updates; |
| 85 | } |
| 86 | |
| 87 | int ff_vorbis_comment(AVFormatContext *as, AVDictionary **m, |
| 88 | const uint8_t *buf, int size, |
| 89 | int parse_picture) |
| 90 | { |
| 91 | const uint8_t *p = buf; |
| 92 | const uint8_t *end = buf + size; |
| 93 | int updates = 0; |
| 94 | unsigned n; |
| 95 | int s; |
| 96 | |
| 97 | /* must have vendor_length and user_comment_list_length */ |
| 98 | if (size < 8) |
| 99 | return AVERROR_INVALIDDATA; |
| 100 | |
| 101 | s = bytestream_get_le32(&p); |
| 102 | |
| 103 | if (end - p - 4 < s || s < 0) |
| 104 | return AVERROR_INVALIDDATA; |
| 105 | |
| 106 | p += s; |
| 107 | |
| 108 | n = bytestream_get_le32(&p); |
| 109 | |
| 110 | while (end - p >= 4 && n > 0) { |
| 111 | const char *t, *v; |
| 112 | int tl, vl; |
| 113 | |
| 114 | s = bytestream_get_le32(&p); |
| 115 | |
| 116 | if (end - p < s || s < 0) |
| 117 | break; |
| 118 | |
| 119 | t = p; |
| 120 | p += s; |
| 121 | n--; |
| 122 | |
| 123 | v = memchr(t, '=', s); |
| 124 | if (!v) |
| 125 | continue; |
| 126 | |
| 127 | tl = v - t; |
| 128 | vl = s - tl - 1; |
| 129 | v++; |
| 130 | |
| 131 | if (tl && vl) { |
| 132 | char *tt, *ct; |
| 133 | |
| 134 | tt = av_malloc(tl + 1); |
| 135 | ct = av_malloc(vl + 1); |
| 136 | if (!tt || !ct) { |
| 137 | av_freep(&tt); |
| 138 | av_freep(&ct); |
| 139 | return AVERROR(ENOMEM); |
| 140 | } |
| 141 | |
| 142 | memcpy(tt, t, tl); |
| 143 | tt[tl] = 0; |
| 144 | |
| 145 | memcpy(ct, v, vl); |
| 146 | ct[vl] = 0; |
| 147 | |
| 148 | /* The format in which the pictures are stored is the FLAC format. |
| 149 | * Xiph says: "The binary FLAC picture structure is base64 encoded |
| 150 | * and placed within a VorbisComment with the tag name |
| 151 | * 'METADATA_BLOCK_PICTURE'. This is the preferred and |
| 152 | * recommended way of embedding cover art within VorbisComments." |
| 153 | */ |
| 154 | if (!av_strcasecmp(tt, "METADATA_BLOCK_PICTURE") && parse_picture) { |
| 155 | int ret, len = AV_BASE64_DECODE_SIZE(vl); |
| 156 | char *pict = av_malloc(len); |
| 157 | |
| 158 | if (!pict) { |
| 159 | av_log(as, AV_LOG_WARNING, "out-of-memory error. Skipping cover art block.\n"); |
| 160 | av_freep(&tt); |
| 161 | av_freep(&ct); |
| 162 | continue; |
| 163 | } |
| 164 | ret = av_base64_decode(pict, ct, len); |
| 165 | av_freep(&tt); |
| 166 | av_freep(&ct); |
| 167 | if (ret > 0) |
| 168 | ret = ff_flac_parse_picture(as, pict, ret, 0); |
| 169 | av_freep(&pict); |
| 170 | if (ret < 0) { |
| 171 | av_log(as, AV_LOG_WARNING, "Failed to parse cover art block.\n"); |
| 172 | continue; |
| 173 | } |
| 174 | } else if (!ogm_chapter(as, tt, ct)) { |
| 175 | updates++; |
| 176 | if (av_dict_get(*m, tt, NULL, 0)) { |
| 177 | av_dict_set(m, tt, ";", AV_DICT_APPEND); |
| 178 | } |
| 179 | av_dict_set(m, tt, ct, |
| 180 | AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL | |
| 181 | AV_DICT_APPEND); |
| 182 | } |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | if (p != end) |
| 187 | av_log(as, AV_LOG_INFO, |
| 188 | "%"PTRDIFF_SPECIFIER" bytes of comment header remain\n", end - p); |
| 189 | if (n > 0) |
| 190 | av_log(as, AV_LOG_INFO, |
| 191 | "truncated comment header, %i comments not found\n", n); |
| 192 | |
| 193 | ff_metadata_conv(m, NULL, ff_vorbiscomment_metadata_conv); |
| 194 | |
| 195 | return updates; |
| 196 | } |
| 197 | |
/*
 * Parse the vorbis header
 *
 * Vorbis Identification header from Vorbis_I_spec.html#vorbis-spec-codec
 * [vorbis_version]    = read 32 bits as unsigned integer | Checked, must be 0
 * [audio_channels]    = read 8 bit integer as unsigned   | Used
 * [audio_sample_rate] = read 32 bits as unsigned integer | Used
 * [bitrate_maximum]   = read 32 bits as signed integer   | Not used yet
 * [bitrate_nominal]   = read 32 bits as signed integer   | Used as bitrate
 * [bitrate_minimum]   = read 32 bits as signed integer   | Not used yet
 * [blocksize_0]       = read 4 bits as unsigned integer  | Range-checked only
 * [blocksize_1]       = read 4 bits as unsigned integer  | Range-checked only
 * [framing_flag]      = read one bit                     | Checked, must be 1
 */
| 212 | |
| 213 | struct oggvorbis_private { |
| 214 | unsigned int len[3]; |
| 215 | unsigned char *packet[3]; |
| 216 | AVVorbisParseContext *vp; |
| 217 | int64_t final_pts; |
| 218 | int final_duration; |
| 219 | }; |
| 220 | |
| 221 | static int fixup_vorbis_headers(AVFormatContext *as, |
| 222 | struct oggvorbis_private *priv, |
| 223 | uint8_t **buf) |
| 224 | { |
| 225 | int i, offset, len, err; |
| 226 | int buf_len; |
| 227 | unsigned char *ptr; |
| 228 | |
| 229 | len = priv->len[0] + priv->len[1] + priv->len[2]; |
| 230 | buf_len = len + len / 255 + 64; |
| 231 | |
| 232 | if (*buf) |
| 233 | return AVERROR_INVALIDDATA; |
| 234 | |
| 235 | ptr = *buf = av_realloc(NULL, buf_len); |
| 236 | if (!ptr) |
| 237 | return AVERROR(ENOMEM); |
| 238 | memset(*buf, '\0', buf_len); |
| 239 | |
| 240 | ptr[0] = 2; |
| 241 | offset = 1; |
| 242 | offset += av_xiphlacing(&ptr[offset], priv->len[0]); |
| 243 | offset += av_xiphlacing(&ptr[offset], priv->len[1]); |
| 244 | for (i = 0; i < 3; i++) { |
| 245 | memcpy(&ptr[offset], priv->packet[i], priv->len[i]); |
| 246 | offset += priv->len[i]; |
| 247 | av_freep(&priv->packet[i]); |
| 248 | } |
| 249 | if ((err = av_reallocp(buf, offset + AV_INPUT_BUFFER_PADDING_SIZE)) < 0) |
| 250 | return err; |
| 251 | return offset; |
| 252 | } |
| 253 | |
| 254 | static void vorbis_cleanup(AVFormatContext *s, int idx) |
| 255 | { |
| 256 | struct ogg *ogg = s->priv_data; |
| 257 | struct ogg_stream *os = ogg->streams + idx; |
| 258 | struct oggvorbis_private *priv = os->private; |
| 259 | int i; |
| 260 | if (os->private) { |
| 261 | av_vorbis_parse_free(&priv->vp); |
| 262 | for (i = 0; i < 3; i++) |
| 263 | av_freep(&priv->packet[i]); |
| 264 | } |
| 265 | } |
| 266 | |
| 267 | static int vorbis_update_metadata(AVFormatContext *s, int idx) |
| 268 | { |
| 269 | struct ogg *ogg = s->priv_data; |
| 270 | struct ogg_stream *os = ogg->streams + idx; |
| 271 | AVStream *st = s->streams[idx]; |
| 272 | int ret; |
| 273 | |
| 274 | if (os->psize <= 8) |
| 275 | return 0; |
| 276 | |
| 277 | /* New metadata packet; release old data. */ |
| 278 | av_dict_free(&st->metadata); |
| 279 | ret = ff_vorbis_stream_comment(s, st, os->buf + os->pstart + 7, |
| 280 | os->psize - 8); |
| 281 | if (ret < 0) |
| 282 | return ret; |
| 283 | |
| 284 | /* Update the metadata if possible. */ |
| 285 | av_freep(&os->new_metadata); |
| 286 | if (st->metadata) { |
| 287 | os->new_metadata = av_packet_pack_dictionary(st->metadata, &os->new_metadata_size); |
| 288 | /* Send an empty dictionary to indicate that metadata has been cleared. */ |
| 289 | } else { |
| 290 | os->new_metadata = av_mallocz(1); |
| 291 | os->new_metadata_size = 0; |
| 292 | } |
| 293 | |
| 294 | return ret; |
| 295 | } |
| 296 | |
| 297 | static int vorbis_header(AVFormatContext *s, int idx) |
| 298 | { |
| 299 | struct ogg *ogg = s->priv_data; |
| 300 | AVStream *st = s->streams[idx]; |
| 301 | struct ogg_stream *os = ogg->streams + idx; |
| 302 | struct oggvorbis_private *priv; |
| 303 | int pkt_type = os->buf[os->pstart]; |
| 304 | |
| 305 | if (!os->private) { |
| 306 | os->private = av_mallocz(sizeof(struct oggvorbis_private)); |
| 307 | if (!os->private) |
| 308 | return AVERROR(ENOMEM); |
| 309 | } |
| 310 | |
| 311 | priv = os->private; |
| 312 | |
| 313 | if (!(pkt_type & 1)) |
| 314 | return priv->vp ? 0 : AVERROR_INVALIDDATA; |
| 315 | |
| 316 | if (os->psize < 1 || pkt_type > 5) |
| 317 | return AVERROR_INVALIDDATA; |
| 318 | |
| 319 | if (priv->packet[pkt_type >> 1]) |
| 320 | return AVERROR_INVALIDDATA; |
| 321 | if (pkt_type > 1 && !priv->packet[0] || pkt_type > 3 && !priv->packet[1]) |
| 322 | return priv->vp ? 0 : AVERROR_INVALIDDATA; |
| 323 | |
| 324 | priv->len[pkt_type >> 1] = os->psize; |
| 325 | priv->packet[pkt_type >> 1] = av_mallocz(os->psize); |
| 326 | if (!priv->packet[pkt_type >> 1]) |
| 327 | return AVERROR(ENOMEM); |
| 328 | memcpy(priv->packet[pkt_type >> 1], os->buf + os->pstart, os->psize); |
| 329 | if (os->buf[os->pstart] == 1) { |
| 330 | const uint8_t *p = os->buf + os->pstart + 7; /* skip "\001vorbis" tag */ |
| 331 | unsigned blocksize, bs0, bs1; |
| 332 | int srate; |
| 333 | int channels; |
| 334 | |
| 335 | if (os->psize != 30) |
| 336 | return AVERROR_INVALIDDATA; |
| 337 | |
| 338 | if (bytestream_get_le32(&p) != 0) /* vorbis_version */ |
| 339 | return AVERROR_INVALIDDATA; |
| 340 | |
| 341 | channels = bytestream_get_byte(&p); |
| 342 | if (st->codecpar->channels && channels != st->codecpar->channels) { |
| 343 | av_log(s, AV_LOG_ERROR, "Channel change is not supported\n"); |
| 344 | return AVERROR_PATCHWELCOME; |
| 345 | } |
| 346 | st->codecpar->channels = channels; |
| 347 | srate = bytestream_get_le32(&p); |
| 348 | p += 4; // skip maximum bitrate |
| 349 | st->codecpar->bit_rate = bytestream_get_le32(&p); // nominal bitrate |
| 350 | p += 4; // skip minimum bitrate |
| 351 | |
| 352 | blocksize = bytestream_get_byte(&p); |
| 353 | bs0 = blocksize & 15; |
| 354 | bs1 = blocksize >> 4; |
| 355 | |
| 356 | if (bs0 > bs1) |
| 357 | return AVERROR_INVALIDDATA; |
| 358 | if (bs0 < 6 || bs1 > 13) |
| 359 | return AVERROR_INVALIDDATA; |
| 360 | |
| 361 | if (bytestream_get_byte(&p) != 1) /* framing_flag */ |
| 362 | return AVERROR_INVALIDDATA; |
| 363 | |
| 364 | st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; |
| 365 | st->codecpar->codec_id = AV_CODEC_ID_VORBIS; |
| 366 | |
| 367 | if (srate > 0) { |
| 368 | st->codecpar->sample_rate = srate; |
| 369 | avpriv_set_pts_info(st, 64, 1, srate); |
| 370 | } |
| 371 | } else if (os->buf[os->pstart] == 3) { |
| 372 | if (vorbis_update_metadata(s, idx) >= 0 && priv->len[1] > 10) { |
| 373 | unsigned new_len; |
| 374 | |
| 375 | int ret = ff_replaygain_export(st, st->metadata); |
| 376 | if (ret < 0) |
| 377 | return ret; |
| 378 | |
| 379 | // drop all metadata we parsed and which is not required by libvorbis |
| 380 | new_len = 7 + 4 + AV_RL32(priv->packet[1] + 7) + 4 + 1; |
| 381 | if (new_len >= 16 && new_len < os->psize) { |
| 382 | AV_WL32(priv->packet[1] + new_len - 5, 0); |
| 383 | priv->packet[1][new_len - 1] = 1; |
| 384 | priv->len[1] = new_len; |
| 385 | } |
| 386 | } |
| 387 | } else { |
| 388 | int ret; |
| 389 | |
| 390 | if (priv->vp) |
| 391 | return AVERROR_INVALIDDATA; |
| 392 | |
| 393 | ret = fixup_vorbis_headers(s, priv, &st->codecpar->extradata); |
| 394 | if (ret < 0) { |
| 395 | st->codecpar->extradata_size = 0; |
| 396 | return ret; |
| 397 | } |
| 398 | st->codecpar->extradata_size = ret; |
| 399 | |
| 400 | priv->vp = av_vorbis_parse_init(st->codecpar->extradata, st->codecpar->extradata_size); |
| 401 | if (!priv->vp) { |
| 402 | av_freep(&st->codecpar->extradata); |
| 403 | st->codecpar->extradata_size = 0; |
| 404 | return AVERROR_UNKNOWN; |
| 405 | } |
| 406 | } |
| 407 | |
| 408 | return 1; |
| 409 | } |
| 410 | |
| 411 | static int vorbis_packet(AVFormatContext *s, int idx) |
| 412 | { |
| 413 | struct ogg *ogg = s->priv_data; |
| 414 | struct ogg_stream *os = ogg->streams + idx; |
| 415 | struct oggvorbis_private *priv = os->private; |
| 416 | int duration, flags = 0; |
| 417 | |
| 418 | if (!priv->vp) |
| 419 | return AVERROR_INVALIDDATA; |
| 420 | |
| 421 | /* first packet handling |
| 422 | * here we parse the duration of each packet in the first page and compare |
| 423 | * the total duration to the page granule to find the encoder delay and |
| 424 | * set the first timestamp */ |
| 425 | if ((!os->lastpts || os->lastpts == AV_NOPTS_VALUE) && !(os->flags & OGG_FLAG_EOS) && (int64_t)os->granule>=0) { |
| 426 | int seg, d; |
| 427 | uint8_t *last_pkt = os->buf + os->pstart; |
| 428 | uint8_t *next_pkt = last_pkt; |
| 429 | |
| 430 | av_vorbis_parse_reset(priv->vp); |
| 431 | duration = 0; |
| 432 | seg = os->segp; |
| 433 | d = av_vorbis_parse_frame_flags(priv->vp, last_pkt, 1, &flags); |
| 434 | if (d < 0) { |
| 435 | os->pflags |= AV_PKT_FLAG_CORRUPT; |
| 436 | return 0; |
| 437 | } else if (flags & VORBIS_FLAG_COMMENT) { |
| 438 | vorbis_update_metadata(s, idx); |
| 439 | flags = 0; |
| 440 | } |
| 441 | duration += d; |
| 442 | last_pkt = next_pkt = next_pkt + os->psize; |
| 443 | for (; seg < os->nsegs; seg++) { |
| 444 | if (os->segments[seg] < 255) { |
| 445 | int d = av_vorbis_parse_frame_flags(priv->vp, last_pkt, 1, &flags); |
| 446 | if (d < 0) { |
| 447 | duration = os->granule; |
| 448 | break; |
| 449 | } else if (flags & VORBIS_FLAG_COMMENT) { |
| 450 | vorbis_update_metadata(s, idx); |
| 451 | flags = 0; |
| 452 | } |
| 453 | duration += d; |
| 454 | last_pkt = next_pkt + os->segments[seg]; |
| 455 | } |
| 456 | next_pkt += os->segments[seg]; |
| 457 | } |
| 458 | os->lastpts = |
| 459 | os->lastdts = os->granule - duration; |
| 460 | |
| 461 | if (!os->granule && duration) //hack to deal with broken files (Ticket3710) |
| 462 | os->lastpts = os->lastdts = AV_NOPTS_VALUE; |
| 463 | |
| 464 | if (s->streams[idx]->start_time == AV_NOPTS_VALUE) { |
| 465 | s->streams[idx]->start_time = FFMAX(os->lastpts, 0); |
| 466 | if (s->streams[idx]->duration != AV_NOPTS_VALUE) |
| 467 | s->streams[idx]->duration -= s->streams[idx]->start_time; |
| 468 | } |
| 469 | priv->final_pts = AV_NOPTS_VALUE; |
| 470 | av_vorbis_parse_reset(priv->vp); |
| 471 | } |
| 472 | |
| 473 | /* parse packet duration */ |
| 474 | if (os->psize > 0) { |
| 475 | duration = av_vorbis_parse_frame_flags(priv->vp, os->buf + os->pstart, 1, &flags); |
| 476 | if (duration < 0) { |
| 477 | os->pflags |= AV_PKT_FLAG_CORRUPT; |
| 478 | return 0; |
| 479 | } else if (flags & VORBIS_FLAG_COMMENT) { |
| 480 | vorbis_update_metadata(s, idx); |
| 481 | flags = 0; |
| 482 | } |
| 483 | os->pduration = duration; |
| 484 | } |
| 485 | |
| 486 | /* final packet handling |
| 487 | * here we save the pts of the first packet in the final page, sum up all |
| 488 | * packet durations in the final page except for the last one, and compare |
| 489 | * to the page granule to find the duration of the final packet */ |
| 490 | if (os->flags & OGG_FLAG_EOS) { |
| 491 | if (os->lastpts != AV_NOPTS_VALUE) { |
| 492 | priv->final_pts = os->lastpts; |
| 493 | priv->final_duration = 0; |
| 494 | } |
| 495 | if (os->segp == os->nsegs) |
| 496 | os->pduration = os->granule - priv->final_pts - priv->final_duration; |
| 497 | priv->final_duration += os->pduration; |
| 498 | } |
| 499 | |
| 500 | return 0; |
| 501 | } |
| 502 | |
| 503 | const struct ogg_codec ff_vorbis_codec = { |
| 504 | .magic = "\001vorbis", |
| 505 | .magicsize = 7, |
| 506 | .header = vorbis_header, |
| 507 | .packet = vorbis_packet, |
| 508 | .cleanup = vorbis_cleanup, |
| 509 | .nb_header = 3, |
| 510 | }; |